From a9fd053b56c6bb14972ab7a19da0b575fe4c5d66 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Thu, 19 Nov 2015 10:15:17 +0100 Subject: iio: st_sensors: support active-low interrupts Most ST MEMS sensors that support interrupts can also handle sending an active-low interrupt, i.e. going from high to low on data ready (or other interrupt) and thus triggering on a falling edge to the interrupt controller. Set up logic to inspect the interrupt line we get for a sensor: if it triggers on a rising edge, leave everything alone; if it triggers on a falling edge, set up active-low interrupts; and if an unsupported configuration appears, warn with errors and reconfigure the interrupt to a rising edge, which all interrupt-generating sensors support. Create a local header, st_sensors_core.h, to share functions between the sensor core and the trigger setup code. Cc: Giuseppe Barba Cc: Denis Ciocca Signed-off-by: Linus Walleij Signed-off-by: Jonathan Cameron --- include/linux/iio/common/st_sensors.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/iio/common/st_sensors.h b/include/linux/iio/common/st_sensors.h index 2fe939c73cd2..6670c3d25c58 100644 --- a/include/linux/iio/common/st_sensors.h +++ b/include/linux/iio/common/st_sensors.h @@ -119,6 +119,8 @@ struct st_sensor_bdu { * @addr: address of the register. * @mask_int1: mask to enable/disable IRQ on INT1 pin. * @mask_int2: mask to enable/disable IRQ on INT2 pin. + * @addr_ihl: address to enable/disable active low on the INT lines. + * @mask_ihl: mask to enable/disable active low on the INT lines. * struct ig1 - represents the Interrupt Generator 1 of sensors. * @en_addr: address of the enable ig1 register. * @en_mask: mask to write the on/off value for enable. @@ -127,6 +129,8 @@ struct st_sensor_data_ready_irq { u8 addr; u8 mask_int1; u8 mask_int2; + u8 addr_ihl; + u8 mask_ihl; struct { u8 en_addr; u8 en_mask; -- cgit v1.2.3 From 23a1f8d44c0bca48f04fc2a2f1edafd826ce6133 Mon Sep 17 00:00:00 2001 From: Sara Sharon Date: Tue, 8 Dec 2015 16:04:31 +0200 Subject: mac80211: process and save VHT MU-MIMO group frame The Group ID Management frame is an Action frame of category VHT. It is transmitted by the AP to assign or change the user position of a STA for one or more group IDs. Process and save the group membership data, and notify the underlying driver of changes.
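For illustration only (not part of this patch), a minimal sketch of how a driver might consume the new BSS_CHANGED_MU_GROUPS notification in its bss_info_changed() callback; the foo_* names are invented and foo_priv/foo_set_mu_groups() are hypothetical driver internals:

static void foo_bss_info_changed(struct ieee80211_hw *hw,
				 struct ieee80211_vif *vif,
				 struct ieee80211_bss_conf *info,
				 u32 changed)
{
	struct foo_priv *priv = hw->priv;	/* hypothetical driver data */

	if (changed & BSS_CHANGED_MU_GROUPS)
		/* push the 8-byte membership bitmap and the 16-byte user
		 * position array saved in info->mu_group down to the
		 * device (foo_set_mu_groups() is a hypothetical helper) */
		foo_set_mu_groups(priv, info->mu_group.membership,
				  info->mu_group.position);
}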
Signed-off-by: Sara Sharon Signed-off-by: Emmanuel Grumbach Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 7 +++++++ include/net/mac80211.h | 17 +++++++++++++++++ net/mac80211/ieee80211_i.h | 2 ++ net/mac80211/iface.c | 10 ++++++++++ net/mac80211/mlme.c | 7 +++++++ net/mac80211/rx.c | 5 +++++ net/mac80211/util.c | 3 +++ net/mac80211/vht.c | 25 +++++++++++++++++++++++++ 8 files changed, 76 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 452c0b0d2f32..d9ddb89533a7 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -843,6 +843,8 @@ enum ieee80211_vht_opmode_bits { }; #define WLAN_SA_QUERY_TR_ID_LEN 2 +#define WLAN_MEMBERSHIP_LEN 8 +#define WLAN_USER_POSITION_LEN 16 /** * struct ieee80211_tpc_report_ie @@ -989,6 +991,11 @@ struct ieee80211_mgmt { u8 action_code; u8 operating_mode; } __packed vht_opmode_notif; + struct { + u8 action_code; + u8 membership[WLAN_MEMBERSHIP_LEN]; + u8 position[WLAN_USER_POSITION_LEN]; + } __packed vht_group_notif; struct { u8 action_code; u8 dialog_token; diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 7c30faff245f..8da483b2c067 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -298,6 +298,7 @@ struct ieee80211_vif_chanctx_switch { * note that this is only called when it changes after the channel * context had been assigned. * @BSS_CHANGED_OCB: OCB join status changed + * @BSS_CHANGED_MU_GROUPS: VHT MU-MIMO group id or user position changed */ enum ieee80211_bss_change { BSS_CHANGED_ASSOC = 1<<0, @@ -323,6 +324,7 @@ enum ieee80211_bss_change { BSS_CHANGED_BEACON_INFO = 1<<20, BSS_CHANGED_BANDWIDTH = 1<<21, BSS_CHANGED_OCB = 1<<22, + BSS_CHANGED_MU_GROUPS = 1<<23, /* when adding here, make sure to change ieee80211_reconfig */ }; @@ -435,6 +437,19 @@ struct ieee80211_event { } u; }; +/** + * struct ieee80211_mu_group_data - STA's VHT MU-MIMO group data + * + * This structure describes the group id data of VHT MU-MIMO + * + * @membership: 64 bits array - a bit is set if station is member of the group + * @position: 2 bits per group id indicating the position in the group + */ +struct ieee80211_mu_group_data { + u8 membership[WLAN_MEMBERSHIP_LEN]; + u8 position[WLAN_USER_POSITION_LEN]; +}; + /** * struct ieee80211_bss_conf - holds the BSS's changing parameters * @@ -477,6 +492,7 @@ struct ieee80211_event { * @enable_beacon: whether beaconing should be enabled or not * @chandef: Channel definition for this BSS -- the hardware might be * configured a higher bandwidth than this BSS uses, for example. + * @mu_group: VHT MU-MIMO group membership data * @ht_operation_mode: HT operation mode like in &struct ieee80211_ht_operation. * This field is only valid when the channel is a wide HT/VHT channel. 
* Note that with TDLS this can be the case (channel is HT, protection must @@ -535,6 +551,7 @@ struct ieee80211_bss_conf { s32 cqm_rssi_thold; u32 cqm_rssi_hyst; struct cfg80211_chan_def chandef; + struct ieee80211_mu_group_data mu_group; __be32 arp_addr_list[IEEE80211_BSS_ARP_ADDR_LIST_LEN]; int arp_addr_cnt; bool qos; diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index b84f6aa32c08..747402d8c7a9 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -1714,6 +1714,8 @@ ieee80211_vht_cap_ie_to_sta_vht_cap(struct ieee80211_sub_if_data *sdata, enum ieee80211_sta_rx_bandwidth ieee80211_sta_cap_rx_bw(struct sta_info *sta); enum ieee80211_sta_rx_bandwidth ieee80211_sta_cur_vht_bw(struct sta_info *sta); void ieee80211_sta_set_rx_nss(struct sta_info *sta); +void ieee80211_process_mu_groups(struct ieee80211_sub_if_data *sdata, + struct ieee80211_mgmt *mgmt); u32 __ieee80211_vht_handle_opmode(struct ieee80211_sub_if_data *sdata, struct sta_info *sta, u8 opmode, enum ieee80211_band band); diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index c9e325d2e120..33ae3c81bfc5 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -1271,6 +1271,16 @@ static void ieee80211_iface_work(struct work_struct *work) } } mutex_unlock(&local->sta_mtx); + } else if (ieee80211_is_action(mgmt->frame_control) && + mgmt->u.action.category == WLAN_CATEGORY_VHT) { + switch (mgmt->u.action.u.vht_group_notif.action_code) { + case WLAN_VHT_ACTION_GROUPID_MGMT: + ieee80211_process_mu_groups(sdata, mgmt); + break; + default: + WARN_ON(1); + break; + } } else if (ieee80211_is_data_qos(mgmt->frame_control)) { struct ieee80211_hdr *hdr = (void *)mgmt; /* diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 1c342e2592c4..31d5881b31fa 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -2079,6 +2079,13 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, memset(&ifmgd->ht_capa_mask, 0, sizeof(ifmgd->ht_capa_mask)); memset(&ifmgd->vht_capa, 0, sizeof(ifmgd->vht_capa)); memset(&ifmgd->vht_capa_mask, 0, sizeof(ifmgd->vht_capa_mask)); + + /* reset MU-MIMO ownership and group data */ + memset(sdata->vif.bss_conf.mu_group.membership, 0, + sizeof(sdata->vif.bss_conf.mu_group.membership)); + memset(sdata->vif.bss_conf.mu_group.position, 0, + sizeof(sdata->vif.bss_conf.mu_group.position)); + changed |= BSS_CHANGED_MU_GROUPS; sdata->flags &= ~IEEE80211_SDATA_MU_MIMO_OWNER; sdata->ap_power_level = IEEE80211_UNSET_POWER_LEVEL; diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index bc081850ac0e..a5668b54015f 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -2738,6 +2738,11 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) opmode, status->band); goto handled; } + case WLAN_VHT_ACTION_GROUPID_MGMT: { + if (len < IEEE80211_MIN_ACTION_SIZE + 25) + goto invalid; + goto queue; + } default: break; } diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 3943d4bf289c..f4b2c04e7d81 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -1928,6 +1928,9 @@ int ieee80211_reconfig(struct ieee80211_local *local) BSS_CHANGED_IDLE | BSS_CHANGED_TXPOWER; + if (sdata->flags & IEEE80211_SDATA_MU_MIMO_OWNER) + changed |= BSS_CHANGED_MU_GROUPS; + switch (sdata->vif.type) { case NL80211_IFTYPE_STATION: changed |= BSS_CHANGED_ASSOC | diff --git a/net/mac80211/vht.c b/net/mac80211/vht.c index c38b2f07a919..050de08bf82e 100644 --- a/net/mac80211/vht.c +++ b/net/mac80211/vht.c @@ -1,6 +1,9 @@ /* * VHT handling * + * Portions of this file + * 
Copyright(c) 2015 Intel Deutschland GmbH + * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. @@ -425,6 +428,28 @@ u32 __ieee80211_vht_handle_opmode(struct ieee80211_sub_if_data *sdata, return changed; } +void ieee80211_process_mu_groups(struct ieee80211_sub_if_data *sdata, + struct ieee80211_mgmt *mgmt) +{ + struct ieee80211_bss_conf *bss_conf = &sdata->vif.bss_conf; + + if (!(sdata->flags & IEEE80211_SDATA_MU_MIMO_OWNER)) + return; + + if (!memcmp(mgmt->u.action.u.vht_group_notif.position, + bss_conf->mu_group.position, WLAN_USER_POSITION_LEN) && + !memcmp(mgmt->u.action.u.vht_group_notif.membership, + bss_conf->mu_group.membership, WLAN_MEMBERSHIP_LEN)) + return; + + memcpy(mgmt->u.action.u.vht_group_notif.membership, + bss_conf->mu_group.membership, WLAN_MEMBERSHIP_LEN); + memcpy(mgmt->u.action.u.vht_group_notif.position, + bss_conf->mu_group.position, WLAN_USER_POSITION_LEN); + + ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_MU_GROUPS); +} + void ieee80211_vht_handle_opmode(struct ieee80211_sub_if_data *sdata, struct sta_info *sta, u8 opmode, enum ieee80211_band band) -- cgit v1.2.3 From b429fab4467e2320f67c058d7419c03c7221d125 Mon Sep 17 00:00:00 2001 From: Stefan Agner Date: Tue, 19 Jan 2016 17:14:39 -0800 Subject: regmap: clarify meaning of max_register The exact meaning of max_register is not entirely clear. Follow the common wording and use "address" instead of "index". Signed-off-by: Stefan Agner Signed-off-by: Mark Brown --- include/linux/regmap.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/regmap.h b/include/linux/regmap.h index 18394343f489..27aaac9027c4 100644 --- a/include/linux/regmap.h +++ b/include/linux/regmap.h @@ -162,7 +162,7 @@ typedef void (*regmap_unlock)(void *); * This field is a duplicate of a similar file in * 'struct regmap_bus' and serves exact same purpose. * Use it only for "no-bus" cases. - * @max_register: Optional, specifies the maximum valid register index. + * @max_register: Optional, specifies the maximum valid register address. * @wr_table: Optional, points to a struct regmap_access_table specifying * valid ranges for write access. * @rd_table: As above, for read access. -- cgit v1.2.3 From c03d996900f9d063b47ef7462885a9085c8a587f Mon Sep 17 00:00:00 2001 From: Boris BREZILLON Date: Wed, 2 Dec 2015 12:01:05 +0100 Subject: mtd: nand: add NAND_NEED_SCRAMBLING option flag Some MLC NANDs are sensitive to repeated patterns and require data to be scrambled in order to limit the number of bitflips. Add a new flag to let the NAND controller know about this constraint. Signed-off-by: Boris Brezillon Signed-off-by: Brian Norris --- include/linux/mtd/nand.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index bdd68e22b5a5..a13dfd5bc58b 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -168,6 +168,12 @@ typedef enum { /* Device supports subpage reads */ #define NAND_SUBPAGE_READ 0x00001000 +/* + * Some MLC NANDs need data scrambling to limit bitflips caused by repeated + * patterns.
+ */ +#define NAND_NEED_SCRAMBLING 0x00002000 + /* Options valid for Samsung large page devices */ #define NAND_SAMSUNG_LP_OPTIONS NAND_CACHEPRG -- cgit v1.2.3 From fd2a2f20c7a8173acd4858e5178eb40fd7c025b9 Mon Sep 17 00:00:00 2001 From: Thomas Petazzoni Date: Thu, 14 Jan 2016 15:44:50 +0100 Subject: mtd: onenand: make onenand_scan_bbt() static As was done in commit 17799359e7b3fa6ef4f2bf926cd6821cf7903ecf ("mtd: nand_bbt: make nand_scan_bbt() static") for the NAND code, this commit makes the onenand_scan_bbt() function static in the OneNAND code, since it is only used in onenand_bbt.c itself. Consequently, the EXPORT_SYMBOL() and the declaration in bbm.h are also removed. Signed-off-by: Thomas Petazzoni Reviewed-by: Boris Brezillon Signed-off-by: Brian Norris --- drivers/mtd/onenand/onenand_bbt.c | 3 +-- include/linux/mtd/bbm.h | 1 - 2 files changed, 1 insertion(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/mtd/onenand/onenand_bbt.c b/drivers/mtd/onenand/onenand_bbt.c index 08d0085f3e93..5f8d47073c97 100644 --- a/drivers/mtd/onenand/onenand_bbt.c +++ b/drivers/mtd/onenand/onenand_bbt.c @@ -179,7 +179,7 @@ static int onenand_isbad_bbt(struct mtd_info *mtd, loff_t offs, int allowbbt) * by the onenand_release function. * */ -int onenand_scan_bbt(struct mtd_info *mtd, struct nand_bbt_descr *bd) +static int onenand_scan_bbt(struct mtd_info *mtd, struct nand_bbt_descr *bd) { struct onenand_chip *this = mtd->priv; struct bbm_info *bbm = this->bbm; @@ -248,5 +248,4 @@ int onenand_default_bbt(struct mtd_info *mtd) return onenand_scan_bbt(mtd, bbm->badblock_pattern); } -EXPORT_SYMBOL(onenand_scan_bbt); EXPORT_SYMBOL(onenand_default_bbt); diff --git a/include/linux/mtd/bbm.h b/include/linux/mtd/bbm.h index 36bb6a503f19..3bf8f954b642 100644 --- a/include/linux/mtd/bbm.h +++ b/include/linux/mtd/bbm.h @@ -166,7 +166,6 @@ struct bbm_info { }; /* OneNAND BBT interface */ -extern int onenand_scan_bbt(struct mtd_info *mtd, struct nand_bbt_descr *bd); extern int onenand_default_bbt(struct mtd_info *mtd); #endif /* __LINUX_MTD_BBM_H */ -- cgit v1.2.3 From 131497acd88a4456d99247cee457baefd2817835 Mon Sep 17 00:00:00 2001 From: Ricardo Ribalda Delgado Date: Sat, 16 Jan 2016 14:24:07 +0100 Subject: iio: add ad5761 DAC driver The ad5761 is a 1-channel DAC with a configurable output range. The driver uses the regulator interface for its voltage reference. It shares its register layout with the ad5761r, ad5721 and ad5721r. Differences: the ad5761* are 16-bit, the ad5721* are 12-bit, and the ad57*1r parts have an internal reference. Signed-off-by: Ricardo Ribalda Delgado Signed-off-by: Jonathan Cameron --- CREDITS | 1 + drivers/iio/dac/Kconfig | 10 + drivers/iio/dac/Makefile | 1 + drivers/iio/dac/ad5761.c | 430 +++++++++++++++++++++++++++++++++++ include/linux/platform_data/ad5761.h | 44 ++++ 5 files changed, 486 insertions(+) create mode 100644 drivers/iio/dac/ad5761.c create mode 100644 include/linux/platform_data/ad5761.h (limited to 'include/linux') diff --git a/CREDITS b/CREDITS index 8207cc62ee9d..44ea433d70a1 100644 --- a/CREDITS +++ b/CREDITS @@ -3035,6 +3035,7 @@ D: PLX USB338x driver D: PCA9634 driver D: Option GTM671WFS D: Fintek F81216A +D: AD5761 iio driver D: Various kernel hacks S: Qtechnology A/S S: Valby Langgade 142 diff --git a/drivers/iio/dac/Kconfig b/drivers/iio/dac/Kconfig index 5263c5125fbb..5dc71505da61 100644 --- a/drivers/iio/dac/Kconfig +++ b/drivers/iio/dac/Kconfig @@ -111,6 +111,16 @@ config AD5755 To compile this driver as a module, choose M here: the module will be called ad5755.
+config AD5761 + tristate "Analog Devices AD5761/61R/21/21R DAC driver" + depends on SPI_MASTER + help + Say yes here to build support for Analog Devices AD5761, AD5761R, AD5721, + AD5721R Digital to Analog Converter. + + To compile this driver as a module, choose M here: the + module will be called ad5761. + config AD5764 tristate "Analog Devices AD5764/64R/44/44R DAC driver" depends on SPI_MASTER diff --git a/drivers/iio/dac/Makefile b/drivers/iio/dac/Makefile index 63ae05633e0c..cb525b53fc7b 100644 --- a/drivers/iio/dac/Makefile +++ b/drivers/iio/dac/Makefile @@ -12,6 +12,7 @@ obj-$(CONFIG_AD5504) += ad5504.o obj-$(CONFIG_AD5446) += ad5446.o obj-$(CONFIG_AD5449) += ad5449.o obj-$(CONFIG_AD5755) += ad5755.o +obj-$(CONFIG_AD5761) += ad5761.o obj-$(CONFIG_AD5764) += ad5764.o obj-$(CONFIG_AD5791) += ad5791.o obj-$(CONFIG_AD5686) += ad5686.o diff --git a/drivers/iio/dac/ad5761.c b/drivers/iio/dac/ad5761.c new file mode 100644 index 000000000000..d6510d6928b3 --- /dev/null +++ b/drivers/iio/dac/ad5761.c @@ -0,0 +1,430 @@ +/* + * AD5721, AD5721R, AD5761, AD5761R, Voltage Output Digital to Analog Converter + * + * Copyright 2016 Qtechnology A/S + * 2016 Ricardo Ribalda + * + * Licensed under the GPL-2. + */ +#include +#include +#include +#include +#include +#include +#include +#include + +#define AD5761_ADDR(addr) ((addr & 0xf) << 16) +#define AD5761_ADDR_NOOP 0x0 +#define AD5761_ADDR_DAC_WRITE 0x3 +#define AD5761_ADDR_CTRL_WRITE_REG 0x4 +#define AD5761_ADDR_SW_DATA_RESET 0x7 +#define AD5761_ADDR_DAC_READ 0xb +#define AD5761_ADDR_CTRL_READ_REG 0xc +#define AD5761_ADDR_SW_FULL_RESET 0xf + +#define AD5761_CTRL_USE_INTVREF BIT(5) +#define AD5761_CTRL_ETS BIT(6) + +/** + * struct ad5761_chip_info - chip specific information + * @int_vref: Value of the internal reference voltage in mV - 0 if external + * reference voltage is used + * @channel: channel specification +*/ + +struct ad5761_chip_info { + unsigned long int_vref; + const struct iio_chan_spec channel; +}; + +struct ad5761_range_params { + int m; + int c; +}; + +enum ad5761_supported_device_ids { + ID_AD5721, + ID_AD5721R, + ID_AD5761, + ID_AD5761R, +}; + +/** + * struct ad5761_state - driver instance specific data + * @spi: spi_device + * @vref_reg: reference voltage regulator + * @use_intref: true when the internal voltage reference is used + * @vref: actual voltage reference in mVolts + * @range: output range mode used + * @data: cache aligned spi buffer + */ +struct ad5761_state { + struct spi_device *spi; + struct regulator *vref_reg; + + bool use_intref; + int vref; + enum ad5761_voltage_range range; + + /* + * DMA (thus cache coherency maintenance) requires the + * transfer buffers to live in their own cache lines. 
+ */ + union { + __be32 d32; + u8 d8[4]; + } data[3] ____cacheline_aligned; +}; + +static const struct ad5761_range_params ad5761_range_params[] = { + [AD5761_VOLTAGE_RANGE_M10V_10V] = { + .m = 80, + .c = 40, + }, + [AD5761_VOLTAGE_RANGE_0V_10V] = { + .m = 40, + .c = 0, + }, + [AD5761_VOLTAGE_RANGE_M5V_5V] = { + .m = 40, + .c = 20, + }, + [AD5761_VOLTAGE_RANGE_0V_5V] = { + .m = 20, + .c = 0, + }, + [AD5761_VOLTAGE_RANGE_M2V5_7V5] = { + .m = 40, + .c = 10, + }, + [AD5761_VOLTAGE_RANGE_M3V_3V] = { + .m = 24, + .c = 12, + }, + [AD5761_VOLTAGE_RANGE_0V_16V] = { + .m = 64, + .c = 0, + }, + [AD5761_VOLTAGE_RANGE_0V_20V] = { + .m = 80, + .c = 0, + }, +}; + +static int _ad5761_spi_write(struct ad5761_state *st, u8 addr, u16 val) +{ + st->data[0].d32 = cpu_to_be32(AD5761_ADDR(addr) | val); + + return spi_write(st->spi, &st->data[0].d8[1], 3); +} + +static int ad5761_spi_write(struct iio_dev *indio_dev, u8 addr, u16 val) +{ + struct ad5761_state *st = iio_priv(indio_dev); + int ret; + + mutex_lock(&indio_dev->mlock); + ret = _ad5761_spi_write(st, addr, val); + mutex_unlock(&indio_dev->mlock); + + return ret; +} + +static int _ad5761_spi_read(struct ad5761_state *st, u8 addr, u16 *val) +{ + int ret; + struct spi_transfer xfers[] = { + { + .tx_buf = &st->data[0].d8[1], + .bits_per_word = 8, + .len = 3, + .cs_change = true, + }, { + .tx_buf = &st->data[1].d8[1], + .rx_buf = &st->data[2].d8[1], + .bits_per_word = 8, + .len = 3, + }, + }; + + st->data[0].d32 = cpu_to_be32(AD5761_ADDR(addr)); + st->data[1].d32 = cpu_to_be32(AD5761_ADDR(AD5761_ADDR_NOOP)); + + ret = spi_sync_transfer(st->spi, xfers, ARRAY_SIZE(xfers)); + + *val = be32_to_cpu(st->data[2].d32); + + return ret; +} + +static int ad5761_spi_read(struct iio_dev *indio_dev, u8 addr, u16 *val) +{ + struct ad5761_state *st = iio_priv(indio_dev); + int ret; + + mutex_lock(&indio_dev->mlock); + ret = _ad5761_spi_read(st, addr, val); + mutex_unlock(&indio_dev->mlock); + + return ret; +} + +static int ad5761_spi_set_range(struct ad5761_state *st, + enum ad5761_voltage_range range) +{ + u16 aux; + int ret; + + aux = (range & 0x7) | AD5761_CTRL_ETS; + + if (st->use_intref) + aux |= AD5761_CTRL_USE_INTVREF; + + ret = _ad5761_spi_write(st, AD5761_ADDR_SW_FULL_RESET, 0); + if (ret) + return ret; + + ret = _ad5761_spi_write(st, AD5761_ADDR_CTRL_WRITE_REG, aux); + if (ret) + return ret; + + st->range = range; + + return 0; +} + +static int ad5761_read_raw(struct iio_dev *indio_dev, + struct iio_chan_spec const *chan, + int *val, + int *val2, + long mask) +{ + struct ad5761_state *st; + int ret; + u16 aux; + + switch (mask) { + case IIO_CHAN_INFO_RAW: + ret = ad5761_spi_read(indio_dev, AD5761_ADDR_DAC_READ, &aux); + if (ret) + return ret; + *val = aux >> chan->scan_type.shift; + return IIO_VAL_INT; + case IIO_CHAN_INFO_SCALE: + st = iio_priv(indio_dev); + *val = st->vref * ad5761_range_params[st->range].m; + *val /= 10; + *val2 = chan->scan_type.realbits; + return IIO_VAL_FRACTIONAL_LOG2; + case IIO_CHAN_INFO_OFFSET: + st = iio_priv(indio_dev); + *val = -(1 << chan->scan_type.realbits); + *val *= ad5761_range_params[st->range].c; + *val /= ad5761_range_params[st->range].m; + return IIO_VAL_INT; + default: + return -EINVAL; + } +} + +static int ad5761_write_raw(struct iio_dev *indio_dev, + struct iio_chan_spec const *chan, + int val, + int val2, + long mask) +{ + u16 aux; + + if (mask != IIO_CHAN_INFO_RAW) + return -EINVAL; + + if (val2 || (val << chan->scan_type.shift) > 0xffff || val < 0) + return -EINVAL; + + aux = val << chan->scan_type.shift; + + return 
ad5761_spi_write(indio_dev, AD5761_ADDR_DAC_WRITE, aux); +} + +static const struct iio_info ad5761_info = { + .read_raw = &ad5761_read_raw, + .write_raw = &ad5761_write_raw, + .driver_module = THIS_MODULE, +}; + +#define AD5761_CHAN(_bits) { \ + .type = IIO_VOLTAGE, \ + .output = 1, \ + .info_mask_separate = BIT(IIO_CHAN_INFO_RAW), \ + .info_mask_shared_by_type = BIT(IIO_CHAN_INFO_SCALE) | \ + BIT(IIO_CHAN_INFO_OFFSET), \ + .scan_type = { \ + .sign = 'u', \ + .realbits = (_bits), \ + .storagebits = 16, \ + .shift = 16 - (_bits), \ + }, \ +} + +static const struct ad5761_chip_info ad5761_chip_infos[] = { + [ID_AD5721] = { + .int_vref = 0, + .channel = AD5761_CHAN(12), + }, + [ID_AD5721R] = { + .int_vref = 2500, + .channel = AD5761_CHAN(12), + }, + [ID_AD5761] = { + .int_vref = 0, + .channel = AD5761_CHAN(16), + }, + [ID_AD5761R] = { + .int_vref = 2500, + .channel = AD5761_CHAN(16), + }, +}; + +static int ad5761_get_vref(struct ad5761_state *st, + const struct ad5761_chip_info *chip_info) +{ + int ret; + + st->vref_reg = devm_regulator_get_optional(&st->spi->dev, "vref"); + if (PTR_ERR(st->vref_reg) == -ENODEV) { + /* Use Internal regulator */ + if (!chip_info->int_vref) { + dev_err(&st->spi->dev, + "Voltage reference not found\n"); + return -EIO; + } + + st->use_intref = true; + st->vref = chip_info->int_vref; + return 0; + } + + if (IS_ERR(st->vref_reg)) { + dev_err(&st->spi->dev, + "Error getting voltage reference regulator\n"); + return PTR_ERR(st->vref_reg); + } + + ret = regulator_enable(st->vref_reg); + if (ret) { + dev_err(&st->spi->dev, + "Failed to enable voltage reference\n"); + return ret; + } + + ret = regulator_get_voltage(st->vref_reg); + if (ret < 0) { + dev_err(&st->spi->dev, + "Failed to get voltage reference value\n"); + goto disable_regulator_vref; + } + + if (ret < 2000000 || ret > 3000000) { + dev_warn(&st->spi->dev, + "Invalid external voltage ref. 
value %d uV\n", ret); + ret = -EIO; + goto disable_regulator_vref; + } + + st->vref = ret / 1000; + st->use_intref = false; + + return 0; + +disable_regulator_vref: + regulator_disable(st->vref_reg); + st->vref_reg = NULL; + return ret; +} + +static int ad5761_probe(struct spi_device *spi) +{ + struct iio_dev *iio_dev; + struct ad5761_state *st; + int ret; + const struct ad5761_chip_info *chip_info = + &ad5761_chip_infos[spi_get_device_id(spi)->driver_data]; + enum ad5761_voltage_range voltage_range = AD5761_VOLTAGE_RANGE_0V_5V; + struct ad5761_platform_data *pdata = dev_get_platdata(&spi->dev); + + iio_dev = devm_iio_device_alloc(&spi->dev, sizeof(*st)); + if (!iio_dev) + return -ENOMEM; + + st = iio_priv(iio_dev); + + st->spi = spi; + spi_set_drvdata(spi, iio_dev); + + ret = ad5761_get_vref(st, chip_info); + if (ret) + return ret; + + if (pdata) + voltage_range = pdata->voltage_range; + + ret = ad5761_spi_set_range(st, voltage_range); + if (ret) + goto disable_regulator_err; + + iio_dev->dev.parent = &spi->dev; + iio_dev->info = &ad5761_info; + iio_dev->modes = INDIO_DIRECT_MODE; + iio_dev->channels = &chip_info->channel; + iio_dev->num_channels = 1; + iio_dev->name = spi_get_device_id(st->spi)->name; + ret = iio_device_register(iio_dev); + if (ret) + goto disable_regulator_err; + + return 0; + +disable_regulator_err: + if (!IS_ERR_OR_NULL(st->vref_reg)) + regulator_disable(st->vref_reg); + + return ret; +} + +static int ad5761_remove(struct spi_device *spi) +{ + struct iio_dev *iio_dev = spi_get_drvdata(spi); + struct ad5761_state *st = iio_priv(iio_dev); + + iio_device_unregister(iio_dev); + + if (!IS_ERR_OR_NULL(st->vref_reg)) + regulator_disable(st->vref_reg); + + return 0; +} + +static const struct spi_device_id ad5761_id[] = { + {"ad5721", ID_AD5721}, + {"ad5721r", ID_AD5721R}, + {"ad5761", ID_AD5761}, + {"ad5761r", ID_AD5761R}, + {} +}; +MODULE_DEVICE_TABLE(spi, ad5761_id); + +static struct spi_driver ad5761_driver = { + .driver = { + .name = "ad5761", + }, + .probe = ad5761_probe, + .remove = ad5761_remove, + .id_table = ad5761_id, +}; +module_spi_driver(ad5761_driver); + +MODULE_AUTHOR("Ricardo Ribalda "); +MODULE_DESCRIPTION("Analog Devices AD5721, AD5721R, AD5761, AD5761R driver"); +MODULE_LICENSE("GPL v2"); diff --git a/include/linux/platform_data/ad5761.h b/include/linux/platform_data/ad5761.h new file mode 100644 index 000000000000..7bd8ed7d978e --- /dev/null +++ b/include/linux/platform_data/ad5761.h @@ -0,0 +1,44 @@ +/* + * AD5721, AD5721R, AD5761, AD5761R, Voltage Output Digital to Analog Converter + * + * Copyright 2016 Qtechnology A/S + * 2016 Ricardo Ribalda + * + * Licensed under the GPL-2. + */ +#ifndef __LINUX_PLATFORM_DATA_AD5761_H__ +#define __LINUX_PLATFORM_DATA_AD5761_H__ + +/** + * enum ad5761_voltage_range - Voltage range the AD5761 is configured for. 
+ * @AD5761_VOLTAGE_RANGE_M10V_10V: -10V to 10V + * @AD5761_VOLTAGE_RANGE_0V_10V: 0V to 10V + * @AD5761_VOLTAGE_RANGE_M5V_5V: -5V to 5V + * @AD5761_VOLTAGE_RANGE_0V_5V: 0V to 5V + * @AD5761_VOLTAGE_RANGE_M2V5_7V5: -2.5V to 7.5V + * @AD5761_VOLTAGE_RANGE_M3V_3V: -3V to 3V + * @AD5761_VOLTAGE_RANGE_0V_16V: 0V to 16V + * @AD5761_VOLTAGE_RANGE_0V_20V: 0V to 20V + */ + +enum ad5761_voltage_range { + AD5761_VOLTAGE_RANGE_M10V_10V, + AD5761_VOLTAGE_RANGE_0V_10V, + AD5761_VOLTAGE_RANGE_M5V_5V, + AD5761_VOLTAGE_RANGE_0V_5V, + AD5761_VOLTAGE_RANGE_M2V5_7V5, + AD5761_VOLTAGE_RANGE_M3V_3V, + AD5761_VOLTAGE_RANGE_0V_16V, + AD5761_VOLTAGE_RANGE_0V_20V, +}; + +/** + * struct ad5761_platform_data - AD5761 DAC driver platform data + * @voltage_range: Voltage range the AD5761 is configured for + */ + +struct ad5761_platform_data { + enum ad5761_voltage_range voltage_range; +}; + +#endif -- cgit v1.2.3 From 132f5005abbd480b66c58f8113ed876b99b6318e Mon Sep 17 00:00:00 2001 From: Jack Pham Date: Thu, 10 Dec 2015 18:28:53 -0600 Subject: usb: host: ehci-msm: Allow LS devices to work Disable the silicon quirk which is normally enabled for HSIC host mode. This would otherwise prevent low-speed devices from enumerating properly. Signed-off-by: Jack Pham Signed-off-by: Timur Tabi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/ehci-msm.c | 2 ++ include/linux/usb/msm_hsusb_hw.h | 1 + 2 files changed, 3 insertions(+) (limited to 'include/linux') diff --git a/drivers/usb/host/ehci-msm.c b/drivers/usb/host/ehci-msm.c index c23e2858c815..aed499d64029 100644 --- a/drivers/usb/host/ehci-msm.c +++ b/drivers/usb/host/ehci-msm.c @@ -61,6 +61,8 @@ static int ehci_msm_reset(struct usb_hcd *hcd) writel(0x8, USB_AHBMODE); /* Disable streaming mode and select host mode */ writel(0x13, USB_USBMODE); + /* Disable ULPI_TX_PKT_EN_CLR_FIX which is valid only for HSIC */ + writel(readl(USB_GENCONFIG_2) & ~ULPI_TX_PKT_EN_CLR_FIX, USB_GENCONFIG_2); return 0; } diff --git a/include/linux/usb/msm_hsusb_hw.h b/include/linux/usb/msm_hsusb_hw.h index e159b39f67a2..974c3796a23f 100644 --- a/include/linux/usb/msm_hsusb_hw.h +++ b/include/linux/usb/msm_hsusb_hw.h @@ -22,6 +22,7 @@ #define USB_AHBBURST (MSM_USB_BASE + 0x0090) #define USB_AHBMODE (MSM_USB_BASE + 0x0098) #define USB_GENCONFIG_2 (MSM_USB_BASE + 0x00a0) +#define ULPI_TX_PKT_EN_CLR_FIX BIT(19) #define USB_CAPLENGTH (MSM_USB_BASE + 0x0100) /* 8 bit */ -- cgit v1.2.3 From 203d4f347d86aa9e78342457aa7a3844c4fadd1d Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Thu, 14 Jan 2016 16:24:45 +0100 Subject: reset: Make reset_control_ops const The ops member holds a pointer to a structure that is usually not modified. Make it const.
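For illustration only (not part of this patch), a hedged sketch of the driver-side effect: with the ops pointer declared const, a reset driver can keep its ops table in read-only data. The foo_* names are invented:

#include <linux/reset-controller.h>

static int foo_reset_assert(struct reset_controller_dev *rcdev,
			    unsigned long id)
{
	/* assert the (hypothetical) reset line 'id' here */
	return 0;
}

static int foo_reset_deassert(struct reset_controller_dev *rcdev,
			      unsigned long id)
{
	/* deassert the (hypothetical) reset line 'id' here */
	return 0;
}

/* the table can now be const, matching the new type of ->ops */
static const struct reset_control_ops foo_reset_ops = {
	.assert = foo_reset_assert,
	.deassert = foo_reset_deassert,
};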
Signed-off-by: Maxime Ripard Signed-off-by: Philipp Zabel --- include/linux/reset-controller.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/reset-controller.h b/include/linux/reset-controller.h index ce6b962ffed4..a3a5bcdb1d02 100644 --- a/include/linux/reset-controller.h +++ b/include/linux/reset-controller.h @@ -38,7 +38,7 @@ struct of_phandle_args; * @nr_resets: number of reset controls in this reset controller device */ struct reset_controller_dev { - struct reset_control_ops *ops; + const struct reset_control_ops *ops; struct module *owner; struct list_head list; struct device_node *of_node; -- cgit v1.2.3 From 8d1a0ae724ad74ef7946a45e3b2d3e01f39df02b Mon Sep 17 00:00:00 2001 From: Martin Fuzzey Date: Wed, 13 Jan 2016 23:36:26 -0500 Subject: ARM: perf: Set ARMv7 SDER SUNIDEN bit ARMv7 counters other than the CPU cycle counter only work if the Secure Debug Enable Register (SDER) SUNIDEN bit is set. Since access to the SDER is only possible in secure state, it will only be done if the device tree property "secure-reg-access" is set. Without this: Performance counter stats for 'sleep 1': 14606094 cycles # 0.000 GHz 0 instructions # 0.00 insns per cycle After applying: Performance counter stats for 'sleep 1': 5843809 cycles 2566484 instructions # 0.44 insns per cycle 1.020144000 seconds time elapsed Some platforms (eg i.MX53) may also need additional platform specific setup. Acked-by: Rob Herring Signed-off-by: Martin Fuzzey Signed-off-by: Pooya Keshavarzi Signed-off-by: George G. Davis [will: add warning if property is found on arm64] Signed-off-by: Will Deacon --- Documentation/devicetree/bindings/arm/pmu.txt | 10 ++++++++++ arch/arm/kernel/perf_event_v7.c | 13 ++++++++++++- drivers/perf/arm_pmu.c | 9 +++++++++ include/linux/perf/arm_pmu.h | 1 + 4 files changed, 32 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/Documentation/devicetree/bindings/arm/pmu.txt b/Documentation/devicetree/bindings/arm/pmu.txt index 56518839f52a..b6056d3bca06 100644 --- a/Documentation/devicetree/bindings/arm/pmu.txt +++ b/Documentation/devicetree/bindings/arm/pmu.txt @@ -46,6 +46,16 @@ Optional properties: - qcom,no-pc-write : Indicates that this PMU doesn't support the 0xc and 0xd events. +- secure-reg-access : Indicates that the ARMv7 Secure Debug Enable Register + (SDER) is accessible. This will cause the driver to do + any setup required that is only possible in ARMv7 secure + state. If not present the ARMv7 SDER will not be touched, + which means the PMU may fail to operate unless external + code (bootloader or security monitor) has performed the + appropriate initialisation. Note that this property is + not valid for non-ARMv7 CPUs or ARMv7 CPUs booting Linux + in Non-secure state. 
+ Example: pmu { diff --git a/arch/arm/kernel/perf_event_v7.c b/arch/arm/kernel/perf_event_v7.c index 4152158f6e6a..15063851cd10 100644 --- a/arch/arm/kernel/perf_event_v7.c +++ b/arch/arm/kernel/perf_event_v7.c @@ -712,6 +712,11 @@ static const struct attribute_group *armv7_pmuv2_attr_groups[] = { #define ARMV7_EXCLUDE_USER (1 << 30) #define ARMV7_INCLUDE_HYP (1 << 27) +/* + * Secure debug enable reg + */ +#define ARMV7_SDER_SUNIDEN BIT(1) /* Permit non-invasive debug */ + static inline u32 armv7_pmnc_read(void) { u32 val; @@ -1094,7 +1099,13 @@ static int armv7pmu_set_event_filter(struct hw_perf_event *event, static void armv7pmu_reset(void *info) { struct arm_pmu *cpu_pmu = (struct arm_pmu *)info; - u32 idx, nb_cnt = cpu_pmu->num_events; + u32 idx, nb_cnt = cpu_pmu->num_events, val; + + if (cpu_pmu->secure_access) { + asm volatile("mrc p15, 0, %0, c1, c1, 1" : "=r" (val)); + val |= ARMV7_SDER_SUNIDEN; + asm volatile("mcr p15, 0, %0, c1, c1, 1" : : "r" (val)); + } /* The counter and interrupt enable registers are unknown at reset. */ for (idx = ARMV7_IDX_CYCLE_COUNTER; idx < nb_cnt; ++idx) { diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c index 166637f2917c..eb5bee07526b 100644 --- a/drivers/perf/arm_pmu.c +++ b/drivers/perf/arm_pmu.c @@ -889,6 +889,15 @@ int arm_pmu_device_probe(struct platform_device *pdev, if (node && (of_id = of_match_node(of_table, pdev->dev.of_node))) { init_fn = of_id->data; + pmu->secure_access = of_property_read_bool(pdev->dev.of_node, + "secure-reg-access"); + + /* arm64 systems boot only as non-secure */ + if (IS_ENABLED(CONFIG_ARM64) && pmu->secure_access) { + pr_warn("ignoring \"secure-reg-access\" property for arm64\n"); + pmu->secure_access = false; + } + ret = of_pmu_irq_cfg(pmu); if (!ret) ret = init_fn(pmu); diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h index 83b5e34c6580..2d5eaaa90078 100644 --- a/include/linux/perf/arm_pmu.h +++ b/include/linux/perf/arm_pmu.h @@ -104,6 +104,7 @@ struct arm_pmu { atomic_t active_events; struct mutex reserve_mutex; u64 max_period; + bool secure_access; /* 32-bit ARM only */ struct platform_device *plat_device; struct pmu_hw_events __percpu *hw_events; struct notifier_block hotplug_nb; -- cgit v1.2.3 From f8543c06b3948102d50ee6dfdbb06f1e7945baf1 Mon Sep 17 00:00:00 2001 From: Boris BREZILLON Date: Mon, 7 Dec 2015 23:25:57 +0100 Subject: mtd: inftl: kill unused oobinfo field Signed-off-by: Boris Brezillon Signed-off-by: Brian Norris --- include/linux/mtd/inftl.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mtd/inftl.h b/include/linux/mtd/inftl.h index 02cd5f9b79b8..8255118be0f0 100644 --- a/include/linux/mtd/inftl.h +++ b/include/linux/mtd/inftl.h @@ -44,7 +44,6 @@ struct INFTLrecord { unsigned int nb_blocks; /* number of physical blocks */ unsigned int nb_boot_blocks; /* number of blocks used by the bios */ struct erase_info instr; - struct nand_ecclayout oobinfo; }; int INFTL_mount(struct INFTLrecord *s); -- cgit v1.2.3 From 2c9e57799f1b7ef35a9e7f774edac6b3feac6399 Mon Sep 17 00:00:00 2001 From: Boris BREZILLON Date: Mon, 7 Dec 2015 23:25:58 +0100 Subject: mtd: nftl: kill unused oobinfo field Signed-off-by: Boris Brezillon Signed-off-by: Brian Norris --- include/linux/mtd/nftl.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mtd/nftl.h b/include/linux/mtd/nftl.h index b059629e22bc..044daa02b8ff 100644 --- a/include/linux/mtd/nftl.h +++ b/include/linux/mtd/nftl.h @@ -50,7 +50,6 @@ struct 
NFTLrecord { unsigned int nb_blocks; /* number of physical blocks */ unsigned int nb_boot_blocks; /* number of blocks used by the bios */ struct erase_info instr; - struct nand_ecclayout oobinfo; }; int NFTL_mount(struct NFTLrecord *s); -- cgit v1.2.3 From f88f44cbf10cf5bd42d6e46ea6625d9bfa5f155d Mon Sep 17 00:00:00 2001 From: Boris BREZILLON Date: Mon, 7 Dec 2015 23:25:59 +0100 Subject: mtd: nand: s3c2410: kill the ->ecc_layout field The s3c2410 driver allows board data to override the default ECC layout defined inside the driver, but no board-specific definition uses this feature. Kill this field so that we can easily move to a model where ecclayouts are dynamically allocated by the NAND controller driver. Signed-off-by: Boris Brezillon Acked-by: Krzysztof Kozlowski Signed-off-by: Brian Norris --- arch/arm/plat-samsung/devs.c | 9 --------- drivers/mtd/nand/s3c2410.c | 3 --- include/linux/platform_data/mtd-nand-s3c2410.h | 1 - 3 files changed, 13 deletions(-) (limited to 'include/linux') diff --git a/arch/arm/plat-samsung/devs.c b/arch/arm/plat-samsung/devs.c index b53d4ff3befb..84baa16f4c0b 100644 --- a/arch/arm/plat-samsung/devs.c +++ b/arch/arm/plat-samsung/devs.c @@ -727,15 +727,6 @@ static int __init s3c_nand_copy_set(struct s3c2410_nand_set *set) return -ENOMEM; } - if (set->ecc_layout) { - ptr = kmemdup(set->ecc_layout, - sizeof(struct nand_ecclayout), GFP_KERNEL); - set->ecc_layout = ptr; - - if (!ptr) - return -ENOMEM; - } - return 0; } diff --git a/drivers/mtd/nand/s3c2410.c b/drivers/mtd/nand/s3c2410.c index 01ac74fa3b95..9c9397b54b2c 100644 --- a/drivers/mtd/nand/s3c2410.c +++ b/drivers/mtd/nand/s3c2410.c @@ -861,9 +861,6 @@ static void s3c2410_nand_init_chip(struct s3c2410_nand_info *info, chip->ecc.mode = NAND_ECC_SOFT; #endif - if (set->ecc_layout != NULL) - chip->ecc.layout = set->ecc_layout; - if (set->disable_ecc) chip->ecc.mode = NAND_ECC_NONE; diff --git a/include/linux/platform_data/mtd-nand-s3c2410.h b/include/linux/platform_data/mtd-nand-s3c2410.h index 36bb92172f47..c55e42ee57fa 100644 --- a/include/linux/platform_data/mtd-nand-s3c2410.h +++ b/include/linux/platform_data/mtd-nand-s3c2410.h @@ -40,7 +40,6 @@ struct s3c2410_nand_set { char *name; int *nr_map; struct mtd_partition *partitions; - struct nand_ecclayout *ecc_layout; }; struct s3c2410_platform_nand { -- cgit v1.2.3 From 02db97a9de1c80bd5551ba46d901cb4d912f78f2 Mon Sep 17 00:00:00 2001 From: Boris BREZILLON Date: Mon, 7 Dec 2015 23:26:01 +0100 Subject: mtd: nand: kill unused ->ecclayout field in platform_nand_chip struct This field is not set in any board file and can thus be dropped.
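For illustration only (not part of this patch), a hedged sketch of what a board file's NAND platform data looks like without the dropped field; the names, partition table and delay value are made up, and ECC handling now always falls back to plat_nand's NAND_ECC_SOFT default:

#include <linux/mtd/mtd.h>
#include <linux/mtd/nand.h>
#include <linux/mtd/partitions.h>

static struct mtd_partition board_nand_parts[] = {
	{
		.name = "boot",
		.offset = 0,
		.size = 4 * 1024 * 1024,
	}, {
		.name = "rootfs",
		.offset = MTDPART_OFS_APPEND,
		.size = MTDPART_SIZ_FULL,
	},
};

static struct platform_nand_chip board_nand_chip = {
	.nr_chips = 1,
	.chip_delay = 20,
	.partitions = board_nand_parts,
	.nr_partitions = ARRAY_SIZE(board_nand_parts),
	/* no .ecclayout member: the soft-ECC default layout is used */
};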
Signed-off-by: Boris Brezillon Signed-off-by: Brian Norris --- drivers/mtd/nand/plat_nand.c | 1 - include/linux/mtd/nand.h | 2 -- 2 files changed, 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/mtd/nand/plat_nand.c b/drivers/mtd/nand/plat_nand.c index a0e26dea1424..e4e50da30444 100644 --- a/drivers/mtd/nand/plat_nand.c +++ b/drivers/mtd/nand/plat_nand.c @@ -73,7 +73,6 @@ static int plat_nand_probe(struct platform_device *pdev) data->chip.bbt_options |= pdata->chip.bbt_options; data->chip.ecc.hwctl = pdata->ctrl.hwcontrol; - data->chip.ecc.layout = pdata->chip.ecclayout; data->chip.ecc.mode = NAND_ECC_SOFT; platform_set_drvdata(pdev, data); diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index a13dfd5bc58b..7604f4be3386 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -902,7 +902,6 @@ extern int nand_do_read(struct mtd_info *mtd, loff_t from, size_t len, * @chip_delay: R/B delay value in us * @options: Option flags, e.g. 16bit buswidth * @bbt_options: BBT option flags, e.g. NAND_BBT_USE_FLASH - * @ecclayout: ECC layout info structure * @part_probe_types: NULL-terminated array of probe types */ struct platform_nand_chip { @@ -910,7 +909,6 @@ struct platform_nand_chip { int chip_offset; int nr_partitions; struct mtd_partition *partitions; - struct nand_ecclayout *ecclayout; int chip_delay; unsigned int options; unsigned int bbt_options; -- cgit v1.2.3 From 3b5cf20cf439c3e8963c2d2a3f225434d31827e3 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 24 Jan 2016 21:17:59 +0800 Subject: sunrpc: Use skcipher and ahash/shash This patch replaces uses of blkcipher with skcipher, and the long-obsolete hash interface with either shash (for non-SG users) or ahash. Signed-off-by: Herbert Xu --- include/linux/sunrpc/gss_krb5.h | 32 ++-- net/sunrpc/auth_gss/gss_krb5_crypto.c | 350 +++++++++++++++++++++------- net/sunrpc/auth_gss/gss_krb5_keys.c | 12 +- net/sunrpc/auth_gss/gss_krb5_mech.c | 89 ++++----- net/sunrpc/auth_gss/gss_krb5_seqnum.c | 22 +-- net/sunrpc/auth_gss/gss_krb5_wrap.c | 24 +-- 6 files changed, 306 insertions(+), 223 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/gss_krb5.h b/include/linux/sunrpc/gss_krb5.h index df02a4188487..7df625d41e35 100644 --- a/include/linux/sunrpc/gss_krb5.h +++ b/include/linux/sunrpc/gss_krb5.h @@ -36,7 +36,7 @@ * */ -#include +#include #include #include #include @@ -71,10 +71,10 @@ struct gss_krb5_enctype { const u32 keyed_cksum; /* is it a keyed cksum?
*/ const u32 keybytes; /* raw key len, in bytes */ const u32 keylength; /* final key len, in bytes */ - u32 (*encrypt) (struct crypto_blkcipher *tfm, + u32 (*encrypt) (struct crypto_skcipher *tfm, void *iv, void *in, void *out, int length); /* encryption function */ - u32 (*decrypt) (struct crypto_blkcipher *tfm, + u32 (*decrypt) (struct crypto_skcipher *tfm, void *iv, void *in, void *out, int length); /* decryption function */ u32 (*mk_key) (const struct gss_krb5_enctype *gk5e, @@ -98,12 +98,12 @@ struct krb5_ctx { u32 enctype; u32 flags; const struct gss_krb5_enctype *gk5e; /* enctype-specific info */ - struct crypto_blkcipher *enc; - struct crypto_blkcipher *seq; - struct crypto_blkcipher *acceptor_enc; - struct crypto_blkcipher *initiator_enc; - struct crypto_blkcipher *acceptor_enc_aux; - struct crypto_blkcipher *initiator_enc_aux; + struct crypto_skcipher *enc; + struct crypto_skcipher *seq; + struct crypto_skcipher *acceptor_enc; + struct crypto_skcipher *initiator_enc; + struct crypto_skcipher *acceptor_enc_aux; + struct crypto_skcipher *initiator_enc_aux; u8 Ksess[GSS_KRB5_MAX_KEYLEN]; /* session key */ u8 cksum[GSS_KRB5_MAX_KEYLEN]; s32 endtime; @@ -262,24 +262,24 @@ gss_unwrap_kerberos(struct gss_ctx *ctx_id, int offset, u32 -krb5_encrypt(struct crypto_blkcipher *key, +krb5_encrypt(struct crypto_skcipher *key, void *iv, void *in, void *out, int length); u32 -krb5_decrypt(struct crypto_blkcipher *key, +krb5_decrypt(struct crypto_skcipher *key, void *iv, void *in, void *out, int length); int -gss_encrypt_xdr_buf(struct crypto_blkcipher *tfm, struct xdr_buf *outbuf, +gss_encrypt_xdr_buf(struct crypto_skcipher *tfm, struct xdr_buf *outbuf, int offset, struct page **pages); int -gss_decrypt_xdr_buf(struct crypto_blkcipher *tfm, struct xdr_buf *inbuf, +gss_decrypt_xdr_buf(struct crypto_skcipher *tfm, struct xdr_buf *inbuf, int offset); s32 krb5_make_seq_num(struct krb5_ctx *kctx, - struct crypto_blkcipher *key, + struct crypto_skcipher *key, int direction, u32 seqnum, unsigned char *cksum, unsigned char *buf); @@ -320,12 +320,12 @@ gss_krb5_aes_decrypt(struct krb5_ctx *kctx, u32 offset, int krb5_rc4_setup_seq_key(struct krb5_ctx *kctx, - struct crypto_blkcipher *cipher, + struct crypto_skcipher *cipher, unsigned char *cksum); int krb5_rc4_setup_enc_key(struct krb5_ctx *kctx, - struct crypto_blkcipher *cipher, + struct crypto_skcipher *cipher, s32 seqnum); void gss_krb5_make_confounder(char *p, u32 conflen); diff --git a/net/sunrpc/auth_gss/gss_krb5_crypto.c b/net/sunrpc/auth_gss/gss_krb5_crypto.c index fee3c15a4b52..d94a8e1e9f05 100644 --- a/net/sunrpc/auth_gss/gss_krb5_crypto.c +++ b/net/sunrpc/auth_gss/gss_krb5_crypto.c @@ -34,11 +34,12 @@ * WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE. 
*/ +#include +#include #include #include #include #include -#include #include #include #include @@ -51,7 +52,7 @@ u32 krb5_encrypt( - struct crypto_blkcipher *tfm, + struct crypto_skcipher *tfm, void * iv, void * in, void * out, @@ -60,24 +61,28 @@ krb5_encrypt( u32 ret = -EINVAL; struct scatterlist sg[1]; u8 local_iv[GSS_KRB5_MAX_BLOCKSIZE] = {0}; - struct blkcipher_desc desc = { .tfm = tfm, .info = local_iv }; + SKCIPHER_REQUEST_ON_STACK(req, tfm); - if (length % crypto_blkcipher_blocksize(tfm) != 0) + if (length % crypto_skcipher_blocksize(tfm) != 0) goto out; - if (crypto_blkcipher_ivsize(tfm) > GSS_KRB5_MAX_BLOCKSIZE) { + if (crypto_skcipher_ivsize(tfm) > GSS_KRB5_MAX_BLOCKSIZE) { dprintk("RPC: gss_k5encrypt: tfm iv size too large %d\n", - crypto_blkcipher_ivsize(tfm)); + crypto_skcipher_ivsize(tfm)); goto out; } if (iv) - memcpy(local_iv, iv, crypto_blkcipher_ivsize(tfm)); + memcpy(local_iv, iv, crypto_skcipher_ivsize(tfm)); memcpy(out, in, length); sg_init_one(sg, out, length); - ret = crypto_blkcipher_encrypt_iv(&desc, sg, sg, length); + skcipher_request_set_callback(req, 0, NULL, NULL); + skcipher_request_set_crypt(req, sg, sg, length, local_iv); + + ret = crypto_skcipher_encrypt(req); + skcipher_request_zero(req); out: dprintk("RPC: krb5_encrypt returns %d\n", ret); return ret; @@ -85,7 +90,7 @@ out: u32 krb5_decrypt( - struct crypto_blkcipher *tfm, + struct crypto_skcipher *tfm, void * iv, void * in, void * out, @@ -94,23 +99,27 @@ krb5_decrypt( u32 ret = -EINVAL; struct scatterlist sg[1]; u8 local_iv[GSS_KRB5_MAX_BLOCKSIZE] = {0}; - struct blkcipher_desc desc = { .tfm = tfm, .info = local_iv }; + SKCIPHER_REQUEST_ON_STACK(req, tfm); - if (length % crypto_blkcipher_blocksize(tfm) != 0) + if (length % crypto_skcipher_blocksize(tfm) != 0) goto out; - if (crypto_blkcipher_ivsize(tfm) > GSS_KRB5_MAX_BLOCKSIZE) { + if (crypto_skcipher_ivsize(tfm) > GSS_KRB5_MAX_BLOCKSIZE) { dprintk("RPC: gss_k5decrypt: tfm iv size too large %d\n", - crypto_blkcipher_ivsize(tfm)); + crypto_skcipher_ivsize(tfm)); goto out; } if (iv) - memcpy(local_iv,iv, crypto_blkcipher_ivsize(tfm)); + memcpy(local_iv,iv, crypto_skcipher_ivsize(tfm)); memcpy(out, in, length); sg_init_one(sg, out, length); - ret = crypto_blkcipher_decrypt_iv(&desc, sg, sg, length); + skcipher_request_set_callback(req, 0, NULL, NULL); + skcipher_request_set_crypt(req, sg, sg, length, local_iv); + + ret = crypto_skcipher_decrypt(req); + skcipher_request_zero(req); out: dprintk("RPC: gss_k5decrypt returns %d\n",ret); return ret; @@ -119,9 +128,11 @@ out: static int checksummer(struct scatterlist *sg, void *data) { - struct hash_desc *desc = data; + struct ahash_request *req = data; + + ahash_request_set_crypt(req, sg, NULL, sg->length); - return crypto_hash_update(desc, sg, sg->length); + return crypto_ahash_update(req); } static int @@ -152,13 +163,13 @@ make_checksum_hmac_md5(struct krb5_ctx *kctx, char *header, int hdrlen, struct xdr_buf *body, int body_offset, u8 *cksumkey, unsigned int usage, struct xdr_netobj *cksumout) { - struct hash_desc desc; struct scatterlist sg[1]; int err; u8 checksumdata[GSS_KRB5_MAX_CKSUM_LEN]; u8 rc4salt[4]; - struct crypto_hash *md5; - struct crypto_hash *hmac_md5; + struct crypto_ahash *md5; + struct crypto_ahash *hmac_md5; + struct ahash_request *req; if (cksumkey == NULL) return GSS_S_FAILURE; @@ -174,61 +185,79 @@ make_checksum_hmac_md5(struct krb5_ctx *kctx, char *header, int hdrlen, return GSS_S_FAILURE; } - md5 = crypto_alloc_hash("md5", 0, CRYPTO_ALG_ASYNC); + md5 = crypto_alloc_ahash("md5", 0, 
CRYPTO_ALG_ASYNC); if (IS_ERR(md5)) return GSS_S_FAILURE; - hmac_md5 = crypto_alloc_hash(kctx->gk5e->cksum_name, 0, - CRYPTO_ALG_ASYNC); + hmac_md5 = crypto_alloc_ahash(kctx->gk5e->cksum_name, 0, + CRYPTO_ALG_ASYNC); if (IS_ERR(hmac_md5)) { - crypto_free_hash(md5); + crypto_free_ahash(md5); + return GSS_S_FAILURE; + } + + req = ahash_request_alloc(md5, GFP_KERNEL); + if (!req) { + crypto_free_ahash(hmac_md5); + crypto_free_ahash(md5); return GSS_S_FAILURE; } - desc.tfm = md5; - desc.flags = CRYPTO_TFM_REQ_MAY_SLEEP; + ahash_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP, NULL, NULL); - err = crypto_hash_init(&desc); + err = crypto_ahash_init(req); if (err) goto out; sg_init_one(sg, rc4salt, 4); - err = crypto_hash_update(&desc, sg, 4); + ahash_request_set_crypt(req, sg, NULL, 4); + err = crypto_ahash_update(req); if (err) goto out; sg_init_one(sg, header, hdrlen); - err = crypto_hash_update(&desc, sg, hdrlen); + ahash_request_set_crypt(req, sg, NULL, hdrlen); + err = crypto_ahash_update(req); if (err) goto out; err = xdr_process_buf(body, body_offset, body->len - body_offset, - checksummer, &desc); + checksummer, req); if (err) goto out; - err = crypto_hash_final(&desc, checksumdata); + ahash_request_set_crypt(req, NULL, checksumdata, 0); + err = crypto_ahash_final(req); if (err) goto out; - desc.tfm = hmac_md5; - desc.flags = CRYPTO_TFM_REQ_MAY_SLEEP; + ahash_request_free(req); + req = ahash_request_alloc(hmac_md5, GFP_KERNEL); + if (!req) { + crypto_free_ahash(hmac_md5); + crypto_free_ahash(md5); + return GSS_S_FAILURE; + } + + ahash_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP, NULL, NULL); - err = crypto_hash_init(&desc); + err = crypto_ahash_init(req); if (err) goto out; - err = crypto_hash_setkey(hmac_md5, cksumkey, kctx->gk5e->keylength); + err = crypto_ahash_setkey(hmac_md5, cksumkey, kctx->gk5e->keylength); if (err) goto out; - sg_init_one(sg, checksumdata, crypto_hash_digestsize(md5)); - err = crypto_hash_digest(&desc, sg, crypto_hash_digestsize(md5), - checksumdata); + sg_init_one(sg, checksumdata, crypto_ahash_digestsize(md5)); + ahash_request_set_crypt(req, sg, checksumdata, + crypto_ahash_digestsize(md5)); + err = crypto_ahash_digest(req); if (err) goto out; memcpy(cksumout->data, checksumdata, kctx->gk5e->cksumlength); cksumout->len = kctx->gk5e->cksumlength; out: - crypto_free_hash(md5); - crypto_free_hash(hmac_md5); + ahash_request_free(req); + crypto_free_ahash(md5); + crypto_free_ahash(hmac_md5); return err ? 
GSS_S_FAILURE : 0; } @@ -242,7 +271,8 @@ make_checksum(struct krb5_ctx *kctx, char *header, int hdrlen, struct xdr_buf *body, int body_offset, u8 *cksumkey, unsigned int usage, struct xdr_netobj *cksumout) { - struct hash_desc desc; + struct crypto_ahash *tfm; + struct ahash_request *req; struct scatterlist sg[1]; int err; u8 checksumdata[GSS_KRB5_MAX_CKSUM_LEN]; @@ -259,32 +289,41 @@ make_checksum(struct krb5_ctx *kctx, char *header, int hdrlen, return GSS_S_FAILURE; } - desc.tfm = crypto_alloc_hash(kctx->gk5e->cksum_name, 0, CRYPTO_ALG_ASYNC); - if (IS_ERR(desc.tfm)) + tfm = crypto_alloc_ahash(kctx->gk5e->cksum_name, 0, CRYPTO_ALG_ASYNC); + if (IS_ERR(tfm)) return GSS_S_FAILURE; - desc.flags = CRYPTO_TFM_REQ_MAY_SLEEP; - checksumlen = crypto_hash_digestsize(desc.tfm); + req = ahash_request_alloc(tfm, GFP_KERNEL); + if (!req) { + crypto_free_ahash(tfm); + return GSS_S_FAILURE; + } + + ahash_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP, NULL, NULL); + + checksumlen = crypto_ahash_digestsize(tfm); if (cksumkey != NULL) { - err = crypto_hash_setkey(desc.tfm, cksumkey, - kctx->gk5e->keylength); + err = crypto_ahash_setkey(tfm, cksumkey, + kctx->gk5e->keylength); if (err) goto out; } - err = crypto_hash_init(&desc); + err = crypto_ahash_init(req); if (err) goto out; sg_init_one(sg, header, hdrlen); - err = crypto_hash_update(&desc, sg, hdrlen); + ahash_request_set_crypt(req, sg, NULL, hdrlen); + err = crypto_ahash_update(req); if (err) goto out; err = xdr_process_buf(body, body_offset, body->len - body_offset, - checksummer, &desc); + checksummer, req); if (err) goto out; - err = crypto_hash_final(&desc, checksumdata); + ahash_request_set_crypt(req, NULL, checksumdata, 0); + err = crypto_ahash_final(req); if (err) goto out; @@ -307,7 +346,8 @@ make_checksum(struct krb5_ctx *kctx, char *header, int hdrlen, } cksumout->len = kctx->gk5e->cksumlength; out: - crypto_free_hash(desc.tfm); + ahash_request_free(req); + crypto_free_ahash(tfm); return err ? 
GSS_S_FAILURE : 0; } @@ -323,7 +363,8 @@ make_checksum_v2(struct krb5_ctx *kctx, char *header, int hdrlen, struct xdr_buf *body, int body_offset, u8 *cksumkey, unsigned int usage, struct xdr_netobj *cksumout) { - struct hash_desc desc; + struct crypto_ahash *tfm; + struct ahash_request *req; struct scatterlist sg[1]; int err; u8 checksumdata[GSS_KRB5_MAX_CKSUM_LEN]; @@ -340,31 +381,39 @@ make_checksum_v2(struct krb5_ctx *kctx, char *header, int hdrlen, return GSS_S_FAILURE; } - desc.tfm = crypto_alloc_hash(kctx->gk5e->cksum_name, 0, - CRYPTO_ALG_ASYNC); - if (IS_ERR(desc.tfm)) + tfm = crypto_alloc_ahash(kctx->gk5e->cksum_name, 0, CRYPTO_ALG_ASYNC); + if (IS_ERR(tfm)) return GSS_S_FAILURE; - checksumlen = crypto_hash_digestsize(desc.tfm); - desc.flags = CRYPTO_TFM_REQ_MAY_SLEEP; + checksumlen = crypto_ahash_digestsize(tfm); + + req = ahash_request_alloc(tfm, GFP_KERNEL); + if (!req) { + crypto_free_ahash(tfm); + return GSS_S_FAILURE; + } - err = crypto_hash_setkey(desc.tfm, cksumkey, kctx->gk5e->keylength); + ahash_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP, NULL, NULL); + + err = crypto_ahash_setkey(tfm, cksumkey, kctx->gk5e->keylength); if (err) goto out; - err = crypto_hash_init(&desc); + err = crypto_ahash_init(req); if (err) goto out; err = xdr_process_buf(body, body_offset, body->len - body_offset, - checksummer, &desc); + checksummer, req); if (err) goto out; if (header != NULL) { sg_init_one(sg, header, hdrlen); - err = crypto_hash_update(&desc, sg, hdrlen); + ahash_request_set_crypt(req, sg, NULL, hdrlen); + err = crypto_ahash_update(req); if (err) goto out; } - err = crypto_hash_final(&desc, checksumdata); + ahash_request_set_crypt(req, NULL, checksumdata, 0); + err = crypto_ahash_final(req); if (err) goto out; @@ -381,13 +430,14 @@ make_checksum_v2(struct krb5_ctx *kctx, char *header, int hdrlen, break; } out: - crypto_free_hash(desc.tfm); + ahash_request_free(req); + crypto_free_ahash(tfm); return err ? 
GSS_S_FAILURE : 0; } struct encryptor_desc { u8 iv[GSS_KRB5_MAX_BLOCKSIZE]; - struct blkcipher_desc desc; + struct skcipher_request *req; int pos; struct xdr_buf *outbuf; struct page **pages; @@ -402,6 +452,7 @@ encryptor(struct scatterlist *sg, void *data) { struct encryptor_desc *desc = data; struct xdr_buf *outbuf = desc->outbuf; + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(desc->req); struct page *in_page; int thislen = desc->fraglen + sg->length; int fraglen, ret; @@ -427,7 +478,7 @@ encryptor(struct scatterlist *sg, void *data) desc->fraglen += sg->length; desc->pos += sg->length; - fraglen = thislen & (crypto_blkcipher_blocksize(desc->desc.tfm) - 1); + fraglen = thislen & (crypto_skcipher_blocksize(tfm) - 1); thislen -= fraglen; if (thislen == 0) @@ -436,8 +487,10 @@ encryptor(struct scatterlist *sg, void *data) sg_mark_end(&desc->infrags[desc->fragno - 1]); sg_mark_end(&desc->outfrags[desc->fragno - 1]); - ret = crypto_blkcipher_encrypt_iv(&desc->desc, desc->outfrags, - desc->infrags, thislen); + skcipher_request_set_crypt(desc->req, desc->infrags, desc->outfrags, + thislen, desc->iv); + + ret = crypto_skcipher_encrypt(desc->req); if (ret) return ret; @@ -459,18 +512,20 @@ encryptor(struct scatterlist *sg, void *data) } int -gss_encrypt_xdr_buf(struct crypto_blkcipher *tfm, struct xdr_buf *buf, +gss_encrypt_xdr_buf(struct crypto_skcipher *tfm, struct xdr_buf *buf, int offset, struct page **pages) { int ret; struct encryptor_desc desc; + SKCIPHER_REQUEST_ON_STACK(req, tfm); + + BUG_ON((buf->len - offset) % crypto_skcipher_blocksize(tfm) != 0); - BUG_ON((buf->len - offset) % crypto_blkcipher_blocksize(tfm) != 0); + skcipher_request_set_tfm(req, tfm); + skcipher_request_set_callback(req, 0, NULL, NULL); memset(desc.iv, 0, sizeof(desc.iv)); - desc.desc.tfm = tfm; - desc.desc.info = desc.iv; - desc.desc.flags = 0; + desc.req = req; desc.pos = offset; desc.outbuf = buf; desc.pages = pages; @@ -481,12 +536,13 @@ gss_encrypt_xdr_buf(struct crypto_blkcipher *tfm, struct xdr_buf *buf, sg_init_table(desc.outfrags, 4); ret = xdr_process_buf(buf, offset, buf->len - offset, encryptor, &desc); + skcipher_request_zero(req); return ret; } struct decryptor_desc { u8 iv[GSS_KRB5_MAX_BLOCKSIZE]; - struct blkcipher_desc desc; + struct skcipher_request *req; struct scatterlist frags[4]; int fragno; int fraglen; @@ -497,6 +553,7 @@ decryptor(struct scatterlist *sg, void *data) { struct decryptor_desc *desc = data; int thislen = desc->fraglen + sg->length; + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(desc->req); int fraglen, ret; /* Worst case is 4 fragments: head, end of page 1, start @@ -507,7 +564,7 @@ decryptor(struct scatterlist *sg, void *data) desc->fragno++; desc->fraglen += sg->length; - fraglen = thislen & (crypto_blkcipher_blocksize(desc->desc.tfm) - 1); + fraglen = thislen & (crypto_skcipher_blocksize(tfm) - 1); thislen -= fraglen; if (thislen == 0) @@ -515,8 +572,10 @@ decryptor(struct scatterlist *sg, void *data) sg_mark_end(&desc->frags[desc->fragno - 1]); - ret = crypto_blkcipher_decrypt_iv(&desc->desc, desc->frags, - desc->frags, thislen); + skcipher_request_set_crypt(desc->req, desc->frags, desc->frags, + thislen, desc->iv); + + ret = crypto_skcipher_decrypt(desc->req); if (ret) return ret; @@ -535,24 +594,29 @@ decryptor(struct scatterlist *sg, void *data) } int -gss_decrypt_xdr_buf(struct crypto_blkcipher *tfm, struct xdr_buf *buf, +gss_decrypt_xdr_buf(struct crypto_skcipher *tfm, struct xdr_buf *buf, int offset) { + int ret; struct decryptor_desc desc; + 
SKCIPHER_REQUEST_ON_STACK(req, tfm); /* XXXJBF: */ - BUG_ON((buf->len - offset) % crypto_blkcipher_blocksize(tfm) != 0); + BUG_ON((buf->len - offset) % crypto_skcipher_blocksize(tfm) != 0); + + skcipher_request_set_tfm(req, tfm); + skcipher_request_set_callback(req, 0, NULL, NULL); memset(desc.iv, 0, sizeof(desc.iv)); - desc.desc.tfm = tfm; - desc.desc.info = desc.iv; - desc.desc.flags = 0; + desc.req = req; desc.fragno = 0; desc.fraglen = 0; sg_init_table(desc.frags, 4); - return xdr_process_buf(buf, offset, buf->len - offset, decryptor, &desc); + ret = xdr_process_buf(buf, offset, buf->len - offset, decryptor, &desc); + skcipher_request_zero(req); + return ret; } /* @@ -594,12 +658,12 @@ xdr_extend_head(struct xdr_buf *buf, unsigned int base, unsigned int shiftlen) } static u32 -gss_krb5_cts_crypt(struct crypto_blkcipher *cipher, struct xdr_buf *buf, +gss_krb5_cts_crypt(struct crypto_skcipher *cipher, struct xdr_buf *buf, u32 offset, u8 *iv, struct page **pages, int encrypt) { u32 ret; struct scatterlist sg[1]; - struct blkcipher_desc desc = { .tfm = cipher, .info = iv }; + SKCIPHER_REQUEST_ON_STACK(req, cipher); u8 data[GSS_KRB5_MAX_BLOCKSIZE * 2]; struct page **save_pages; u32 len = buf->len - offset; @@ -625,10 +689,16 @@ gss_krb5_cts_crypt(struct crypto_blkcipher *cipher, struct xdr_buf *buf, sg_init_one(sg, data, len); + skcipher_request_set_tfm(req, cipher); + skcipher_request_set_callback(req, 0, NULL, NULL); + skcipher_request_set_crypt(req, sg, sg, len, iv); + if (encrypt) - ret = crypto_blkcipher_encrypt_iv(&desc, sg, sg, len); + ret = crypto_skcipher_encrypt(req); else - ret = crypto_blkcipher_decrypt_iv(&desc, sg, sg, len); + ret = crypto_skcipher_decrypt(req); + + skcipher_request_zero(req); if (ret) goto out; @@ -647,7 +717,7 @@ gss_krb5_aes_encrypt(struct krb5_ctx *kctx, u32 offset, struct xdr_netobj hmac; u8 *cksumkey; u8 *ecptr; - struct crypto_blkcipher *cipher, *aux_cipher; + struct crypto_skcipher *cipher, *aux_cipher; int blocksize; struct page **save_pages; int nblocks, nbytes; @@ -666,7 +736,7 @@ gss_krb5_aes_encrypt(struct krb5_ctx *kctx, u32 offset, cksumkey = kctx->acceptor_integ; usage = KG_USAGE_ACCEPTOR_SEAL; } - blocksize = crypto_blkcipher_blocksize(cipher); + blocksize = crypto_skcipher_blocksize(cipher); /* hide the gss token header and insert the confounder */ offset += GSS_KRB5_TOK_HDR_LEN; @@ -719,20 +789,24 @@ gss_krb5_aes_encrypt(struct krb5_ctx *kctx, u32 offset, memset(desc.iv, 0, sizeof(desc.iv)); if (cbcbytes) { + SKCIPHER_REQUEST_ON_STACK(req, aux_cipher); + desc.pos = offset + GSS_KRB5_TOK_HDR_LEN; desc.fragno = 0; desc.fraglen = 0; desc.pages = pages; desc.outbuf = buf; - desc.desc.info = desc.iv; - desc.desc.flags = 0; - desc.desc.tfm = aux_cipher; + desc.req = req; + + skcipher_request_set_tfm(req, aux_cipher); + skcipher_request_set_callback(req, 0, NULL, NULL); sg_init_table(desc.infrags, 4); sg_init_table(desc.outfrags, 4); err = xdr_process_buf(buf, offset + GSS_KRB5_TOK_HDR_LEN, cbcbytes, encryptor, &desc); + skcipher_request_zero(req); if (err) goto out_err; } @@ -763,7 +837,7 @@ gss_krb5_aes_decrypt(struct krb5_ctx *kctx, u32 offset, struct xdr_buf *buf, struct xdr_buf subbuf; u32 ret = 0; u8 *cksum_key; - struct crypto_blkcipher *cipher, *aux_cipher; + struct crypto_skcipher *cipher, *aux_cipher; struct xdr_netobj our_hmac_obj; u8 our_hmac[GSS_KRB5_MAX_CKSUM_LEN]; u8 pkt_hmac[GSS_KRB5_MAX_CKSUM_LEN]; @@ -782,7 +856,7 @@ gss_krb5_aes_decrypt(struct krb5_ctx *kctx, u32 offset, struct xdr_buf *buf, cksum_key = kctx->initiator_integ; 
usage = KG_USAGE_INITIATOR_SEAL; } - blocksize = crypto_blkcipher_blocksize(cipher); + blocksize = crypto_skcipher_blocksize(cipher); /* create a segment skipping the header and leaving out the checksum */ @@ -799,15 +873,19 @@ gss_krb5_aes_decrypt(struct krb5_ctx *kctx, u32 offset, struct xdr_buf *buf, memset(desc.iv, 0, sizeof(desc.iv)); if (cbcbytes) { + SKCIPHER_REQUEST_ON_STACK(req, aux_cipher); + desc.fragno = 0; desc.fraglen = 0; - desc.desc.info = desc.iv; - desc.desc.flags = 0; - desc.desc.tfm = aux_cipher; + desc.req = req; + + skcipher_request_set_tfm(req, aux_cipher); + skcipher_request_set_callback(req, 0, NULL, NULL); sg_init_table(desc.frags, 4); ret = xdr_process_buf(&subbuf, 0, cbcbytes, decryptor, &desc); + skcipher_request_zero(req); if (ret) goto out_err; } @@ -850,61 +928,62 @@ out_err: * Set the key of the given cipher. */ int -krb5_rc4_setup_seq_key(struct krb5_ctx *kctx, struct crypto_blkcipher *cipher, +krb5_rc4_setup_seq_key(struct krb5_ctx *kctx, struct crypto_skcipher *cipher, unsigned char *cksum) { - struct crypto_hash *hmac; - struct hash_desc desc; - struct scatterlist sg[1]; + struct crypto_shash *hmac; + struct shash_desc *desc; u8 Kseq[GSS_KRB5_MAX_KEYLEN]; u32 zeroconstant = 0; int err; dprintk("%s: entered\n", __func__); - hmac = crypto_alloc_hash(kctx->gk5e->cksum_name, 0, CRYPTO_ALG_ASYNC); + hmac = crypto_alloc_shash(kctx->gk5e->cksum_name, 0, 0); if (IS_ERR(hmac)) { dprintk("%s: error %ld, allocating hash '%s'\n", __func__, PTR_ERR(hmac), kctx->gk5e->cksum_name); return PTR_ERR(hmac); } - desc.tfm = hmac; - desc.flags = 0; + desc = kmalloc(sizeof(*desc), GFP_KERNEL); + if (!desc) { + dprintk("%s: failed to allocate shash descriptor for '%s'\n", + __func__, kctx->gk5e->cksum_name); + crypto_free_shash(hmac); + return -ENOMEM; + } - err = crypto_hash_init(&desc); - if (err) - goto out_err; + desc->tfm = hmac; + desc->flags = 0; /* Compute intermediate Kseq from session key */ - err = crypto_hash_setkey(hmac, kctx->Ksess, kctx->gk5e->keylength); + err = crypto_shash_setkey(hmac, kctx->Ksess, kctx->gk5e->keylength); if (err) goto out_err; - sg_init_one(sg, &zeroconstant, 4); - err = crypto_hash_digest(&desc, sg, 4, Kseq); + err = crypto_shash_digest(desc, (u8 *)&zeroconstant, 4, Kseq); if (err) goto out_err; /* Compute final Kseq from the checksum and intermediate Kseq */ - err = crypto_hash_setkey(hmac, Kseq, kctx->gk5e->keylength); + err = crypto_shash_setkey(hmac, Kseq, kctx->gk5e->keylength); if (err) goto out_err; - sg_set_buf(sg, cksum, 8); - - err = crypto_hash_digest(&desc, sg, 8, Kseq); + err = crypto_shash_digest(desc, cksum, 8, Kseq); if (err) goto out_err; - err = crypto_blkcipher_setkey(cipher, Kseq, kctx->gk5e->keylength); + err = crypto_skcipher_setkey(cipher, Kseq, kctx->gk5e->keylength); if (err) goto out_err; err = 0; out_err: - crypto_free_hash(hmac); + kzfree(desc); + crypto_free_shash(hmac); dprintk("%s: returning %d\n", __func__, err); return err; } @@ -914,12 +993,11 @@ out_err: * Set the key of cipher kctx->enc. 
*/ int -krb5_rc4_setup_enc_key(struct krb5_ctx *kctx, struct crypto_blkcipher *cipher, +krb5_rc4_setup_enc_key(struct krb5_ctx *kctx, struct crypto_skcipher *cipher, s32 seqnum) { - struct crypto_hash *hmac; - struct hash_desc desc; - struct scatterlist sg[1]; + struct crypto_shash *hmac; + struct shash_desc *desc; u8 Kcrypt[GSS_KRB5_MAX_KEYLEN]; u8 zeroconstant[4] = {0}; u8 seqnumarray[4]; @@ -927,35 +1005,38 @@ krb5_rc4_setup_enc_key(struct krb5_ctx *kctx, struct crypto_blkcipher *cipher, dprintk("%s: entered, seqnum %u\n", __func__, seqnum); - hmac = crypto_alloc_hash(kctx->gk5e->cksum_name, 0, CRYPTO_ALG_ASYNC); + hmac = crypto_alloc_shash(kctx->gk5e->cksum_name, 0, 0); if (IS_ERR(hmac)) { dprintk("%s: error %ld, allocating hash '%s'\n", __func__, PTR_ERR(hmac), kctx->gk5e->cksum_name); return PTR_ERR(hmac); } - desc.tfm = hmac; - desc.flags = 0; + desc = kmalloc(sizeof(*desc), GFP_KERNEL); + if (!desc) { + dprintk("%s: failed to allocate shash descriptor for '%s'\n", + __func__, kctx->gk5e->cksum_name); + crypto_free_shash(hmac); + return -ENOMEM; + } - err = crypto_hash_init(&desc); - if (err) - goto out_err; + desc->tfm = hmac; + desc->flags = 0; /* Compute intermediate Kcrypt from session key */ for (i = 0; i < kctx->gk5e->keylength; i++) Kcrypt[i] = kctx->Ksess[i] ^ 0xf0; - err = crypto_hash_setkey(hmac, Kcrypt, kctx->gk5e->keylength); + err = crypto_shash_setkey(hmac, Kcrypt, kctx->gk5e->keylength); if (err) goto out_err; - sg_init_one(sg, zeroconstant, 4); - err = crypto_hash_digest(&desc, sg, 4, Kcrypt); + err = crypto_shash_digest(desc, zeroconstant, 4, Kcrypt); if (err) goto out_err; /* Compute final Kcrypt from the seqnum and intermediate Kcrypt */ - err = crypto_hash_setkey(hmac, Kcrypt, kctx->gk5e->keylength); + err = crypto_shash_setkey(hmac, Kcrypt, kctx->gk5e->keylength); if (err) goto out_err; @@ -964,20 +1045,19 @@ krb5_rc4_setup_enc_key(struct krb5_ctx *kctx, struct crypto_blkcipher *cipher, seqnumarray[2] = (unsigned char) ((seqnum >> 8) & 0xff); seqnumarray[3] = (unsigned char) ((seqnum >> 0) & 0xff); - sg_set_buf(sg, seqnumarray, 4); - - err = crypto_hash_digest(&desc, sg, 4, Kcrypt); + err = crypto_shash_digest(desc, seqnumarray, 4, Kcrypt); if (err) goto out_err; - err = crypto_blkcipher_setkey(cipher, Kcrypt, kctx->gk5e->keylength); + err = crypto_skcipher_setkey(cipher, Kcrypt, kctx->gk5e->keylength); if (err) goto out_err; err = 0; out_err: - crypto_free_hash(hmac); + kzfree(desc); + crypto_free_shash(hmac); dprintk("%s: returning %d\n", __func__, err); return err; } diff --git a/net/sunrpc/auth_gss/gss_krb5_keys.c b/net/sunrpc/auth_gss/gss_krb5_keys.c index 234fa8d0fd9b..870133146026 100644 --- a/net/sunrpc/auth_gss/gss_krb5_keys.c +++ b/net/sunrpc/auth_gss/gss_krb5_keys.c @@ -54,9 +54,9 @@ * WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE. 
*/ +#include #include #include -#include #include #include #include @@ -147,7 +147,7 @@ u32 krb5_derive_key(const struct gss_krb5_enctype *gk5e, size_t blocksize, keybytes, keylength, n; unsigned char *inblockdata, *outblockdata, *rawkey; struct xdr_netobj inblock, outblock; - struct crypto_blkcipher *cipher; + struct crypto_skcipher *cipher; u32 ret = EINVAL; blocksize = gk5e->blocksize; @@ -157,11 +157,11 @@ u32 krb5_derive_key(const struct gss_krb5_enctype *gk5e, if ((inkey->len != keylength) || (outkey->len != keylength)) goto err_return; - cipher = crypto_alloc_blkcipher(gk5e->encrypt_name, 0, - CRYPTO_ALG_ASYNC); + cipher = crypto_alloc_skcipher(gk5e->encrypt_name, 0, + CRYPTO_ALG_ASYNC); if (IS_ERR(cipher)) goto err_return; - if (crypto_blkcipher_setkey(cipher, inkey->data, inkey->len)) + if (crypto_skcipher_setkey(cipher, inkey->data, inkey->len)) goto err_return; /* allocate and set up buffers */ @@ -238,7 +238,7 @@ err_free_in: memset(inblockdata, 0, blocksize); kfree(inblockdata); err_free_cipher: - crypto_free_blkcipher(cipher); + crypto_free_skcipher(cipher); err_return: return ret; } diff --git a/net/sunrpc/auth_gss/gss_krb5_mech.c b/net/sunrpc/auth_gss/gss_krb5_mech.c index 28db442a0034..71341ccb9890 100644 --- a/net/sunrpc/auth_gss/gss_krb5_mech.c +++ b/net/sunrpc/auth_gss/gss_krb5_mech.c @@ -34,6 +34,8 @@ * */ +#include +#include #include #include #include @@ -42,7 +44,6 @@ #include #include #include -#include #include #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) @@ -217,7 +218,7 @@ simple_get_netobj(const void *p, const void *end, struct xdr_netobj *res) static inline const void * get_key(const void *p, const void *end, - struct krb5_ctx *ctx, struct crypto_blkcipher **res) + struct krb5_ctx *ctx, struct crypto_skcipher **res) { struct xdr_netobj key; int alg; @@ -245,7 +246,7 @@ get_key(const void *p, const void *end, if (IS_ERR(p)) goto out_err; - *res = crypto_alloc_blkcipher(ctx->gk5e->encrypt_name, 0, + *res = crypto_alloc_skcipher(ctx->gk5e->encrypt_name, 0, CRYPTO_ALG_ASYNC); if (IS_ERR(*res)) { printk(KERN_WARNING "gss_kerberos_mech: unable to initialize " @@ -253,7 +254,7 @@ get_key(const void *p, const void *end, *res = NULL; goto out_err_free_key; } - if (crypto_blkcipher_setkey(*res, key.data, key.len)) { + if (crypto_skcipher_setkey(*res, key.data, key.len)) { printk(KERN_WARNING "gss_kerberos_mech: error setting key for " "crypto algorithm %s\n", ctx->gk5e->encrypt_name); goto out_err_free_tfm; @@ -263,7 +264,7 @@ get_key(const void *p, const void *end, return p; out_err_free_tfm: - crypto_free_blkcipher(*res); + crypto_free_skcipher(*res); out_err_free_key: kfree(key.data); p = ERR_PTR(-EINVAL); @@ -335,30 +336,30 @@ gss_import_v1_context(const void *p, const void *end, struct krb5_ctx *ctx) return 0; out_err_free_key2: - crypto_free_blkcipher(ctx->seq); + crypto_free_skcipher(ctx->seq); out_err_free_key1: - crypto_free_blkcipher(ctx->enc); + crypto_free_skcipher(ctx->enc); out_err_free_mech: kfree(ctx->mech_used.data); out_err: return PTR_ERR(p); } -static struct crypto_blkcipher * +static struct crypto_skcipher * context_v2_alloc_cipher(struct krb5_ctx *ctx, const char *cname, u8 *key) { - struct crypto_blkcipher *cp; + struct crypto_skcipher *cp; - cp = crypto_alloc_blkcipher(cname, 0, CRYPTO_ALG_ASYNC); + cp = crypto_alloc_skcipher(cname, 0, CRYPTO_ALG_ASYNC); if (IS_ERR(cp)) { dprintk("gss_kerberos_mech: unable to initialize " "crypto algorithm %s\n", cname); return NULL; } - if (crypto_blkcipher_setkey(cp, key, ctx->gk5e->keylength)) { + if 
(crypto_skcipher_setkey(cp, key, ctx->gk5e->keylength)) { dprintk("gss_kerberos_mech: error setting key for " "crypto algorithm %s\n", cname); - crypto_free_blkcipher(cp); + crypto_free_skcipher(cp); return NULL; } return cp; @@ -412,9 +413,9 @@ context_derive_keys_des3(struct krb5_ctx *ctx, gfp_t gfp_mask) return 0; out_free_enc: - crypto_free_blkcipher(ctx->enc); + crypto_free_skcipher(ctx->enc); out_free_seq: - crypto_free_blkcipher(ctx->seq); + crypto_free_skcipher(ctx->seq); out_err: return -EINVAL; } @@ -427,18 +428,17 @@ out_err: static int context_derive_keys_rc4(struct krb5_ctx *ctx) { - struct crypto_hash *hmac; + struct crypto_shash *hmac; char sigkeyconstant[] = "signaturekey"; int slen = strlen(sigkeyconstant) + 1; /* include null terminator */ - struct hash_desc desc; - struct scatterlist sg[1]; + struct shash_desc *desc; int err; dprintk("RPC: %s: entered\n", __func__); /* * derive cksum (aka Ksign) key */ - hmac = crypto_alloc_hash(ctx->gk5e->cksum_name, 0, CRYPTO_ALG_ASYNC); + hmac = crypto_alloc_shash(ctx->gk5e->cksum_name, 0, 0); if (IS_ERR(hmac)) { dprintk("%s: error %ld allocating hash '%s'\n", __func__, PTR_ERR(hmac), ctx->gk5e->cksum_name); @@ -446,37 +446,40 @@ context_derive_keys_rc4(struct krb5_ctx *ctx) goto out_err; } - err = crypto_hash_setkey(hmac, ctx->Ksess, ctx->gk5e->keylength); + err = crypto_shash_setkey(hmac, ctx->Ksess, ctx->gk5e->keylength); if (err) goto out_err_free_hmac; - sg_init_table(sg, 1); - sg_set_buf(sg, sigkeyconstant, slen); - desc.tfm = hmac; - desc.flags = 0; - - err = crypto_hash_init(&desc); - if (err) + desc = kmalloc(sizeof(*desc), GFP_KERNEL); + if (!desc) { + dprintk("%s: failed to allocate hash descriptor for '%s'\n", + __func__, ctx->gk5e->cksum_name); + err = -ENOMEM; goto out_err_free_hmac; + } + + desc->tfm = hmac; + desc->flags = 0; - err = crypto_hash_digest(&desc, sg, slen, ctx->cksum); + err = crypto_shash_digest(desc, sigkeyconstant, slen, ctx->cksum); + kzfree(desc); if (err) goto out_err_free_hmac; /* - * allocate hash, and blkciphers for data and seqnum encryption + * allocate hash, and skciphers for data and seqnum encryption */ - ctx->enc = crypto_alloc_blkcipher(ctx->gk5e->encrypt_name, 0, - CRYPTO_ALG_ASYNC); + ctx->enc = crypto_alloc_skcipher(ctx->gk5e->encrypt_name, 0, + CRYPTO_ALG_ASYNC); if (IS_ERR(ctx->enc)) { err = PTR_ERR(ctx->enc); goto out_err_free_hmac; } - ctx->seq = crypto_alloc_blkcipher(ctx->gk5e->encrypt_name, 0, - CRYPTO_ALG_ASYNC); + ctx->seq = crypto_alloc_skcipher(ctx->gk5e->encrypt_name, 0, + CRYPTO_ALG_ASYNC); if (IS_ERR(ctx->seq)) { - crypto_free_blkcipher(ctx->enc); + crypto_free_skcipher(ctx->enc); err = PTR_ERR(ctx->seq); goto out_err_free_hmac; } @@ -486,7 +489,7 @@ context_derive_keys_rc4(struct krb5_ctx *ctx) err = 0; out_err_free_hmac: - crypto_free_hash(hmac); + crypto_free_shash(hmac); out_err: dprintk("RPC: %s: returning %d\n", __func__, err); return err; @@ -588,7 +591,7 @@ context_derive_keys_new(struct krb5_ctx *ctx, gfp_t gfp_mask) context_v2_alloc_cipher(ctx, "cbc(aes)", ctx->acceptor_seal); if (ctx->acceptor_enc_aux == NULL) { - crypto_free_blkcipher(ctx->initiator_enc_aux); + crypto_free_skcipher(ctx->initiator_enc_aux); goto out_free_acceptor_enc; } } @@ -596,9 +599,9 @@ context_derive_keys_new(struct krb5_ctx *ctx, gfp_t gfp_mask) return 0; out_free_acceptor_enc: - crypto_free_blkcipher(ctx->acceptor_enc); + crypto_free_skcipher(ctx->acceptor_enc); out_free_initiator_enc: - crypto_free_blkcipher(ctx->initiator_enc); + crypto_free_skcipher(ctx->initiator_enc); out_err: return 
-EINVAL; } @@ -710,12 +713,12 @@ static void gss_delete_sec_context_kerberos(void *internal_ctx) { struct krb5_ctx *kctx = internal_ctx; - crypto_free_blkcipher(kctx->seq); - crypto_free_blkcipher(kctx->enc); - crypto_free_blkcipher(kctx->acceptor_enc); - crypto_free_blkcipher(kctx->initiator_enc); - crypto_free_blkcipher(kctx->acceptor_enc_aux); - crypto_free_blkcipher(kctx->initiator_enc_aux); + crypto_free_skcipher(kctx->seq); + crypto_free_skcipher(kctx->enc); + crypto_free_skcipher(kctx->acceptor_enc); + crypto_free_skcipher(kctx->initiator_enc); + crypto_free_skcipher(kctx->acceptor_enc_aux); + crypto_free_skcipher(kctx->initiator_enc_aux); kfree(kctx->mech_used.data); kfree(kctx); } diff --git a/net/sunrpc/auth_gss/gss_krb5_seqnum.c b/net/sunrpc/auth_gss/gss_krb5_seqnum.c index 20d55c793eb6..c8b9082f4a9d 100644 --- a/net/sunrpc/auth_gss/gss_krb5_seqnum.c +++ b/net/sunrpc/auth_gss/gss_krb5_seqnum.c @@ -31,9 +31,9 @@ * PERFORMANCE OF THIS SOFTWARE. */ +#include #include #include -#include #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) # define RPCDBG_FACILITY RPCDBG_AUTH @@ -43,13 +43,13 @@ static s32 krb5_make_rc4_seq_num(struct krb5_ctx *kctx, int direction, s32 seqnum, unsigned char *cksum, unsigned char *buf) { - struct crypto_blkcipher *cipher; + struct crypto_skcipher *cipher; unsigned char plain[8]; s32 code; dprintk("RPC: %s:\n", __func__); - cipher = crypto_alloc_blkcipher(kctx->gk5e->encrypt_name, 0, - CRYPTO_ALG_ASYNC); + cipher = crypto_alloc_skcipher(kctx->gk5e->encrypt_name, 0, + CRYPTO_ALG_ASYNC); if (IS_ERR(cipher)) return PTR_ERR(cipher); @@ -68,12 +68,12 @@ krb5_make_rc4_seq_num(struct krb5_ctx *kctx, int direction, s32 seqnum, code = krb5_encrypt(cipher, cksum, plain, buf, 8); out: - crypto_free_blkcipher(cipher); + crypto_free_skcipher(cipher); return code; } s32 krb5_make_seq_num(struct krb5_ctx *kctx, - struct crypto_blkcipher *key, + struct crypto_skcipher *key, int direction, u32 seqnum, unsigned char *cksum, unsigned char *buf) @@ -101,13 +101,13 @@ static s32 krb5_get_rc4_seq_num(struct krb5_ctx *kctx, unsigned char *cksum, unsigned char *buf, int *direction, s32 *seqnum) { - struct crypto_blkcipher *cipher; + struct crypto_skcipher *cipher; unsigned char plain[8]; s32 code; dprintk("RPC: %s:\n", __func__); - cipher = crypto_alloc_blkcipher(kctx->gk5e->encrypt_name, 0, - CRYPTO_ALG_ASYNC); + cipher = crypto_alloc_skcipher(kctx->gk5e->encrypt_name, 0, + CRYPTO_ALG_ASYNC); if (IS_ERR(cipher)) return PTR_ERR(cipher); @@ -130,7 +130,7 @@ krb5_get_rc4_seq_num(struct krb5_ctx *kctx, unsigned char *cksum, *seqnum = ((plain[0] << 24) | (plain[1] << 16) | (plain[2] << 8) | (plain[3])); out: - crypto_free_blkcipher(cipher); + crypto_free_skcipher(cipher); return code; } @@ -142,7 +142,7 @@ krb5_get_seq_num(struct krb5_ctx *kctx, { s32 code; unsigned char plain[8]; - struct crypto_blkcipher *key = kctx->seq; + struct crypto_skcipher *key = kctx->seq; dprintk("RPC: krb5_get_seq_num:\n"); diff --git a/net/sunrpc/auth_gss/gss_krb5_wrap.c b/net/sunrpc/auth_gss/gss_krb5_wrap.c index ca7e92a32f84..765088e4ad84 100644 --- a/net/sunrpc/auth_gss/gss_krb5_wrap.c +++ b/net/sunrpc/auth_gss/gss_krb5_wrap.c @@ -28,12 +28,12 @@ * SUCH DAMAGES. 
*/ +#include #include #include #include #include #include -#include #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) # define RPCDBG_FACILITY RPCDBG_AUTH @@ -174,7 +174,7 @@ gss_wrap_kerberos_v1(struct krb5_ctx *kctx, int offset, now = get_seconds(); - blocksize = crypto_blkcipher_blocksize(kctx->enc); + blocksize = crypto_skcipher_blocksize(kctx->enc); gss_krb5_add_padding(buf, offset, blocksize); BUG_ON((buf->len - offset) % blocksize); plainlen = conflen + buf->len - offset; @@ -239,10 +239,10 @@ gss_wrap_kerberos_v1(struct krb5_ctx *kctx, int offset, return GSS_S_FAILURE; if (kctx->enctype == ENCTYPE_ARCFOUR_HMAC) { - struct crypto_blkcipher *cipher; + struct crypto_skcipher *cipher; int err; - cipher = crypto_alloc_blkcipher(kctx->gk5e->encrypt_name, 0, - CRYPTO_ALG_ASYNC); + cipher = crypto_alloc_skcipher(kctx->gk5e->encrypt_name, 0, + CRYPTO_ALG_ASYNC); if (IS_ERR(cipher)) return GSS_S_FAILURE; @@ -250,7 +250,7 @@ gss_wrap_kerberos_v1(struct krb5_ctx *kctx, int offset, err = gss_encrypt_xdr_buf(cipher, buf, offset + headlen - conflen, pages); - crypto_free_blkcipher(cipher); + crypto_free_skcipher(cipher); if (err) return GSS_S_FAILURE; } else { @@ -327,18 +327,18 @@ gss_unwrap_kerberos_v1(struct krb5_ctx *kctx, int offset, struct xdr_buf *buf) return GSS_S_BAD_SIG; if (kctx->enctype == ENCTYPE_ARCFOUR_HMAC) { - struct crypto_blkcipher *cipher; + struct crypto_skcipher *cipher; int err; - cipher = crypto_alloc_blkcipher(kctx->gk5e->encrypt_name, 0, - CRYPTO_ALG_ASYNC); + cipher = crypto_alloc_skcipher(kctx->gk5e->encrypt_name, 0, + CRYPTO_ALG_ASYNC); if (IS_ERR(cipher)) return GSS_S_FAILURE; krb5_rc4_setup_enc_key(kctx, cipher, seqnum); err = gss_decrypt_xdr_buf(cipher, buf, crypt_offset); - crypto_free_blkcipher(cipher); + crypto_free_skcipher(cipher); if (err) return GSS_S_DEFECTIVE_TOKEN; } else { @@ -371,7 +371,7 @@ gss_unwrap_kerberos_v1(struct krb5_ctx *kctx, int offset, struct xdr_buf *buf) /* Copy the data back to the right position. XXX: Would probably be * better to copy and encrypt at the same time. */ - blocksize = crypto_blkcipher_blocksize(kctx->enc); + blocksize = crypto_skcipher_blocksize(kctx->enc); data_start = ptr + (GSS_KRB5_TOK_HDR_LEN + kctx->gk5e->cksumlength) + conflen; orig_start = buf->head[0].iov_base + offset; @@ -473,7 +473,7 @@ gss_wrap_kerberos_v2(struct krb5_ctx *kctx, u32 offset, *ptr++ = 0xff; be16ptr = (__be16 *)ptr; - blocksize = crypto_blkcipher_blocksize(kctx->acceptor_enc); + blocksize = crypto_skcipher_blocksize(kctx->acceptor_enc); *be16ptr++ = 0; /* "inner" token header always uses 0 for RRC */ *be16ptr++ = 0; -- cgit v1.2.3 From 110492183c4b8f572b16fce096b9d78e2da30baf Mon Sep 17 00:00:00 2001 From: Joonsoo Kim Date: Tue, 26 Jan 2016 17:15:03 +0900 Subject: crypto: compress - remove unused pcomp interface It is unused now, so remove it. 
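[Editor's note, not part of the original commit message: with pcomp and its only in-tree user (the zlib driver, deleted below) removed, kernel compression is left with the synchronous crypto_comp interface, e.g. the surviving "deflate" algorithm. A minimal flat-buffer sketch of that remaining interface follows; the helper name and buffer handling are illustrative assumptions, not code from this series.

	#include <linux/crypto.h>
	#include <linux/err.h>

	/* Compress src[0..slen) into dst using the synchronous
	 * crypto_comp API that remains after pcomp's removal.
	 * *dlen is in/out: space available on entry, bytes
	 * produced on success. Returns 0 or a negative errno.
	 */
	static int deflate_compress_buf(const u8 *src, unsigned int slen,
					u8 *dst, unsigned int *dlen)
	{
		struct crypto_comp *tfm;
		int ret;

		tfm = crypto_alloc_comp("deflate", 0, 0);
		if (IS_ERR(tfm))
			return PTR_ERR(tfm);

		ret = crypto_comp_compress(tfm, src, slen, dst, dlen);

		crypto_free_comp(tfm);
		return ret;
	}

Decompression is symmetric via crypto_comp_decompress(). End of editor's note.]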
Signed-off-by: Joonsoo Kim Signed-off-by: Herbert Xu --- crypto/Kconfig | 19 -- crypto/Makefile | 2 - crypto/pcompress.c | 115 ----------- crypto/testmgr.c | 223 ---------------------- crypto/testmgr.h | 144 -------------- crypto/zlib.c | 381 ------------------------------------- include/crypto/compress.h | 145 -------------- include/crypto/internal/compress.h | 28 --- include/linux/crypto.h | 1 - 9 files changed, 1058 deletions(-) delete mode 100644 crypto/pcompress.c delete mode 100644 crypto/zlib.c delete mode 100644 include/crypto/compress.h delete mode 100644 include/crypto/internal/compress.h (limited to 'include/linux') diff --git a/crypto/Kconfig b/crypto/Kconfig index 7240821137fd..c80d34fc2f8c 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -84,15 +84,6 @@ config CRYPTO_RNG_DEFAULT tristate select CRYPTO_DRBG_MENU -config CRYPTO_PCOMP - tristate - select CRYPTO_PCOMP2 - select CRYPTO_ALGAPI - -config CRYPTO_PCOMP2 - tristate - select CRYPTO_ALGAPI2 - config CRYPTO_AKCIPHER2 tristate select CRYPTO_ALGAPI2 @@ -122,7 +113,6 @@ config CRYPTO_MANAGER2 select CRYPTO_AEAD2 select CRYPTO_HASH2 select CRYPTO_BLKCIPHER2 - select CRYPTO_PCOMP2 select CRYPTO_AKCIPHER2 config CRYPTO_USER @@ -1504,15 +1494,6 @@ config CRYPTO_DEFLATE You will most probably want this if using IPSec. -config CRYPTO_ZLIB - tristate "Zlib compression algorithm" - select CRYPTO_PCOMP - select ZLIB_INFLATE - select ZLIB_DEFLATE - select NLATTR - help - This is the zlib algorithm. - config CRYPTO_LZO tristate "LZO compression algorithm" select CRYPTO_ALGAPI diff --git a/crypto/Makefile b/crypto/Makefile index 2acdbbd30475..ffe18c9c9bf4 100644 --- a/crypto/Makefile +++ b/crypto/Makefile @@ -28,7 +28,6 @@ crypto_hash-y += ahash.o crypto_hash-y += shash.o obj-$(CONFIG_CRYPTO_HASH2) += crypto_hash.o -obj-$(CONFIG_CRYPTO_PCOMP2) += pcompress.o obj-$(CONFIG_CRYPTO_AKCIPHER2) += akcipher.o $(obj)/rsapubkey-asn1.o: $(obj)/rsapubkey-asn1.c $(obj)/rsapubkey-asn1.h @@ -99,7 +98,6 @@ obj-$(CONFIG_CRYPTO_SALSA20) += salsa20_generic.o obj-$(CONFIG_CRYPTO_CHACHA20) += chacha20_generic.o obj-$(CONFIG_CRYPTO_POLY1305) += poly1305_generic.o obj-$(CONFIG_CRYPTO_DEFLATE) += deflate.o -obj-$(CONFIG_CRYPTO_ZLIB) += zlib.o obj-$(CONFIG_CRYPTO_MICHAEL_MIC) += michael_mic.o obj-$(CONFIG_CRYPTO_CRC32C) += crc32c_generic.o obj-$(CONFIG_CRYPTO_CRC32) += crc32.o diff --git a/crypto/pcompress.c b/crypto/pcompress.c deleted file mode 100644 index 7a13b4088857..000000000000 --- a/crypto/pcompress.c +++ /dev/null @@ -1,115 +0,0 @@ -/* - * Cryptographic API. - * - * Partial (de)compression operations. - * - * Copyright 2008 Sony Corporation - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; version 2 of the License. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. - * If not, see . 
- */ - -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -#include "internal.h" - - -static int crypto_pcomp_init(struct crypto_tfm *tfm, u32 type, u32 mask) -{ - return 0; -} - -static int crypto_pcomp_init_tfm(struct crypto_tfm *tfm) -{ - return 0; -} - -#ifdef CONFIG_NET -static int crypto_pcomp_report(struct sk_buff *skb, struct crypto_alg *alg) -{ - struct crypto_report_comp rpcomp; - - strncpy(rpcomp.type, "pcomp", sizeof(rpcomp.type)); - if (nla_put(skb, CRYPTOCFGA_REPORT_COMPRESS, - sizeof(struct crypto_report_comp), &rpcomp)) - goto nla_put_failure; - return 0; - -nla_put_failure: - return -EMSGSIZE; -} -#else -static int crypto_pcomp_report(struct sk_buff *skb, struct crypto_alg *alg) -{ - return -ENOSYS; -} -#endif - -static void crypto_pcomp_show(struct seq_file *m, struct crypto_alg *alg) - __attribute__ ((unused)); -static void crypto_pcomp_show(struct seq_file *m, struct crypto_alg *alg) -{ - seq_printf(m, "type : pcomp\n"); -} - -static const struct crypto_type crypto_pcomp_type = { - .extsize = crypto_alg_extsize, - .init = crypto_pcomp_init, - .init_tfm = crypto_pcomp_init_tfm, -#ifdef CONFIG_PROC_FS - .show = crypto_pcomp_show, -#endif - .report = crypto_pcomp_report, - .maskclear = ~CRYPTO_ALG_TYPE_MASK, - .maskset = CRYPTO_ALG_TYPE_MASK, - .type = CRYPTO_ALG_TYPE_PCOMPRESS, - .tfmsize = offsetof(struct crypto_pcomp, base), -}; - -struct crypto_pcomp *crypto_alloc_pcomp(const char *alg_name, u32 type, - u32 mask) -{ - return crypto_alloc_tfm(alg_name, &crypto_pcomp_type, type, mask); -} -EXPORT_SYMBOL_GPL(crypto_alloc_pcomp); - -int crypto_register_pcomp(struct pcomp_alg *alg) -{ - struct crypto_alg *base = &alg->base; - - base->cra_type = &crypto_pcomp_type; - base->cra_flags &= ~CRYPTO_ALG_TYPE_MASK; - base->cra_flags |= CRYPTO_ALG_TYPE_PCOMPRESS; - - return crypto_register_alg(base); -} -EXPORT_SYMBOL_GPL(crypto_register_pcomp); - -int crypto_unregister_pcomp(struct pcomp_alg *alg) -{ - return crypto_unregister_alg(&alg->base); -} -EXPORT_SYMBOL_GPL(crypto_unregister_pcomp); - -MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION("Partial (de)compression type"); -MODULE_AUTHOR("Sony Corporation"); diff --git a/crypto/testmgr.c b/crypto/testmgr.c index 5c0963d17de7..cbd78c954844 100644 --- a/crypto/testmgr.c +++ b/crypto/testmgr.c @@ -96,13 +96,6 @@ struct comp_test_suite { } comp, decomp; }; -struct pcomp_test_suite { - struct { - struct pcomp_testvec *vecs; - unsigned int count; - } comp, decomp; -}; - struct hash_test_suite { struct hash_testvec *vecs; unsigned int count; @@ -133,7 +126,6 @@ struct alg_test_desc { struct aead_test_suite aead; struct cipher_test_suite cipher; struct comp_test_suite comp; - struct pcomp_test_suite pcomp; struct hash_test_suite hash; struct cprng_test_suite cprng; struct drbg_test_suite drbg; @@ -1293,183 +1285,6 @@ out: return ret; } -static int test_pcomp(struct crypto_pcomp *tfm, - struct pcomp_testvec *ctemplate, - struct pcomp_testvec *dtemplate, int ctcount, - int dtcount) -{ - const char *algo = crypto_tfm_alg_driver_name(crypto_pcomp_tfm(tfm)); - unsigned int i; - char result[COMP_BUF_SIZE]; - int res; - - for (i = 0; i < ctcount; i++) { - struct comp_request req; - unsigned int produced = 0; - - res = crypto_compress_setup(tfm, ctemplate[i].params, - ctemplate[i].paramsize); - if (res) { - pr_err("alg: pcomp: compression setup failed on test " - "%d for %s: error=%d\n", i + 1, algo, res); - return res; - } - - res = crypto_compress_init(tfm); - if (res) { - pr_err("alg: pcomp: compression 
init failed on test " - "%d for %s: error=%d\n", i + 1, algo, res); - return res; - } - - memset(result, 0, sizeof(result)); - - req.next_in = ctemplate[i].input; - req.avail_in = ctemplate[i].inlen / 2; - req.next_out = result; - req.avail_out = ctemplate[i].outlen / 2; - - res = crypto_compress_update(tfm, &req); - if (res < 0 && (res != -EAGAIN || req.avail_in)) { - pr_err("alg: pcomp: compression update failed on test " - "%d for %s: error=%d\n", i + 1, algo, res); - return res; - } - if (res > 0) - produced += res; - - /* Add remaining input data */ - req.avail_in += (ctemplate[i].inlen + 1) / 2; - - res = crypto_compress_update(tfm, &req); - if (res < 0 && (res != -EAGAIN || req.avail_in)) { - pr_err("alg: pcomp: compression update failed on test " - "%d for %s: error=%d\n", i + 1, algo, res); - return res; - } - if (res > 0) - produced += res; - - /* Provide remaining output space */ - req.avail_out += COMP_BUF_SIZE - ctemplate[i].outlen / 2; - - res = crypto_compress_final(tfm, &req); - if (res < 0) { - pr_err("alg: pcomp: compression final failed on test " - "%d for %s: error=%d\n", i + 1, algo, res); - return res; - } - produced += res; - - if (COMP_BUF_SIZE - req.avail_out != ctemplate[i].outlen) { - pr_err("alg: comp: Compression test %d failed for %s: " - "output len = %d (expected %d)\n", i + 1, algo, - COMP_BUF_SIZE - req.avail_out, - ctemplate[i].outlen); - return -EINVAL; - } - - if (produced != ctemplate[i].outlen) { - pr_err("alg: comp: Compression test %d failed for %s: " - "returned len = %u (expected %d)\n", i + 1, - algo, produced, ctemplate[i].outlen); - return -EINVAL; - } - - if (memcmp(result, ctemplate[i].output, ctemplate[i].outlen)) { - pr_err("alg: pcomp: Compression test %d failed for " - "%s\n", i + 1, algo); - hexdump(result, ctemplate[i].outlen); - return -EINVAL; - } - } - - for (i = 0; i < dtcount; i++) { - struct comp_request req; - unsigned int produced = 0; - - res = crypto_decompress_setup(tfm, dtemplate[i].params, - dtemplate[i].paramsize); - if (res) { - pr_err("alg: pcomp: decompression setup failed on " - "test %d for %s: error=%d\n", i + 1, algo, res); - return res; - } - - res = crypto_decompress_init(tfm); - if (res) { - pr_err("alg: pcomp: decompression init failed on test " - "%d for %s: error=%d\n", i + 1, algo, res); - return res; - } - - memset(result, 0, sizeof(result)); - - req.next_in = dtemplate[i].input; - req.avail_in = dtemplate[i].inlen / 2; - req.next_out = result; - req.avail_out = dtemplate[i].outlen / 2; - - res = crypto_decompress_update(tfm, &req); - if (res < 0 && (res != -EAGAIN || req.avail_in)) { - pr_err("alg: pcomp: decompression update failed on " - "test %d for %s: error=%d\n", i + 1, algo, res); - return res; - } - if (res > 0) - produced += res; - - /* Add remaining input data */ - req.avail_in += (dtemplate[i].inlen + 1) / 2; - - res = crypto_decompress_update(tfm, &req); - if (res < 0 && (res != -EAGAIN || req.avail_in)) { - pr_err("alg: pcomp: decompression update failed on " - "test %d for %s: error=%d\n", i + 1, algo, res); - return res; - } - if (res > 0) - produced += res; - - /* Provide remaining output space */ - req.avail_out += COMP_BUF_SIZE - dtemplate[i].outlen / 2; - - res = crypto_decompress_final(tfm, &req); - if (res < 0 && (res != -EAGAIN || req.avail_in)) { - pr_err("alg: pcomp: decompression final failed on " - "test %d for %s: error=%d\n", i + 1, algo, res); - return res; - } - if (res > 0) - produced += res; - - if (COMP_BUF_SIZE - req.avail_out != dtemplate[i].outlen) { - pr_err("alg: comp: 
Decompression test %d failed for " - "%s: output len = %d (expected %d)\n", i + 1, - algo, COMP_BUF_SIZE - req.avail_out, - dtemplate[i].outlen); - return -EINVAL; - } - - if (produced != dtemplate[i].outlen) { - pr_err("alg: comp: Decompression test %d failed for " - "%s: returned len = %u (expected %d)\n", i + 1, - algo, produced, dtemplate[i].outlen); - return -EINVAL; - } - - if (memcmp(result, dtemplate[i].output, dtemplate[i].outlen)) { - pr_err("alg: pcomp: Decompression test %d failed for " - "%s\n", i + 1, algo); - hexdump(result, dtemplate[i].outlen); - return -EINVAL; - } - } - - return 0; -} - - static int test_cprng(struct crypto_rng *tfm, struct cprng_testvec *template, unsigned int tcount) { @@ -1640,28 +1455,6 @@ static int alg_test_comp(const struct alg_test_desc *desc, const char *driver, return err; } -static int alg_test_pcomp(const struct alg_test_desc *desc, const char *driver, - u32 type, u32 mask) -{ - struct crypto_pcomp *tfm; - int err; - - tfm = crypto_alloc_pcomp(driver, type, mask); - if (IS_ERR(tfm)) { - pr_err("alg: pcomp: Failed to load transform for %s: %ld\n", - driver, PTR_ERR(tfm)); - return PTR_ERR(tfm); - } - - err = test_pcomp(tfm, desc->suite.pcomp.comp.vecs, - desc->suite.pcomp.decomp.vecs, - desc->suite.pcomp.comp.count, - desc->suite.pcomp.decomp.count); - - crypto_free_pcomp(tfm); - return err; -} - static int alg_test_hash(const struct alg_test_desc *desc, const char *driver, u32 type, u32 mask) { @@ -3839,22 +3632,6 @@ static const struct alg_test_desc alg_test_descs[] = { } } } - }, { - .alg = "zlib", - .test = alg_test_pcomp, - .fips_allowed = 1, - .suite = { - .pcomp = { - .comp = { - .vecs = zlib_comp_tv_template, - .count = ZLIB_COMP_TEST_VECTORS - }, - .decomp = { - .vecs = zlib_decomp_tv_template, - .count = ZLIB_DECOMP_TEST_VECTORS - } - } - } } }; diff --git a/crypto/testmgr.h b/crypto/testmgr.h index da0a8fd765f4..487ec880e889 100644 --- a/crypto/testmgr.h +++ b/crypto/testmgr.h @@ -25,9 +25,6 @@ #define _CRYPTO_TESTMGR_H #include -#include - -#include #define MAX_DIGEST_SIZE 64 #define MAX_TAP 8 @@ -32268,14 +32265,6 @@ struct comp_testvec { char output[COMP_BUF_SIZE]; }; -struct pcomp_testvec { - const void *params; - unsigned int paramsize; - int inlen, outlen; - char input[COMP_BUF_SIZE]; - char output[COMP_BUF_SIZE]; -}; - /* * Deflate test vectors (null-terminated strings). * Params: winbits=-11, Z_DEFAULT_COMPRESSION, MAX_MEM_LEVEL. 
@@ -32356,139 +32345,6 @@ static struct comp_testvec deflate_decomp_tv_template[] = { }, }; -#define ZLIB_COMP_TEST_VECTORS 2 -#define ZLIB_DECOMP_TEST_VECTORS 2 - -static const struct { - struct nlattr nla; - int val; -} deflate_comp_params[] = { - { - .nla = { - .nla_len = NLA_HDRLEN + sizeof(int), - .nla_type = ZLIB_COMP_LEVEL, - }, - .val = Z_DEFAULT_COMPRESSION, - }, { - .nla = { - .nla_len = NLA_HDRLEN + sizeof(int), - .nla_type = ZLIB_COMP_METHOD, - }, - .val = Z_DEFLATED, - }, { - .nla = { - .nla_len = NLA_HDRLEN + sizeof(int), - .nla_type = ZLIB_COMP_WINDOWBITS, - }, - .val = -11, - }, { - .nla = { - .nla_len = NLA_HDRLEN + sizeof(int), - .nla_type = ZLIB_COMP_MEMLEVEL, - }, - .val = MAX_MEM_LEVEL, - }, { - .nla = { - .nla_len = NLA_HDRLEN + sizeof(int), - .nla_type = ZLIB_COMP_STRATEGY, - }, - .val = Z_DEFAULT_STRATEGY, - } -}; - -static const struct { - struct nlattr nla; - int val; -} deflate_decomp_params[] = { - { - .nla = { - .nla_len = NLA_HDRLEN + sizeof(int), - .nla_type = ZLIB_DECOMP_WINDOWBITS, - }, - .val = -11, - } -}; - -static struct pcomp_testvec zlib_comp_tv_template[] = { - { - .params = &deflate_comp_params, - .paramsize = sizeof(deflate_comp_params), - .inlen = 70, - .outlen = 38, - .input = "Join us now and share the software " - "Join us now and share the software ", - .output = "\xf3\xca\xcf\xcc\x53\x28\x2d\x56" - "\xc8\xcb\x2f\x57\x48\xcc\x4b\x51" - "\x28\xce\x48\x2c\x4a\x55\x28\xc9" - "\x48\x55\x28\xce\x4f\x2b\x29\x07" - "\x71\xbc\x08\x2b\x01\x00", - }, { - .params = &deflate_comp_params, - .paramsize = sizeof(deflate_comp_params), - .inlen = 191, - .outlen = 122, - .input = "This document describes a compression method based on the DEFLATE" - "compression algorithm. This document defines the application of " - "the DEFLATE algorithm to the IP Payload Compression Protocol.", - .output = "\x5d\x8d\x31\x0e\xc2\x30\x10\x04" - "\xbf\xb2\x2f\xc8\x1f\x10\x04\x09" - "\x89\xc2\x85\x3f\x70\xb1\x2f\xf8" - "\x24\xdb\x67\xd9\x47\xc1\xef\x49" - "\x68\x12\x51\xae\x76\x67\xd6\x27" - "\x19\x88\x1a\xde\x85\xab\x21\xf2" - "\x08\x5d\x16\x1e\x20\x04\x2d\xad" - "\xf3\x18\xa2\x15\x85\x2d\x69\xc4" - "\x42\x83\x23\xb6\x6c\x89\x71\x9b" - "\xef\xcf\x8b\x9f\xcf\x33\xca\x2f" - "\xed\x62\xa9\x4c\x80\xff\x13\xaf" - "\x52\x37\xed\x0e\x52\x6b\x59\x02" - "\xd9\x4e\xe8\x7a\x76\x1d\x02\x98" - "\xfe\x8a\x87\x83\xa3\x4f\x56\x8a" - "\xb8\x9e\x8e\x5c\x57\xd3\xa0\x79" - "\xfa\x02", - }, -}; - -static struct pcomp_testvec zlib_decomp_tv_template[] = { - { - .params = &deflate_decomp_params, - .paramsize = sizeof(deflate_decomp_params), - .inlen = 122, - .outlen = 191, - .input = "\x5d\x8d\x31\x0e\xc2\x30\x10\x04" - "\xbf\xb2\x2f\xc8\x1f\x10\x04\x09" - "\x89\xc2\x85\x3f\x70\xb1\x2f\xf8" - "\x24\xdb\x67\xd9\x47\xc1\xef\x49" - "\x68\x12\x51\xae\x76\x67\xd6\x27" - "\x19\x88\x1a\xde\x85\xab\x21\xf2" - "\x08\x5d\x16\x1e\x20\x04\x2d\xad" - "\xf3\x18\xa2\x15\x85\x2d\x69\xc4" - "\x42\x83\x23\xb6\x6c\x89\x71\x9b" - "\xef\xcf\x8b\x9f\xcf\x33\xca\x2f" - "\xed\x62\xa9\x4c\x80\xff\x13\xaf" - "\x52\x37\xed\x0e\x52\x6b\x59\x02" - "\xd9\x4e\xe8\x7a\x76\x1d\x02\x98" - "\xfe\x8a\x87\x83\xa3\x4f\x56\x8a" - "\xb8\x9e\x8e\x5c\x57\xd3\xa0\x79" - "\xfa\x02", - .output = "This document describes a compression method based on the DEFLATE" - "compression algorithm. 
This document defines the application of " - "the DEFLATE algorithm to the IP Payload Compression Protocol.", - }, { - .params = &deflate_decomp_params, - .paramsize = sizeof(deflate_decomp_params), - .inlen = 38, - .outlen = 70, - .input = "\xf3\xca\xcf\xcc\x53\x28\x2d\x56" - "\xc8\xcb\x2f\x57\x48\xcc\x4b\x51" - "\x28\xce\x48\x2c\x4a\x55\x28\xc9" - "\x48\x55\x28\xce\x4f\x2b\x29\x07" - "\x71\xbc\x08\x2b\x01\x00", - .output = "Join us now and share the software " - "Join us now and share the software ", - }, -}; - /* * LZO test vectors (null-terminated strings). */ diff --git a/crypto/zlib.c b/crypto/zlib.c deleted file mode 100644 index d51a30a29e42..000000000000 --- a/crypto/zlib.c +++ /dev/null @@ -1,381 +0,0 @@ -/* - * Cryptographic API. - * - * Zlib algorithm - * - * Copyright 2008 Sony Corporation - * - * Based on deflate.c, which is - * Copyright (c) 2003 James Morris - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the Free - * Software Foundation; either version 2 of the License, or (at your option) - * any later version. - * - * FIXME: deflate transforms will require up to a total of about 436k of kernel - * memory on i386 (390k for compression, the rest for decompression), as the - * current zlib kernel code uses a worst case pre-allocation system by default. - * This needs to be fixed so that the amount of memory required is properly - * related to the winbits and memlevel parameters. - */ - -#define pr_fmt(fmt) "%s: " fmt, __func__ - -#include -#include -#include -#include -#include -#include -#include - -#include - -#include - - -struct zlib_ctx { - struct z_stream_s comp_stream; - struct z_stream_s decomp_stream; - int decomp_windowBits; -}; - - -static void zlib_comp_exit(struct zlib_ctx *ctx) -{ - struct z_stream_s *stream = &ctx->comp_stream; - - if (stream->workspace) { - zlib_deflateEnd(stream); - vfree(stream->workspace); - stream->workspace = NULL; - } -} - -static void zlib_decomp_exit(struct zlib_ctx *ctx) -{ - struct z_stream_s *stream = &ctx->decomp_stream; - - if (stream->workspace) { - zlib_inflateEnd(stream); - vfree(stream->workspace); - stream->workspace = NULL; - } -} - -static int zlib_init(struct crypto_tfm *tfm) -{ - return 0; -} - -static void zlib_exit(struct crypto_tfm *tfm) -{ - struct zlib_ctx *ctx = crypto_tfm_ctx(tfm); - - zlib_comp_exit(ctx); - zlib_decomp_exit(ctx); -} - - -static int zlib_compress_setup(struct crypto_pcomp *tfm, const void *params, - unsigned int len) -{ - struct zlib_ctx *ctx = crypto_tfm_ctx(crypto_pcomp_tfm(tfm)); - struct z_stream_s *stream = &ctx->comp_stream; - struct nlattr *tb[ZLIB_COMP_MAX + 1]; - int window_bits, mem_level; - size_t workspacesize; - int ret; - - ret = nla_parse(tb, ZLIB_COMP_MAX, params, len, NULL); - if (ret) - return ret; - - zlib_comp_exit(ctx); - - window_bits = tb[ZLIB_COMP_WINDOWBITS] - ? nla_get_u32(tb[ZLIB_COMP_WINDOWBITS]) - : MAX_WBITS; - mem_level = tb[ZLIB_COMP_MEMLEVEL] - ? nla_get_u32(tb[ZLIB_COMP_MEMLEVEL]) - : DEF_MEM_LEVEL; - - workspacesize = zlib_deflate_workspacesize(window_bits, mem_level); - stream->workspace = vzalloc(workspacesize); - if (!stream->workspace) - return -ENOMEM; - - ret = zlib_deflateInit2(stream, - tb[ZLIB_COMP_LEVEL] - ? nla_get_u32(tb[ZLIB_COMP_LEVEL]) - : Z_DEFAULT_COMPRESSION, - tb[ZLIB_COMP_METHOD] - ? nla_get_u32(tb[ZLIB_COMP_METHOD]) - : Z_DEFLATED, - window_bits, - mem_level, - tb[ZLIB_COMP_STRATEGY] - ? 
nla_get_u32(tb[ZLIB_COMP_STRATEGY]) - : Z_DEFAULT_STRATEGY); - if (ret != Z_OK) { - vfree(stream->workspace); - stream->workspace = NULL; - return -EINVAL; - } - - return 0; -} - -static int zlib_compress_init(struct crypto_pcomp *tfm) -{ - int ret; - struct zlib_ctx *dctx = crypto_tfm_ctx(crypto_pcomp_tfm(tfm)); - struct z_stream_s *stream = &dctx->comp_stream; - - ret = zlib_deflateReset(stream); - if (ret != Z_OK) - return -EINVAL; - - return 0; -} - -static int zlib_compress_update(struct crypto_pcomp *tfm, - struct comp_request *req) -{ - int ret; - struct zlib_ctx *dctx = crypto_tfm_ctx(crypto_pcomp_tfm(tfm)); - struct z_stream_s *stream = &dctx->comp_stream; - - pr_debug("avail_in %u, avail_out %u\n", req->avail_in, req->avail_out); - stream->next_in = req->next_in; - stream->avail_in = req->avail_in; - stream->next_out = req->next_out; - stream->avail_out = req->avail_out; - - ret = zlib_deflate(stream, Z_NO_FLUSH); - switch (ret) { - case Z_OK: - break; - - case Z_BUF_ERROR: - pr_debug("zlib_deflate could not make progress\n"); - return -EAGAIN; - - default: - pr_debug("zlib_deflate failed %d\n", ret); - return -EINVAL; - } - - ret = req->avail_out - stream->avail_out; - pr_debug("avail_in %lu, avail_out %lu (consumed %lu, produced %u)\n", - stream->avail_in, stream->avail_out, - req->avail_in - stream->avail_in, ret); - req->next_in = stream->next_in; - req->avail_in = stream->avail_in; - req->next_out = stream->next_out; - req->avail_out = stream->avail_out; - return ret; -} - -static int zlib_compress_final(struct crypto_pcomp *tfm, - struct comp_request *req) -{ - int ret; - struct zlib_ctx *dctx = crypto_tfm_ctx(crypto_pcomp_tfm(tfm)); - struct z_stream_s *stream = &dctx->comp_stream; - - pr_debug("avail_in %u, avail_out %u\n", req->avail_in, req->avail_out); - stream->next_in = req->next_in; - stream->avail_in = req->avail_in; - stream->next_out = req->next_out; - stream->avail_out = req->avail_out; - - ret = zlib_deflate(stream, Z_FINISH); - if (ret != Z_STREAM_END) { - pr_debug("zlib_deflate failed %d\n", ret); - return -EINVAL; - } - - ret = req->avail_out - stream->avail_out; - pr_debug("avail_in %lu, avail_out %lu (consumed %lu, produced %u)\n", - stream->avail_in, stream->avail_out, - req->avail_in - stream->avail_in, ret); - req->next_in = stream->next_in; - req->avail_in = stream->avail_in; - req->next_out = stream->next_out; - req->avail_out = stream->avail_out; - return ret; -} - - -static int zlib_decompress_setup(struct crypto_pcomp *tfm, const void *params, - unsigned int len) -{ - struct zlib_ctx *ctx = crypto_tfm_ctx(crypto_pcomp_tfm(tfm)); - struct z_stream_s *stream = &ctx->decomp_stream; - struct nlattr *tb[ZLIB_DECOMP_MAX + 1]; - int ret = 0; - - ret = nla_parse(tb, ZLIB_DECOMP_MAX, params, len, NULL); - if (ret) - return ret; - - zlib_decomp_exit(ctx); - - ctx->decomp_windowBits = tb[ZLIB_DECOMP_WINDOWBITS] - ? 
nla_get_u32(tb[ZLIB_DECOMP_WINDOWBITS]) - : DEF_WBITS; - - stream->workspace = vzalloc(zlib_inflate_workspacesize()); - if (!stream->workspace) - return -ENOMEM; - - ret = zlib_inflateInit2(stream, ctx->decomp_windowBits); - if (ret != Z_OK) { - vfree(stream->workspace); - stream->workspace = NULL; - return -EINVAL; - } - - return 0; -} - -static int zlib_decompress_init(struct crypto_pcomp *tfm) -{ - int ret; - struct zlib_ctx *dctx = crypto_tfm_ctx(crypto_pcomp_tfm(tfm)); - struct z_stream_s *stream = &dctx->decomp_stream; - - ret = zlib_inflateReset(stream); - if (ret != Z_OK) - return -EINVAL; - - return 0; -} - -static int zlib_decompress_update(struct crypto_pcomp *tfm, - struct comp_request *req) -{ - int ret; - struct zlib_ctx *dctx = crypto_tfm_ctx(crypto_pcomp_tfm(tfm)); - struct z_stream_s *stream = &dctx->decomp_stream; - - pr_debug("avail_in %u, avail_out %u\n", req->avail_in, req->avail_out); - stream->next_in = req->next_in; - stream->avail_in = req->avail_in; - stream->next_out = req->next_out; - stream->avail_out = req->avail_out; - - ret = zlib_inflate(stream, Z_SYNC_FLUSH); - switch (ret) { - case Z_OK: - case Z_STREAM_END: - break; - - case Z_BUF_ERROR: - pr_debug("zlib_inflate could not make progress\n"); - return -EAGAIN; - - default: - pr_debug("zlib_inflate failed %d\n", ret); - return -EINVAL; - } - - ret = req->avail_out - stream->avail_out; - pr_debug("avail_in %lu, avail_out %lu (consumed %lu, produced %u)\n", - stream->avail_in, stream->avail_out, - req->avail_in - stream->avail_in, ret); - req->next_in = stream->next_in; - req->avail_in = stream->avail_in; - req->next_out = stream->next_out; - req->avail_out = stream->avail_out; - return ret; -} - -static int zlib_decompress_final(struct crypto_pcomp *tfm, - struct comp_request *req) -{ - int ret; - struct zlib_ctx *dctx = crypto_tfm_ctx(crypto_pcomp_tfm(tfm)); - struct z_stream_s *stream = &dctx->decomp_stream; - - pr_debug("avail_in %u, avail_out %u\n", req->avail_in, req->avail_out); - stream->next_in = req->next_in; - stream->avail_in = req->avail_in; - stream->next_out = req->next_out; - stream->avail_out = req->avail_out; - - if (dctx->decomp_windowBits < 0) { - ret = zlib_inflate(stream, Z_SYNC_FLUSH); - /* - * Work around a bug in zlib, which sometimes wants to taste an - * extra byte when being used in the (undocumented) raw deflate - * mode. (From USAGI). 
- */ - if (ret == Z_OK && !stream->avail_in && stream->avail_out) { - const void *saved_next_in = stream->next_in; - u8 zerostuff = 0; - - stream->next_in = &zerostuff; - stream->avail_in = 1; - ret = zlib_inflate(stream, Z_FINISH); - stream->next_in = saved_next_in; - stream->avail_in = 0; - } - } else - ret = zlib_inflate(stream, Z_FINISH); - if (ret != Z_STREAM_END) { - pr_debug("zlib_inflate failed %d\n", ret); - return -EINVAL; - } - - ret = req->avail_out - stream->avail_out; - pr_debug("avail_in %lu, avail_out %lu (consumed %lu, produced %u)\n", - stream->avail_in, stream->avail_out, - req->avail_in - stream->avail_in, ret); - req->next_in = stream->next_in; - req->avail_in = stream->avail_in; - req->next_out = stream->next_out; - req->avail_out = stream->avail_out; - return ret; -} - - -static struct pcomp_alg zlib_alg = { - .compress_setup = zlib_compress_setup, - .compress_init = zlib_compress_init, - .compress_update = zlib_compress_update, - .compress_final = zlib_compress_final, - .decompress_setup = zlib_decompress_setup, - .decompress_init = zlib_decompress_init, - .decompress_update = zlib_decompress_update, - .decompress_final = zlib_decompress_final, - - .base = { - .cra_name = "zlib", - .cra_flags = CRYPTO_ALG_TYPE_PCOMPRESS, - .cra_ctxsize = sizeof(struct zlib_ctx), - .cra_module = THIS_MODULE, - .cra_init = zlib_init, - .cra_exit = zlib_exit, - } -}; - -static int __init zlib_mod_init(void) -{ - return crypto_register_pcomp(&zlib_alg); -} - -static void __exit zlib_mod_fini(void) -{ - crypto_unregister_pcomp(&zlib_alg); -} - -module_init(zlib_mod_init); -module_exit(zlib_mod_fini); - -MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION("Zlib Compression Algorithm"); -MODULE_AUTHOR("Sony Corporation"); -MODULE_ALIAS_CRYPTO("zlib"); diff --git a/include/crypto/compress.h b/include/crypto/compress.h deleted file mode 100644 index 5b67af834d83..000000000000 --- a/include/crypto/compress.h +++ /dev/null @@ -1,145 +0,0 @@ -/* - * Compress: Compression algorithms under the cryptographic API. - * - * Copyright 2008 Sony Corporation - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; version 2 of the License. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. - * If not, see . - */ - -#ifndef _CRYPTO_COMPRESS_H -#define _CRYPTO_COMPRESS_H - -#include - - -struct comp_request { - const void *next_in; /* next input byte */ - void *next_out; /* next output byte */ - unsigned int avail_in; /* bytes available at next_in */ - unsigned int avail_out; /* bytes available at next_out */ -}; - -enum zlib_comp_params { - ZLIB_COMP_LEVEL = 1, /* e.g. Z_DEFAULT_COMPRESSION */ - ZLIB_COMP_METHOD, /* e.g. Z_DEFLATED */ - ZLIB_COMP_WINDOWBITS, /* e.g. MAX_WBITS */ - ZLIB_COMP_MEMLEVEL, /* e.g. DEF_MEM_LEVEL */ - ZLIB_COMP_STRATEGY, /* e.g. Z_DEFAULT_STRATEGY */ - __ZLIB_COMP_MAX, -}; - -#define ZLIB_COMP_MAX (__ZLIB_COMP_MAX - 1) - - -enum zlib_decomp_params { - ZLIB_DECOMP_WINDOWBITS = 1, /* e.g. 
DEF_WBITS */ - __ZLIB_DECOMP_MAX, -}; - -#define ZLIB_DECOMP_MAX (__ZLIB_DECOMP_MAX - 1) - - -struct crypto_pcomp { - struct crypto_tfm base; -}; - -struct pcomp_alg { - int (*compress_setup)(struct crypto_pcomp *tfm, const void *params, - unsigned int len); - int (*compress_init)(struct crypto_pcomp *tfm); - int (*compress_update)(struct crypto_pcomp *tfm, - struct comp_request *req); - int (*compress_final)(struct crypto_pcomp *tfm, - struct comp_request *req); - int (*decompress_setup)(struct crypto_pcomp *tfm, const void *params, - unsigned int len); - int (*decompress_init)(struct crypto_pcomp *tfm); - int (*decompress_update)(struct crypto_pcomp *tfm, - struct comp_request *req); - int (*decompress_final)(struct crypto_pcomp *tfm, - struct comp_request *req); - - struct crypto_alg base; -}; - -extern struct crypto_pcomp *crypto_alloc_pcomp(const char *alg_name, u32 type, - u32 mask); - -static inline struct crypto_tfm *crypto_pcomp_tfm(struct crypto_pcomp *tfm) -{ - return &tfm->base; -} - -static inline void crypto_free_pcomp(struct crypto_pcomp *tfm) -{ - crypto_destroy_tfm(tfm, crypto_pcomp_tfm(tfm)); -} - -static inline struct pcomp_alg *__crypto_pcomp_alg(struct crypto_alg *alg) -{ - return container_of(alg, struct pcomp_alg, base); -} - -static inline struct pcomp_alg *crypto_pcomp_alg(struct crypto_pcomp *tfm) -{ - return __crypto_pcomp_alg(crypto_pcomp_tfm(tfm)->__crt_alg); -} - -static inline int crypto_compress_setup(struct crypto_pcomp *tfm, - const void *params, unsigned int len) -{ - return crypto_pcomp_alg(tfm)->compress_setup(tfm, params, len); -} - -static inline int crypto_compress_init(struct crypto_pcomp *tfm) -{ - return crypto_pcomp_alg(tfm)->compress_init(tfm); -} - -static inline int crypto_compress_update(struct crypto_pcomp *tfm, - struct comp_request *req) -{ - return crypto_pcomp_alg(tfm)->compress_update(tfm, req); -} - -static inline int crypto_compress_final(struct crypto_pcomp *tfm, - struct comp_request *req) -{ - return crypto_pcomp_alg(tfm)->compress_final(tfm, req); -} - -static inline int crypto_decompress_setup(struct crypto_pcomp *tfm, - const void *params, unsigned int len) -{ - return crypto_pcomp_alg(tfm)->decompress_setup(tfm, params, len); -} - -static inline int crypto_decompress_init(struct crypto_pcomp *tfm) -{ - return crypto_pcomp_alg(tfm)->decompress_init(tfm); -} - -static inline int crypto_decompress_update(struct crypto_pcomp *tfm, - struct comp_request *req) -{ - return crypto_pcomp_alg(tfm)->decompress_update(tfm, req); -} - -static inline int crypto_decompress_final(struct crypto_pcomp *tfm, - struct comp_request *req) -{ - return crypto_pcomp_alg(tfm)->decompress_final(tfm, req); -} - -#endif /* _CRYPTO_COMPRESS_H */ diff --git a/include/crypto/internal/compress.h b/include/crypto/internal/compress.h deleted file mode 100644 index 178a888d1d93..000000000000 --- a/include/crypto/internal/compress.h +++ /dev/null @@ -1,28 +0,0 @@ -/* - * Compress: Compression algorithms under the cryptographic API. - * - * Copyright 2008 Sony Corporation - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; version 2 of the License. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program. - * If not, see . - */ - -#ifndef _CRYPTO_INTERNAL_COMPRESS_H -#define _CRYPTO_INTERNAL_COMPRESS_H - -#include - -extern int crypto_register_pcomp(struct pcomp_alg *alg); -extern int crypto_unregister_pcomp(struct pcomp_alg *alg); - -#endif /* _CRYPTO_INTERNAL_COMPRESS_H */ diff --git a/include/linux/crypto.h b/include/linux/crypto.h index e71cb70a1ac2..ab2a745d85f3 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -54,7 +54,6 @@ #define CRYPTO_ALG_TYPE_AHASH 0x0000000a #define CRYPTO_ALG_TYPE_RNG 0x0000000c #define CRYPTO_ALG_TYPE_AKCIPHER 0x0000000d -#define CRYPTO_ALG_TYPE_PCOMPRESS 0x0000000f #define CRYPTO_ALG_TYPE_HASH_MASK 0x0000000e #define CRYPTO_ALG_TYPE_AHASH_MASK 0x0000000c -- cgit v1.2.3 From c8b710b3e4348119924051551b836c94835331b1 Mon Sep 17 00:00:00 2001 From: Peter Hurley Date: Sat, 9 Jan 2016 21:13:46 -0800 Subject: tty: Fix ldisc leak in failed tty_init_dev() release_tty() leaks the ldisc instance when called directly (rather than when releasing the file descriptor from tty_release()). Since tty_ldisc_release() clears tty->ldisc, releasing the ldisc instance at tty teardown if tty->ldisc is non-null is not in danger of double-releasing the ldisc. Remove deinitialize_tty_struct() now that free_tty_struct() always performs the tty_ldisc_deinit(). Signed-off-by: Peter Hurley Signed-off-by: Greg Kroah-Hartman --- drivers/tty/pty.c | 5 ++--- drivers/tty/tty_io.c | 20 +++----------------- drivers/tty/tty_ldisc.c | 5 +++-- include/linux/tty.h | 1 - 4 files changed, 8 insertions(+), 23 deletions(-) (limited to 'include/linux') diff --git a/drivers/tty/pty.c b/drivers/tty/pty.c index b3110040164a..8cbe802bff1d 100644 --- a/drivers/tty/pty.c +++ b/drivers/tty/pty.c @@ -408,7 +408,7 @@ static int pty_common_install(struct tty_driver *driver, struct tty_struct *tty, the easy way .. 
*/ retval = tty_init_termios(tty); if (retval) - goto err_deinit_tty; + goto err_free_tty; retval = tty_init_termios(o_tty); if (retval) @@ -447,8 +447,7 @@ static int pty_common_install(struct tty_driver *driver, struct tty_struct *tty, err_free_termios: if (legacy) tty_free_termios(tty); -err_deinit_tty: - deinitialize_tty_struct(o_tty); +err_free_tty: free_tty_struct(o_tty); err_put_module: module_put(driver->other->owner); diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c index 5cec01c75691..c9f2365167df 100644 --- a/drivers/tty/tty_io.c +++ b/drivers/tty/tty_io.c @@ -172,6 +172,7 @@ void free_tty_struct(struct tty_struct *tty) { if (!tty) return; + tty_ldisc_deinit(tty); put_device(tty->dev); kfree(tty->write_buf); tty->magic = 0xDEADDEAD; @@ -1529,7 +1530,7 @@ struct tty_struct *tty_init_dev(struct tty_driver *driver, int idx) tty_lock(tty); retval = tty_driver_install_tty(driver, tty); if (retval < 0) - goto err_deinit_tty; + goto err_free_tty; if (!tty->port) tty->port = driver->ports[idx]; @@ -1551,9 +1552,8 @@ struct tty_struct *tty_init_dev(struct tty_driver *driver, int idx) /* Return the tty locked so that it cannot vanish under the caller */ return tty; -err_deinit_tty: +err_free_tty: tty_unlock(tty); - deinitialize_tty_struct(tty); free_tty_struct(tty); err_module_put: module_put(driver->owner); @@ -3162,20 +3162,6 @@ struct tty_struct *alloc_tty_struct(struct tty_driver *driver, int idx) return tty; } -/** - * deinitialize_tty_struct - * @tty: tty to deinitialize - * - * This subroutine deinitializes a tty structure that has been newly - * allocated but tty_release cannot be called on that yet. - * - * Locking: none - tty in question must not be exposed at this point - */ -void deinitialize_tty_struct(struct tty_struct *tty) -{ - tty_ldisc_deinit(tty); -} - /** * tty_put_char - write one character to a tty * @tty: tty diff --git a/drivers/tty/tty_ldisc.c b/drivers/tty/tty_ldisc.c index a054d03c22e7..49f0cea1e538 100644 --- a/drivers/tty/tty_ldisc.c +++ b/drivers/tty/tty_ldisc.c @@ -797,7 +797,7 @@ void tty_ldisc_init(struct tty_struct *tty) } /** - * tty_ldisc_init - ldisc cleanup for new tty + * tty_ldisc_deinit - ldisc cleanup for new tty * @tty: tty that was allocated recently * * The tty structure must not becompletely set up (tty_ldisc_setup) when @@ -805,7 +805,8 @@ void tty_ldisc_init(struct tty_struct *tty) */ void tty_ldisc_deinit(struct tty_struct *tty) { - tty_ldisc_put(tty->ldisc); + if (tty->ldisc) + tty_ldisc_put(tty->ldisc); tty->ldisc = NULL; } diff --git a/include/linux/tty.h b/include/linux/tty.h index d9fb4b043f56..64e301dbf7b9 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -509,7 +509,6 @@ extern int tty_alloc_file(struct file *file); extern void tty_add_file(struct tty_struct *tty, struct file *file); extern void tty_free_file(struct file *file); extern void free_tty_struct(struct tty_struct *tty); -extern void deinitialize_tty_struct(struct tty_struct *tty); extern struct tty_struct *tty_init_dev(struct tty_driver *driver, int idx); extern int tty_release(struct inode *inode, struct file *filp); extern int tty_init_termios(struct tty_struct *tty); -- cgit v1.2.3 From a3123fd0a4a5f9d71afa0ffa82e2086281d81822 Mon Sep 17 00:00:00 2001 From: Peter Hurley Date: Sat, 9 Jan 2016 21:13:48 -0800 Subject: tty: Fix tty_init_termios() declaration tty_init_termios() never returns an error; re-declare as void. Remove unnecessary error handling from callers. 
Remove extern declarations of tty_free_termios() and free_tty_struct() and re-declare in file scope. Signed-off-by: Peter Hurley Acked-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/tty/pty.c | 15 +++------------ drivers/tty/tty_io.c | 13 ++++--------- drivers/usb/serial/console.c | 6 +----- include/linux/tty.h | 4 +--- 4 files changed, 9 insertions(+), 29 deletions(-) (limited to 'include/linux') diff --git a/drivers/tty/pty.c b/drivers/tty/pty.c index 8cbe802bff1d..7e885a226f88 100644 --- a/drivers/tty/pty.c +++ b/drivers/tty/pty.c @@ -406,13 +406,8 @@ static int pty_common_install(struct tty_driver *driver, struct tty_struct *tty, if (legacy) { /* We always use new tty termios data so we can do this the easy way .. */ - retval = tty_init_termios(tty); - if (retval) - goto err_free_tty; - - retval = tty_init_termios(o_tty); - if (retval) - goto err_free_termios; + tty_init_termios(tty); + tty_init_termios(o_tty); driver->other->ttys[idx] = o_tty; driver->ttys[idx] = tty; @@ -444,11 +439,7 @@ static int pty_common_install(struct tty_driver *driver, struct tty_struct *tty, tty->count++; o_tty->count++; return 0; -err_free_termios: - if (legacy) - tty_free_termios(tty); -err_free_tty: - free_tty_struct(o_tty); + err_put_module: module_put(driver->other->owner); err: diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c index a59930e59660..742860e583ce 100644 --- a/drivers/tty/tty_io.c +++ b/drivers/tty/tty_io.c @@ -168,7 +168,7 @@ static void release_tty(struct tty_struct *tty, int idx); * Locking: none. Must be called after tty is definitely unused */ -void free_tty_struct(struct tty_struct *tty) +static void free_tty_struct(struct tty_struct *tty) { tty_ldisc_deinit(tty); put_device(tty->dev); @@ -1377,7 +1377,7 @@ static struct tty_struct *tty_driver_lookup_tty(struct tty_driver *driver, * the tty_mutex currently so we can be relaxed about ordering. 
*/ -int tty_init_termios(struct tty_struct *tty) +void tty_init_termios(struct tty_struct *tty) { struct ktermios *tp; int idx = tty->index; @@ -1395,16 +1395,12 @@ int tty_init_termios(struct tty_struct *tty) /* Compatibility until drivers always set this */ tty->termios.c_ispeed = tty_termios_input_baud_rate(&tty->termios); tty->termios.c_ospeed = tty_termios_baud_rate(&tty->termios); - return 0; } EXPORT_SYMBOL_GPL(tty_init_termios); int tty_standard_install(struct tty_driver *driver, struct tty_struct *tty) { - int ret = tty_init_termios(tty); - if (ret) - return ret; - + tty_init_termios(tty); tty_driver_kref_get(driver); tty->count++; driver->ttys[tty->index] = tty; @@ -1566,7 +1562,7 @@ err_release_tty: return ERR_PTR(retval); } -void tty_free_termios(struct tty_struct *tty) +static void tty_free_termios(struct tty_struct *tty) { struct ktermios *tp; int idx = tty->index; @@ -1585,7 +1581,6 @@ void tty_free_termios(struct tty_struct *tty) } *tp = tty->termios; } -EXPORT_SYMBOL(tty_free_termios); /** * tty_flush_works - flush all works of a tty/pty pair diff --git a/drivers/usb/serial/console.c b/drivers/usb/serial/console.c index 3806e7014199..a66b01bb1fa1 100644 --- a/drivers/usb/serial/console.c +++ b/drivers/usb/serial/console.c @@ -147,10 +147,7 @@ static int usb_console_setup(struct console *co, char *options) kref_get(&tty->driver->kref); __module_get(tty->driver->owner); tty->ops = &usb_console_fake_tty_ops; - if (tty_init_termios(tty)) { - retval = -ENOMEM; - goto put_tty; - } + tty_init_termios(tty); tty_port_tty_set(&port->port, tty); } @@ -185,7 +182,6 @@ static int usb_console_setup(struct console *co, char *options) fail: tty_port_tty_set(&port->port, NULL); - put_tty: tty_kref_put(tty); reset_open_count: port->port.count = 0; diff --git a/include/linux/tty.h b/include/linux/tty.h index 64e301dbf7b9..d7a7c1efe77f 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -448,7 +448,6 @@ extern int tty_unthrottle_safe(struct tty_struct *tty); extern int tty_do_resize(struct tty_struct *tty, struct winsize *ws); extern void tty_driver_remove_tty(struct tty_driver *driver, struct tty_struct *tty); -extern void tty_free_termios(struct tty_struct *tty); extern int is_current_pgrp_orphaned(void); extern int is_ignored(int sig); extern int tty_signal(int sig, struct tty_struct *tty); @@ -508,10 +507,9 @@ extern struct tty_struct *alloc_tty_struct(struct tty_driver *driver, int idx); extern int tty_alloc_file(struct file *file); extern void tty_add_file(struct tty_struct *tty, struct file *file); extern void tty_free_file(struct file *file); -extern void free_tty_struct(struct tty_struct *tty); extern struct tty_struct *tty_init_dev(struct tty_driver *driver, int idx); extern int tty_release(struct inode *inode, struct file *filp); -extern int tty_init_termios(struct tty_struct *tty); +extern void tty_init_termios(struct tty_struct *tty); extern int tty_standard_install(struct tty_driver *driver, struct tty_struct *tty); -- cgit v1.2.3 From 05de87ed9531dc19d87136c9204d251abebc60d3 Mon Sep 17 00:00:00 2001 From: Peter Hurley Date: Sat, 9 Jan 2016 21:13:49 -0800 Subject: tty: Re-declare tty_driver_remove_tty() file scope tty_driver_remove_tty() is only local-scope; declare as static. 
Signed-off-by: Peter Hurley Signed-off-by: Greg Kroah-Hartman --- drivers/tty/tty_io.c | 2 +- include/linux/tty.h | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c index 742860e583ce..6cffe0d57d70 100644 --- a/drivers/tty/tty_io.c +++ b/drivers/tty/tty_io.c @@ -1437,7 +1437,7 @@ static int tty_driver_install_tty(struct tty_driver *driver, * * Locking: tty_mutex for now */ -void tty_driver_remove_tty(struct tty_driver *driver, struct tty_struct *tty) +static void tty_driver_remove_tty(struct tty_driver *driver, struct tty_struct *tty) { if (driver->ops->remove) driver->ops->remove(driver, tty); diff --git a/include/linux/tty.h b/include/linux/tty.h index d7a7c1efe77f..57c4e03ec2aa 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -446,8 +446,6 @@ extern void tty_unthrottle(struct tty_struct *tty); extern int tty_throttle_safe(struct tty_struct *tty); extern int tty_unthrottle_safe(struct tty_struct *tty); extern int tty_do_resize(struct tty_struct *tty, struct winsize *ws); -extern void tty_driver_remove_tty(struct tty_driver *driver, - struct tty_struct *tty); extern int is_current_pgrp_orphaned(void); extern int is_ignored(int sig); extern int tty_signal(int sig, struct tty_struct *tty); -- cgit v1.2.3 From c2bb524b2e1a6eddae65139601bee24cb60856a0 Mon Sep 17 00:00:00 2001 From: Peter Hurley Date: Sat, 9 Jan 2016 21:13:51 -0800 Subject: tty: Remove __lockfunc annotation from tty lock functions The tty lock/unlock code does not belong in the special lockfunc section which is treated specially by stack backtraces. Signed-off-by: Peter Hurley Signed-off-by: Greg Kroah-Hartman --- drivers/tty/tty_ldisc.c | 16 +++++++--------- drivers/tty/tty_mutex.c | 8 ++++---- include/linux/tty.h | 8 ++++---- 3 files changed, 15 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/drivers/tty/tty_ldisc.c b/drivers/tty/tty_ldisc.c index 49f0cea1e538..713cc2d48846 100644 --- a/drivers/tty/tty_ldisc.c +++ b/drivers/tty/tty_ldisc.c @@ -304,13 +304,13 @@ void tty_ldisc_deref(struct tty_ldisc *ld) EXPORT_SYMBOL_GPL(tty_ldisc_deref); -static inline int __lockfunc +static inline int __tty_ldisc_lock(struct tty_struct *tty, unsigned long timeout) { return ldsem_down_write(&tty->ldisc_sem, timeout); } -static inline int __lockfunc +static inline int __tty_ldisc_lock_nested(struct tty_struct *tty, unsigned long timeout) { return ldsem_down_write_nested(&tty->ldisc_sem, @@ -322,8 +322,7 @@ static inline void __tty_ldisc_unlock(struct tty_struct *tty) ldsem_up_write(&tty->ldisc_sem); } -static int __lockfunc -tty_ldisc_lock(struct tty_struct *tty, unsigned long timeout) +static int tty_ldisc_lock(struct tty_struct *tty, unsigned long timeout) { int ret; @@ -340,7 +339,7 @@ static void tty_ldisc_unlock(struct tty_struct *tty) __tty_ldisc_unlock(tty); } -static int __lockfunc +static int tty_ldisc_lock_pair_timeout(struct tty_struct *tty, struct tty_struct *tty2, unsigned long timeout) { @@ -376,14 +375,13 @@ tty_ldisc_lock_pair_timeout(struct tty_struct *tty, struct tty_struct *tty2, return 0; } -static void __lockfunc -tty_ldisc_lock_pair(struct tty_struct *tty, struct tty_struct *tty2) +static void tty_ldisc_lock_pair(struct tty_struct *tty, struct tty_struct *tty2) { tty_ldisc_lock_pair_timeout(tty, tty2, MAX_SCHEDULE_TIMEOUT); } -static void __lockfunc tty_ldisc_unlock_pair(struct tty_struct *tty, - struct tty_struct *tty2) +static void tty_ldisc_unlock_pair(struct tty_struct *tty, + struct 
tty_struct *tty2) { __tty_ldisc_unlock(tty); if (tty2) diff --git a/drivers/tty/tty_mutex.c b/drivers/tty/tty_mutex.c index d2f3c4cd697f..75351e4b77df 100644 --- a/drivers/tty/tty_mutex.c +++ b/drivers/tty/tty_mutex.c @@ -10,7 +10,7 @@ * Getting the big tty mutex. */ -void __lockfunc tty_lock(struct tty_struct *tty) +void tty_lock(struct tty_struct *tty) { if (WARN(tty->magic != TTY_MAGIC, "L Bad %p\n", tty)) return; @@ -27,7 +27,7 @@ int tty_lock_interruptible(struct tty_struct *tty) return mutex_lock_interruptible(&tty->legacy_mutex); } -void __lockfunc tty_unlock(struct tty_struct *tty) +void tty_unlock(struct tty_struct *tty) { if (WARN(tty->magic != TTY_MAGIC, "U Bad %p\n", tty)) return; @@ -36,13 +36,13 @@ void __lockfunc tty_unlock(struct tty_struct *tty) } EXPORT_SYMBOL(tty_unlock); -void __lockfunc tty_lock_slave(struct tty_struct *tty) +void tty_lock_slave(struct tty_struct *tty) { if (tty && tty != tty->link) tty_lock(tty); } -void __lockfunc tty_unlock_slave(struct tty_struct *tty) +void tty_unlock_slave(struct tty_struct *tty) { if (tty && tty != tty->link) tty_unlock(tty); diff --git a/include/linux/tty.h b/include/linux/tty.h index 57c4e03ec2aa..83f127673bc9 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -643,11 +643,11 @@ extern long vt_compat_ioctl(struct tty_struct *tty, /* tty_mutex.c */ /* functions for preparation of BKL removal */ -extern void __lockfunc tty_lock(struct tty_struct *tty); +extern void tty_lock(struct tty_struct *tty); extern int tty_lock_interruptible(struct tty_struct *tty); -extern void __lockfunc tty_unlock(struct tty_struct *tty); -extern void __lockfunc tty_lock_slave(struct tty_struct *tty); -extern void __lockfunc tty_unlock_slave(struct tty_struct *tty); +extern void tty_unlock(struct tty_struct *tty); +extern void tty_lock_slave(struct tty_struct *tty); +extern void tty_unlock_slave(struct tty_struct *tty); extern void tty_set_lock_subclass(struct tty_struct *tty); #ifdef CONFIG_PROC_FS -- cgit v1.2.3 From 707b61bba8b16adb6a2cbc72b71a75e09cb0f81a Mon Sep 17 00:00:00 2001 From: Oreste Salerno Date: Wed, 27 Jan 2016 13:55:43 -0800 Subject: Input: cyttsp - switch to using device properties Drop support for platform data passed via a C-structure and switch to device properties instead, which should make the driver compatible with all platforms: OF, ACPI and static boards. Static boards should use property sets to communicate device parameters to the driver. 
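For a static board, such a property set might be built along the following lines. This is a minimal sketch, not part of the patch: the variable names are illustrative, the property names are those documented in the binding added below, and the set would be attached to the device through the generic device-properties/property-set API before the driver probes.

	static const u8 cyttsp_bl_key[] = {
		0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08
	};

	static const struct property_entry cyttsp_props[] = {
		PROPERTY_ENTRY_U8_ARRAY("bootloader-key", cyttsp_bl_key),
		PROPERTY_ENTRY_U32("active-distance", 8),
		PROPERTY_ENTRY_U32("active-interval-ms", 0),
		PROPERTY_ENTRY_U32("lowpower-interval-ms", 200),
		PROPERTY_ENTRY_U32("touch-timeout-ms", 100),
		{ }	/* sentinel */
	};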
Signed-off-by: Oreste Salerno Acked-by: Rob Herring Signed-off-by: Dmitry Torokhov --- .../bindings/input/touchscreen/cyttsp.txt | 95 ++++++++++++++ drivers/input/touchscreen/cyttsp_core.c | 136 ++++++++++++++------- drivers/input/touchscreen/cyttsp_core.h | 9 +- include/linux/input/cyttsp.h | 15 --- 4 files changed, 197 insertions(+), 58 deletions(-) create mode 100644 Documentation/devicetree/bindings/input/touchscreen/cyttsp.txt (limited to 'include/linux') diff --git a/Documentation/devicetree/bindings/input/touchscreen/cyttsp.txt b/Documentation/devicetree/bindings/input/touchscreen/cyttsp.txt new file mode 100644 index 000000000000..b75d4cfd2c36 --- /dev/null +++ b/Documentation/devicetree/bindings/input/touchscreen/cyttsp.txt @@ -0,0 +1,95 @@ +* Cypress cyttsp touchscreen controller + +Required properties: + - compatible : must be "cypress,cyttsp-i2c" or "cypress,cyttsp-spi" + - reg : Device I2C address or SPI chip select number + - spi-max-frequency : Maximum SPI clocking speed of the device (for cyttsp-spi) + - interrupt-parent : the phandle for the gpio controller + (see interrupt binding[0]). + - interrupts : (gpio) interrupt to which the chip is connected + (see interrupt binding[0]). + - bootloader-key : the 8-byte bootloader key that is required to switch + the chip from bootloader mode (default mode) to + application mode. + This property has to be specified as an array of 8 + '/bits/ 8' values. + +Optional properties: + - reset-gpios : the reset gpio the chip is connected to + (see GPIO binding[1] for more details). + - touchscreen-size-x : horizontal resolution of touchscreen (in pixels) + - touchscreen-size-y : vertical resolution of touchscreen (in pixels) + - touchscreen-fuzz-x : horizontal noise value of the absolute input device + (in pixels) + - touchscreen-fuzz-y : vertical noise value of the absolute input device + (in pixels) + - active-distance : the distance in pixels beyond which a touch must move + before movement is detected and reported by the device. + Valid values: 0-15. + - active-interval-ms : the minimum period in ms between consecutive + scanning/processing cycles when the chip is in active mode. + Valid values: 0-255. + - lowpower-interval-ms : the minimum period in ms between consecutive + scanning/processing cycles when the chip is in low-power mode. + Valid values: 0-2550 + - touch-timeout-ms : minimum time in ms spent in the active power state while no + touches are detected before entering low-power mode. + Valid values: 0-2550 + - use-handshake : enable register-based handshake (boolean). This should + only be used if the chip is configured to use 'blocking + communication with timeout' (in this case the device + generates an interrupt at the end of every + scanning/processing cycle). + +[0]: Documentation/devicetree/bindings/interrupt-controller/interrupts.txt +[1]: Documentation/devicetree/bindings/gpio/gpio.txt + +Example: + &i2c1 { + /* ... */ + cyttsp@a { + compatible = "cypress,cyttsp-i2c"; + reg = <0xa>; + interrupt-parent = <&gpio0>; + interrupts = <28 0>; + reset-gpios = <&gpio3 4 GPIO_ACTIVE_LOW>; + + touchscreen-size-x = <800>; + touchscreen-size-y = <480>; + touchscreen-fuzz-x = <4>; + touchscreen-fuzz-y = <7>; + + bootloader-key = /bits/ 8 <0x01 0x02 0x03 0x04 0x05 0x06 0x07 0x08>; + active-distance = <8>; + active-interval-ms = <0>; + lowpower-interval-ms = <200>; + touch-timeout-ms = <100>; + }; + + /* ... */ + }; + + &mcspi1 { + /* ... 
*/ + cyttsp@0 { + compatible = "cypress,cyttsp-spi"; + spi-max-frequency = <6000000>; + reg = <0>; + interrupt-parent = <&gpio0>; + interrupts = <28 0>; + reset-gpios = <&gpio3 4 GPIO_ACTIVE_LOW>; + + touchscreen-size-x = <800>; + touchscreen-size-y = <480>; + touchscreen-fuzz-x = <4>; + touchscreen-fuzz-y = <7>; + + bootloader-key = /bits/ 8 <0x01 0x02 0x03 0x04 0x05 0x06 0x07 0x08>; + active-distance = <8>; + active-interval-ms = <0>; + lowpower-interval-ms = <200>; + touch-timeout-ms = <100>; + }; + + /* ... */ + }; diff --git a/drivers/input/touchscreen/cyttsp_core.c b/drivers/input/touchscreen/cyttsp_core.c index b6653aa06108..8814b6c87097 100644 --- a/drivers/input/touchscreen/cyttsp_core.c +++ b/drivers/input/touchscreen/cyttsp_core.c @@ -30,9 +30,12 @@ #include #include #include +#include #include #include #include +#include +#include #include "cyttsp_core.h" @@ -57,6 +60,7 @@ #define CY_DELAY_DFLT 20 /* ms */ #define CY_DELAY_MAX 500 #define CY_ACT_DIST_DFLT 0xF8 +#define CY_ACT_DIST_MASK 0x0F #define CY_HNDSHK_BIT 0x80 /* device mode bits */ #define CY_OPERATE_MODE 0x00 @@ -120,7 +124,7 @@ static int ttsp_send_command(struct cyttsp *ts, u8 cmd) static int cyttsp_handshake(struct cyttsp *ts) { - if (ts->pdata->use_hndshk) + if (ts->use_hndshk) return ttsp_send_command(ts, ts->xy_data.hst_mode ^ CY_HNDSHK_BIT); @@ -142,9 +146,9 @@ static int cyttsp_exit_bl_mode(struct cyttsp *ts) u8 bl_cmd[sizeof(bl_command)]; memcpy(bl_cmd, bl_command, sizeof(bl_command)); - if (ts->pdata->bl_keys) + if (ts->bl_keys) memcpy(&bl_cmd[sizeof(bl_command) - CY_NUM_BL_KEYS], - ts->pdata->bl_keys, CY_NUM_BL_KEYS); + ts->bl_keys, CY_NUM_BL_KEYS); error = ttsp_write_block_data(ts, CY_REG_BASE, sizeof(bl_cmd), bl_cmd); @@ -217,14 +221,14 @@ static int cyttsp_set_sysinfo_regs(struct cyttsp *ts) { int retval = 0; - if (ts->pdata->act_intrvl != CY_ACT_INTRVL_DFLT || - ts->pdata->tch_tmout != CY_TCH_TMOUT_DFLT || - ts->pdata->lp_intrvl != CY_LP_INTRVL_DFLT) { + if (ts->act_intrvl != CY_ACT_INTRVL_DFLT || + ts->tch_tmout != CY_TCH_TMOUT_DFLT || + ts->lp_intrvl != CY_LP_INTRVL_DFLT) { u8 intrvl_ray[] = { - ts->pdata->act_intrvl, - ts->pdata->tch_tmout, - ts->pdata->lp_intrvl + ts->act_intrvl, + ts->tch_tmout, + ts->lp_intrvl }; /* set intrvl registers */ @@ -263,7 +267,7 @@ out: static int cyttsp_act_dist_setup(struct cyttsp *ts) { - u8 act_dist_setup = ts->pdata->act_dist; + u8 act_dist_setup = ts->act_dist; /* Init gesture; active distance setup */ return ttsp_write_block_data(ts, CY_REG_ACT_DIST, @@ -528,25 +532,82 @@ static void cyttsp_close(struct input_dev *dev) cyttsp_disable(ts); } -static void cyttsp_platform_exit(void *data) +static int cyttsp_parse_properties(struct cyttsp *ts) { - struct cyttsp *ts = data; + struct device *dev = ts->dev; + u32 dt_value; + int ret; + + ts->bl_keys = devm_kzalloc(dev, CY_NUM_BL_KEYS, GFP_KERNEL); + if (!ts->bl_keys) + return -ENOMEM; + + /* Set some default values */ + ts->use_hndshk = false; + ts->act_dist = CY_ACT_DIST_DFLT; + ts->act_intrvl = CY_ACT_INTRVL_DFLT; + ts->tch_tmout = CY_TCH_TMOUT_DFLT; + ts->lp_intrvl = CY_LP_INTRVL_DFLT; + + ret = device_property_read_u8_array(dev, "bootloader-key", + ts->bl_keys, CY_NUM_BL_KEYS); + if (ret) { + dev_err(dev, + "bootloader-key property could not be retrieved\n"); + return ret; + } + + ts->use_hndshk = device_property_present(dev, "use-handshake"); + + if (!device_property_read_u32(dev, "active-distance", &dt_value)) { + if (dt_value > 15) { + dev_err(dev, "active-distance (%u) must be [0-15]\n", + dt_value); + return 
-EINVAL; + } + ts->act_dist &= ~CY_ACT_DIST_MASK; + ts->act_dist |= dt_value; + } + + if (!device_property_read_u32(dev, "active-interval-ms", &dt_value)) { + if (dt_value > 255) { + dev_err(dev, "active-interval-ms (%u) must be [0-255]\n", + dt_value); + return -EINVAL; + } + ts->act_intrvl = dt_value; + } - if (ts->pdata->exit) - ts->pdata->exit(); + if (!device_property_read_u32(dev, "lowpower-interval-ms", &dt_value)) { + if (dt_value > 2550) { + dev_err(dev, "lowpower-interval-ms (%u) must be [0-2550]\n", + dt_value); + return -EINVAL; + } + /* Register value is expressed in 0.01s / bit */ + ts->lp_intrvl = dt_value / 10; + } + + if (!device_property_read_u32(dev, "touch-timeout-ms", &dt_value)) { + if (dt_value > 2550) { + dev_err(dev, "touch-timeout-ms (%u) must be [0-2550]\n", + dt_value); + return -EINVAL; + } + /* Register value is expressed in 0.01s / bit */ + ts->tch_tmout = dt_value / 10; + } + + return 0; } struct cyttsp *cyttsp_probe(const struct cyttsp_bus_ops *bus_ops, struct device *dev, int irq, size_t xfer_buf_size) { - const struct cyttsp_platform_data *pdata = dev_get_platdata(dev); struct cyttsp *ts; struct input_dev *input_dev; int error; - if (!pdata || !pdata->name || irq <= 0) - return ERR_PTR(-EINVAL); - ts = devm_kzalloc(dev, sizeof(*ts) + xfer_buf_size, GFP_KERNEL); if (!ts) return ERR_PTR(-ENOMEM); @@ -557,29 +618,24 @@ struct cyttsp *cyttsp_probe(const struct cyttsp_bus_ops *bus_ops, ts->dev = dev; ts->input = input_dev; - ts->pdata = dev_get_platdata(dev); ts->bus_ops = bus_ops; ts->irq = irq; - init_completion(&ts->bl_ready); - snprintf(ts->phys, sizeof(ts->phys), "%s/input0", dev_name(dev)); - - error = devm_add_action(dev, cyttsp_platform_exit, ts); - if (error) { - dev_err(dev, "failed to install exit action: %d\n", error); + ts->reset_gpio = devm_gpiod_get_optional(dev, "reset", GPIOD_OUT_LOW); + if (IS_ERR(ts->reset_gpio)) { + error = PTR_ERR(ts->reset_gpio); + dev_err(dev, "Failed to request reset gpio, error %d\n", error); return ERR_PTR(error); } - if (pdata->init) { - error = pdata->init(); - if (error) { - dev_err(ts->dev, "platform init failed, err: %d\n", - error); - return ERR_PTR(error); - } - } + error = cyttsp_parse_properties(ts); + if (error) + return ERR_PTR(error); + + init_completion(&ts->bl_ready); + snprintf(ts->phys, sizeof(ts->phys), "%s/input0", dev_name(dev)); - input_dev->name = pdata->name; + input_dev->name = "Cypress TTSP TouchScreen"; input_dev->phys = ts->phys; input_dev->id.bustype = bus_ops->bustype; input_dev->dev.parent = ts->dev; @@ -589,13 +645,9 @@ struct cyttsp *cyttsp_probe(const struct cyttsp_bus_ops *bus_ops, input_set_drvdata(input_dev, ts); - __set_bit(EV_ABS, input_dev->evbit); - input_set_abs_params(input_dev, ABS_MT_POSITION_X, - 0, pdata->maxx, 0, 0); - input_set_abs_params(input_dev, ABS_MT_POSITION_Y, - 0, pdata->maxy, 0, 0); - input_set_abs_params(input_dev, ABS_MT_TOUCH_MAJOR, - 0, CY_MAXZ, 0, 0); + input_set_capability(input_dev, EV_ABS, ABS_MT_POSITION_X); + input_set_capability(input_dev, EV_ABS, ABS_MT_POSITION_Y); + touchscreen_parse_properties(input_dev, true); error = input_mt_init_slots(input_dev, CY_MAX_ID, 0); if (error) { @@ -605,7 +657,7 @@ struct cyttsp *cyttsp_probe(const struct cyttsp_bus_ops *bus_ops, error = devm_request_threaded_irq(dev, ts->irq, NULL, cyttsp_irq, IRQF_TRIGGER_FALLING | IRQF_ONESHOT, - pdata->name, ts); + "cyttsp", ts); if (error) { dev_err(ts->dev, "failed to request IRQ %d, err: %d\n", ts->irq, error); diff --git a/drivers/input/touchscreen/cyttsp_core.h 
b/drivers/input/touchscreen/cyttsp_core.h index c5e7afce6de8..7835e2bacf5a 100644 --- a/drivers/input/touchscreen/cyttsp_core.h +++ b/drivers/input/touchscreen/cyttsp_core.h @@ -129,7 +129,6 @@ struct cyttsp { int irq; struct input_dev *input; char phys[32]; - const struct cyttsp_platform_data *pdata; const struct cyttsp_bus_ops *bus_ops; struct cyttsp_bootloader_data bl_data; struct cyttsp_sysinfo_data sysinfo_data; @@ -138,6 +137,14 @@ struct cyttsp { enum cyttsp_state state; bool suspended; + struct gpio_desc *reset_gpio; + bool use_hndshk; + u8 act_dist; + u8 act_intrvl; + u8 tch_tmout; + u8 lp_intrvl; + u8 *bl_keys; + u8 xfer_buf[] ____cacheline_aligned; }; diff --git a/include/linux/input/cyttsp.h b/include/linux/input/cyttsp.h index 5af7c66f1fca..586c8c95dcb0 100644 --- a/include/linux/input/cyttsp.h +++ b/include/linux/input/cyttsp.h @@ -40,19 +40,4 @@ /* Active distance in pixels for a gesture to be reported */ #define CY_ACT_DIST_DFLT 0xF8 /* pixels */ -struct cyttsp_platform_data { - u32 maxx; - u32 maxy; - bool use_hndshk; - u8 act_dist; /* Active distance */ - u8 act_intrvl; /* Active refresh interval; ms */ - u8 tch_tmout; /* Active touch timeout; ms */ - u8 lp_intrvl; /* Low power refresh interval; ms */ - int (*init)(void); - void (*exit)(void); - char *name; - s16 irq_gpio; - u8 *bl_keys; -}; - #endif /* _CYTTSP_H_ */ -- cgit v1.2.3 From fdfb719e93b55a50f90da2059dc450e7c0c48e8f Mon Sep 17 00:00:00 2001 From: Peter Hurley Date: Sun, 10 Jan 2016 22:40:54 -0800 Subject: tty: Remove chars_in_buffer() line discipline method The chars_in_buffer() line discipline method serves no functional purpose, other than as a (dubious) debugging aid for mostly bit-rotting drivers. Despite being documented as an optional method, every call site executes it unconditionally (although some call sites are conditionally compiled). Furthermore, direct tty->ldisc access without an ldisc ref is unsafe. Lastly, N_TTY's chars_in_buffer() has warned of its removal since 3.12. Signed-off-by: Peter Hurley Signed-off-by: Greg Kroah-Hartman --- Documentation/serial/tty.txt | 3 --- drivers/tty/amiserial.c | 6 ++---- drivers/tty/cyclades.c | 8 ++++---- drivers/tty/n_gsm.c | 16 ---------------- drivers/tty/n_tty.c | 23 ----------------------- drivers/tty/rocket.c | 6 ++---- drivers/tty/serial/crisv10.c | 12 +++++------- include/linux/tty_ldisc.h | 7 ------- 8 files changed, 13 insertions(+), 68 deletions(-) (limited to 'include/linux') diff --git a/Documentation/serial/tty.txt b/Documentation/serial/tty.txt index bc3842dc323a..798cba82c762 100644 --- a/Documentation/serial/tty.txt +++ b/Documentation/serial/tty.txt @@ -72,9 +72,6 @@ flush_buffer() - (optional) May be called at any point between open and close, and instructs the line discipline to empty its input buffer. -chars_in_buffer() - (optional) Report the number of bytes in the input - buffer. - set_termios() - (optional) Called on termios structure changes. The caller passes the old termios data and the current data is in the tty.
Called under the diff --git a/drivers/tty/amiserial.c b/drivers/tty/amiserial.c index 2caaf5a2516d..6ba5681b6385 100644 --- a/drivers/tty/amiserial.c +++ b/drivers/tty/amiserial.c @@ -965,8 +965,7 @@ static void rs_throttle(struct tty_struct * tty) struct serial_state *info = tty->driver_data; unsigned long flags; #ifdef SERIAL_DEBUG_THROTTLE - printk("throttle %s: %d....\n", tty_name(tty), - tty->ldisc.chars_in_buffer(tty)); + printk("throttle %s ....\n", tty_name(tty)); #endif if (serial_paranoia_check(info, tty->name, "rs_throttle")) @@ -988,8 +987,7 @@ static void rs_unthrottle(struct tty_struct * tty) struct serial_state *info = tty->driver_data; unsigned long flags; #ifdef SERIAL_DEBUG_THROTTLE - printk("unthrottle %s: %d....\n", tty_name(tty), - tty->ldisc.chars_in_buffer(tty)); + printk("unthrottle %s ....\n", tty_name(tty)); #endif if (serial_paranoia_check(info, tty->name, "rs_unthrottle")) diff --git a/drivers/tty/cyclades.c b/drivers/tty/cyclades.c index abbed201dc74..a48e7e66b970 100644 --- a/drivers/tty/cyclades.c +++ b/drivers/tty/cyclades.c @@ -2852,8 +2852,8 @@ static void cy_throttle(struct tty_struct *tty) unsigned long flags; #ifdef CY_DEBUG_THROTTLE - printk(KERN_DEBUG "cyc:throttle %s: %ld...ttyC%d\n", tty_name(tty), - tty->ldisc.chars_in_buffer(tty), info->line); + printk(KERN_DEBUG "cyc:throttle %s ...ttyC%d\n", tty_name(tty), + info->line); #endif if (serial_paranoia_check(info, tty->name, "cy_throttle")) @@ -2891,8 +2891,8 @@ static void cy_unthrottle(struct tty_struct *tty) unsigned long flags; #ifdef CY_DEBUG_THROTTLE - printk(KERN_DEBUG "cyc:unthrottle %s: %ld...ttyC%d\n", - tty_name(tty), tty_chars_in_buffer(tty), info->line); + printk(KERN_DEBUG "cyc:unthrottle %s ...ttyC%d\n", + tty_name(tty), info->line); #endif if (serial_paranoia_check(info, tty->name, "cy_unthrottle")) diff --git a/drivers/tty/n_gsm.c b/drivers/tty/n_gsm.c index c3fe026d3168..e3cc27749344 100644 --- a/drivers/tty/n_gsm.c +++ b/drivers/tty/n_gsm.c @@ -2303,21 +2303,6 @@ static void gsmld_receive_buf(struct tty_struct *tty, const unsigned char *cp, /* If clogged call tty_throttle(tty); */ } -/** - * gsmld_chars_in_buffer - report available bytes - * @tty: tty device - * - * Report the number of characters buffered to be delivered to user - * at this instant in time. - * - * Locking: gsm lock - */ - -static ssize_t gsmld_chars_in_buffer(struct tty_struct *tty) -{ - return 0; -} - /** * gsmld_flush_buffer - clean input queue * @tty: terminal device @@ -2830,7 +2815,6 @@ static struct tty_ldisc_ops tty_ldisc_packet = { .open = gsmld_open, .close = gsmld_close, .flush_buffer = gsmld_flush_buffer, - .chars_in_buffer = gsmld_chars_in_buffer, .read = gsmld_read, .write = gsmld_write, .ioctl = gsmld_ioctl, diff --git a/drivers/tty/n_tty.c b/drivers/tty/n_tty.c index b280abaad91b..90eca2605fbf 100644 --- a/drivers/tty/n_tty.c +++ b/drivers/tty/n_tty.c @@ -380,28 +380,6 @@ static void n_tty_flush_buffer(struct tty_struct *tty) up_write(&tty->termios_rwsem); } -/** - * n_tty_chars_in_buffer - report available bytes - * @tty: tty device - * - * Report the number of characters buffered to be delivered to user - * at this instant in time. 
- * - * Locking: exclusive termios_rwsem - */ - -static ssize_t n_tty_chars_in_buffer(struct tty_struct *tty) -{ - ssize_t n; - - WARN_ONCE(1, "%s is deprecated and scheduled for removal.", __func__); - - down_write(&tty->termios_rwsem); - n = chars_in_buffer(tty); - up_write(&tty->termios_rwsem); - return n; -} - /** * is_utf8_continuation - utf8 multibyte check * @c: byte to check @@ -2525,7 +2503,6 @@ struct tty_ldisc_ops tty_ldisc_N_TTY = { .open = n_tty_open, .close = n_tty_close, .flush_buffer = n_tty_flush_buffer, - .chars_in_buffer = n_tty_chars_in_buffer, .read = n_tty_read, .write = n_tty_write, .ioctl = n_tty_ioctl, diff --git a/drivers/tty/rocket.c b/drivers/tty/rocket.c index 802eac7e561b..f624b93a237f 100644 --- a/drivers/tty/rocket.c +++ b/drivers/tty/rocket.c @@ -1360,8 +1360,7 @@ static void rp_throttle(struct tty_struct *tty) struct r_port *info = tty->driver_data; #ifdef ROCKET_DEBUG_THROTTLE - printk(KERN_INFO "throttle %s: %d....\n", tty->name, - tty->ldisc.chars_in_buffer(tty)); + printk(KERN_INFO "throttle %s ....\n", tty->name); #endif if (rocket_paranoia_check(info, "rp_throttle")) @@ -1377,8 +1376,7 @@ static void rp_unthrottle(struct tty_struct *tty) { struct r_port *info = tty->driver_data; #ifdef ROCKET_DEBUG_THROTTLE - printk(KERN_INFO "unthrottle %s: %d....\n", tty->name, - tty->ldisc.chars_in_buffer(tty)); + printk(KERN_INFO "unthrottle %s ....\n", tty->name); #endif if (rocket_paranoia_check(info, "rp_unthrottle")) diff --git a/drivers/tty/serial/crisv10.c b/drivers/tty/serial/crisv10.c index f13f2ebd215b..e98aef797065 100644 --- a/drivers/tty/serial/crisv10.c +++ b/drivers/tty/serial/crisv10.c @@ -2968,7 +2968,7 @@ static int rs_raw_write(struct tty_struct *tty, local_save_flags(flags); DFLOW(DEBUG_LOG(info->line, "write count %i ", count)); - DFLOW(DEBUG_LOG(info->line, "ldisc %i\n", tty->ldisc.chars_in_buffer(tty))); + DFLOW(DEBUG_LOG(info->line, "ldisc\n")); /* The local_irq_disable/restore_flags pairs below are needed @@ -3161,10 +3161,9 @@ rs_throttle(struct tty_struct * tty) { struct e100_serial *info = (struct e100_serial *)tty->driver_data; #ifdef SERIAL_DEBUG_THROTTLE - printk("throttle %s: %lu....\n", tty_name(tty), - (unsigned long)tty->ldisc.chars_in_buffer(tty)); + printk("throttle %s ....\n", tty_name(tty)); #endif - DFLOW(DEBUG_LOG(info->line,"rs_throttle %lu\n", tty->ldisc.chars_in_buffer(tty))); + DFLOW(DEBUG_LOG(info->line,"rs_throttle\n")); /* Do RTS before XOFF since XOFF might take some time */ if (tty->termios.c_cflag & CRTSCTS) { @@ -3181,10 +3180,9 @@ rs_unthrottle(struct tty_struct * tty) { struct e100_serial *info = (struct e100_serial *)tty->driver_data; #ifdef SERIAL_DEBUG_THROTTLE - printk("unthrottle %s: %lu....\n", tty_name(tty), - (unsigned long)tty->ldisc.chars_in_buffer(tty)); + printk("unthrottle %s ....\n", tty_name(tty)); #endif - DFLOW(DEBUG_LOG(info->line,"rs_unthrottle ldisc %d\n", tty->ldisc.chars_in_buffer(tty))); + DFLOW(DEBUG_LOG(info->line,"rs_unthrottle ldisc\n")); DFLOW(DEBUG_LOG(info->line,"rs_unthrottle flip.count: %i\n", tty->flip.count)); /* Do RTS before XOFF since XOFF might take some time */ if (tty->termios.c_cflag & CRTSCTS) { diff --git a/include/linux/tty_ldisc.h b/include/linux/tty_ldisc.h index 00c9d688d7b7..6101ab8dc148 100644 --- a/include/linux/tty_ldisc.h +++ b/include/linux/tty_ldisc.h @@ -25,12 +25,6 @@ * buffers of any input characters it may have queued to be * delivered to the user mode process. 
* - * ssize_t (*chars_in_buffer)(struct tty_struct *tty); - * - * This function returns the number of input characters the line - * discipline may have queued up to be delivered to the user mode - * process. - * * ssize_t (*read)(struct tty_struct * tty, struct file * file, * unsigned char * buf, size_t nr); * @@ -188,7 +182,6 @@ struct tty_ldisc_ops { int (*open)(struct tty_struct *); void (*close)(struct tty_struct *); void (*flush_buffer)(struct tty_struct *tty); - ssize_t (*chars_in_buffer)(struct tty_struct *tty); ssize_t (*read)(struct tty_struct *tty, struct file *file, unsigned char __user *buf, size_t nr); ssize_t (*write)(struct tty_struct *tty, struct file *file, -- cgit v1.2.3 From c12da96f801a3f45b0634c966b9e7cda307daa72 Mon Sep 17 00:00:00 2001 From: Peter Hurley Date: Sun, 10 Jan 2016 22:41:04 -0800 Subject: tty: Use 'disc' for line discipline index name tty->ldisc is a ptr to struct tty_ldisc, but unfortunately 'ldisc' is also used as a parameter or local name to refer to the line discipline index value (ie, N_TTY, N_GSM, etc.); instead prefer the name used by the line discipline registration/ref counting functions. Signed-off-by: Peter Hurley Signed-off-by: Greg Kroah-Hartman --- drivers/tty/tty_io.c | 6 +++--- drivers/tty/tty_ldisc.c | 22 +++++++++++----------- include/linux/tty.h | 2 +- 3 files changed, 15 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c index eb26754b2e5d..7f556e3c1515 100644 --- a/drivers/tty/tty_io.c +++ b/drivers/tty/tty_io.c @@ -2662,13 +2662,13 @@ static int tiocgsid(struct tty_struct *tty, struct tty_struct *real_tty, pid_t _ static int tiocsetd(struct tty_struct *tty, int __user *p) { - int ldisc; + int disc; int ret; - if (get_user(ldisc, p)) + if (get_user(disc, p)) return -EFAULT; - ret = tty_set_ldisc(tty, ldisc); + ret = tty_set_ldisc(tty, disc); return ret; } diff --git a/drivers/tty/tty_ldisc.c b/drivers/tty/tty_ldisc.c index eac33e104ccc..45aa31b923e5 100644 --- a/drivers/tty/tty_ldisc.c +++ b/drivers/tty/tty_ldisc.c @@ -417,7 +417,7 @@ EXPORT_SYMBOL_GPL(tty_ldisc_flush); /** * tty_set_termios_ldisc - set ldisc field * @tty: tty structure - * @num: line discipline number + * @disc: line discipline number * * This is probably overkill for real world processors but * they are not on hot paths so a little discipline won't do @@ -430,10 +430,10 @@ EXPORT_SYMBOL_GPL(tty_ldisc_flush); * Locking: takes termios_rwsem */ -static void tty_set_termios_ldisc(struct tty_struct *tty, int num) +static void tty_set_termios_ldisc(struct tty_struct *tty, int disc) { down_write(&tty->termios_rwsem); - tty->termios.c_line = num; + tty->termios.c_line = disc; up_write(&tty->termios_rwsem); tty->disc_data = NULL; @@ -531,12 +531,12 @@ static void tty_ldisc_restore(struct tty_struct *tty, struct tty_ldisc *old) * the close of one side of a tty/pty pair, and eventually hangup. 
*/ -int tty_set_ldisc(struct tty_struct *tty, int ldisc) +int tty_set_ldisc(struct tty_struct *tty, int disc) { int retval; struct tty_ldisc *old_ldisc, *new_ldisc; - new_ldisc = tty_ldisc_get(tty, ldisc); + new_ldisc = tty_ldisc_get(tty, disc); if (IS_ERR(new_ldisc)) return PTR_ERR(new_ldisc); @@ -551,7 +551,7 @@ int tty_set_ldisc(struct tty_struct *tty, int ldisc) } /* Check the no-op case */ - if (tty->ldisc->ops->num == ldisc) + if (tty->ldisc->ops->num == disc) goto out; if (test_bit(TTY_HUPPED, &tty->flags)) { @@ -567,7 +567,7 @@ int tty_set_ldisc(struct tty_struct *tty, int ldisc) /* Now set up the new line discipline. */ tty->ldisc = new_ldisc; - tty_set_termios_ldisc(tty, ldisc); + tty_set_termios_ldisc(tty, disc); retval = tty_ldisc_open(tty, new_ldisc); if (retval < 0) { @@ -639,15 +639,15 @@ static void tty_reset_termios(struct tty_struct *tty) /** * tty_ldisc_reinit - reinitialise the tty ldisc * @tty: tty to reinit - * @ldisc: line discipline to reinitialize + * @disc: line discipline to reinitialize * * Switch the tty to a line discipline and leave the ldisc * state closed */ -static int tty_ldisc_reinit(struct tty_struct *tty, int ldisc) +static int tty_ldisc_reinit(struct tty_struct *tty, int disc) { - struct tty_ldisc *ld = tty_ldisc_get(tty, ldisc); + struct tty_ldisc *ld = tty_ldisc_get(tty, disc); if (IS_ERR(ld)) return -1; @@ -658,7 +658,7 @@ static int tty_ldisc_reinit(struct tty_struct *tty, int ldisc) * Switch the line discipline back */ tty->ldisc = ld; - tty_set_termios_ldisc(tty, ldisc); + tty_set_termios_ldisc(tty, disc); return 0; } diff --git a/include/linux/tty.h b/include/linux/tty.h index 83f127673bc9..e3b0afbde7b9 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -570,7 +570,7 @@ static inline int tty_port_users(struct tty_port *port) extern int tty_register_ldisc(int disc, struct tty_ldisc_ops *new_ldisc); extern int tty_unregister_ldisc(int disc); -extern int tty_set_ldisc(struct tty_struct *tty, int ldisc); +extern int tty_set_ldisc(struct tty_struct *tty, int disc); extern int tty_ldisc_setup(struct tty_struct *tty, struct tty_struct *o_tty); extern void tty_ldisc_release(struct tty_struct *tty); extern void tty_ldisc_init(struct tty_struct *tty); -- cgit v1.2.3 From 892d1fa7eaaed9d3c04954cb140c34ebc3393932 Mon Sep 17 00:00:00 2001 From: Peter Hurley Date: Sun, 10 Jan 2016 22:41:06 -0800 Subject: tty: Destroy ldisc instance on hangup Currently, when the tty is hungup, the ldisc is re-instanced; ie., the current instance is destroyed and a new instance is created. The purpose of this design was to guarantee a valid, open ldisc for the lifetime of the tty. However, now that tty buffers are owned by and have lifetime equivalent to the tty_port (since v3.10), any data received immediately after the ldisc is re-instanced may cause continued driver i/o operations concurrently with the driver's hangup() operation. For drivers that shutdown h/w on hangup, this is unexpected and usually bad. For example, the serial core may free the xmit buffer page concurrently with an in-progress write() operation (triggered by echo). With the existing stable and robust ldisc reference handling, the cleaned-up tty_reopen(), the straggling unsafe ldisc use cleaned up, and the preparation to properly handle a NULL tty->ldisc, the ldisc instance can be destroyed and only re-instanced when the tty is re-opened. 
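Schematically, the race being closed looks like this (an illustrative interleaving, with the serial core standing in for any driver that shuts down h/w in its hangup() method):

	CPU 0 (hangup)                     CPU 1 (late input)
	--------------                     ------------------
	tty_ldisc_hangup()
	  old ldisc closed,
	  new instance opened
	                                   flush_to_ldisc()
	                                     -> receive_buf() -> echo
	                                     -> driver write()
	driver ->hangup()
	  h/w shut down,
	  xmit buffer page freed           ... write still in progress

With the ldisc instance destroyed at hangup instead, no new ldisc i/o can start until the tty is re-opened.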
If the tty was opened as /dev/console or /dev/tty0, the original behavior of re-instancing the ldisc is retained (the 'reinit' parameter to tty_ldisc_hangup() is true). This is required since those file descriptors are never hung up. This patch has negligible impact on userspace; the tty file_operations ptr is changed to point to the hungup file operations _before_ the ldisc instance is destroyed, so only racing file operations might now retrieve a NULL ldisc reference (which is simply handled as if the hungup file operation had been called instead -- see "tty: Prepare for destroying line discipline on hangup"). This resolves a long-standing FIXME and several crash reports. Signed-off-by: Peter Hurley Signed-off-by: Greg Kroah-Hartman --- drivers/tty/tty_io.c | 12 ++++++------ drivers/tty/tty_ldisc.c | 40 +++++++++++++++++----------------------- include/linux/tty.h | 3 ++- 3 files changed, 25 insertions(+), 30 deletions(-) (limited to 'include/linux') diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c index 7f556e3c1515..d3ecbb513fa1 100644 --- a/drivers/tty/tty_io.c +++ b/drivers/tty/tty_io.c @@ -727,7 +727,7 @@ static void __tty_hangup(struct tty_struct *tty, int exit_session) while (refs--) tty_kref_put(tty); - tty_ldisc_hangup(tty); + tty_ldisc_hangup(tty, cons_filp != NULL); spin_lock_irq(&tty->ctrl_lock); clear_bit(TTY_THROTTLED, &tty->flags); @@ -752,10 +752,9 @@ static void __tty_hangup(struct tty_struct *tty, int exit_session) } else if (tty->ops->hangup) tty->ops->hangup(tty); /* - * We don't want to have driver/ldisc interactions beyond - * the ones we did here. The driver layer expects no - * calls after ->hangup() from the ldisc side. However we - * can't yet guarantee all that. + * We don't want to have driver/ldisc interactions beyond the ones + * we did here. The driver layer expects no calls after ->hangup() + * from the ldisc side, which is now guaranteed. */ set_bit(TTY_HUPPED, &tty->flags); tty_unlock(tty); @@ -1475,7 +1474,8 @@ static int tty_reopen(struct tty_struct *tty) tty->count++; - WARN_ON(!tty->ldisc); + if (!tty->ldisc) + return tty_ldisc_reinit(tty, tty->termios.c_line); return 0; } diff --git a/drivers/tty/tty_ldisc.c b/drivers/tty/tty_ldisc.c index e18c8e864110..c6f970d63060 100644 --- a/drivers/tty/tty_ldisc.c +++ b/drivers/tty/tty_ldisc.c @@ -257,6 +257,9 @@ const struct file_operations tty_ldiscs_proc_fops = { * reference to it. If the line discipline is in flux then * wait patiently until it changes. * + * Returns: NULL if the tty has been hungup and not re-opened with + * a new file descriptor, otherwise valid ldisc reference + * * Note: Must not be called from an IRQ/timer context. The caller * must also be careful not to hold other locks that will deadlock * against a discipline change, such as an existing ldisc reference @@ -642,14 +645,15 @@ static void tty_reset_termios(struct tty_struct *tty) * @disc: line discipline to reinitialize * * Completely reinitialize the line discipline state, by closing the - * current instance and opening a new instance. If an error occurs opening - * the new non-N_TTY instance, the instance is dropped and tty->ldisc reset - * to NULL. The caller can then retry with N_TTY instead. + * current instance, if there is one, and opening a new instance. If + * an error occurs opening the new non-N_TTY instance, the instance + * is dropped and tty->ldisc reset to NULL. The caller can then retry + * with N_TTY instead.
* * Returns 0 if successful, otherwise error code < 0 */ -static int tty_ldisc_reinit(struct tty_struct *tty, int disc) +int tty_ldisc_reinit(struct tty_struct *tty, int disc) { struct tty_ldisc *ld; int retval; @@ -693,11 +697,9 @@ static int tty_ldisc_reinit(struct tty_struct *tty, int disc) * tty itself so we must be careful about locking rules. */ -void tty_ldisc_hangup(struct tty_struct *tty) +void tty_ldisc_hangup(struct tty_struct *tty, bool reinit) { struct tty_ldisc *ld; - int reset = tty->driver->flags & TTY_DRIVER_RESET_TERMIOS; - int err = 0; tty_ldisc_debug(tty, "%p: hangup\n", tty->ldisc); @@ -725,25 +727,17 @@ void tty_ldisc_hangup(struct tty_struct *tty) */ tty_ldisc_lock(tty, MAX_SCHEDULE_TIMEOUT); - if (tty->ldisc) { + if (tty->driver->flags & TTY_DRIVER_RESET_TERMIOS) + tty_reset_termios(tty); - /* At this point we have a halted ldisc; we want to close it and - reopen a new ldisc. We could defer the reopen to the next - open but it means auditing a lot of other paths so this is - a FIXME */ - if (reset == 0) - err = tty_ldisc_reinit(tty, tty->termios.c_line); - - /* If the re-open fails or we reset then go to N_TTY. The - N_TTY open cannot fail */ - if (reset || err < 0) - tty_ldisc_reinit(tty, N_TTY); + if (tty->ldisc) { + if (reinit) { + if (tty_ldisc_reinit(tty, tty->termios.c_line) < 0) + tty_ldisc_reinit(tty, N_TTY); + } else + tty_ldisc_kill(tty); } tty_ldisc_unlock(tty); - if (reset) - tty_reset_termios(tty); - - tty_ldisc_debug(tty, "%p: re-opened\n", tty->ldisc); } /** diff --git a/include/linux/tty.h b/include/linux/tty.h index e3b0afbde7b9..95e52c6b9696 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -490,7 +490,8 @@ extern int tty_set_termios(struct tty_struct *tty, struct ktermios *kt); extern struct tty_ldisc *tty_ldisc_ref(struct tty_struct *); extern void tty_ldisc_deref(struct tty_ldisc *); extern struct tty_ldisc *tty_ldisc_ref_wait(struct tty_struct *); -extern void tty_ldisc_hangup(struct tty_struct *tty); +extern void tty_ldisc_hangup(struct tty_struct *tty, bool reset); +extern int tty_ldisc_reinit(struct tty_struct *tty, int disc); extern const struct file_operations tty_ldiscs_proc_fops; extern void tty_wakeup(struct tty_struct *tty); -- cgit v1.2.3 From 27228732aa94f3883433fab2f43eee373c638f2f Mon Sep 17 00:00:00 2001 From: Peter Hurley Date: Sat, 9 Jan 2016 21:35:19 -0800 Subject: tty: Eliminate global symbol tty_ldisc_N_TTY Reduce global tty symbols; move and rename tty_ldisc_begin() as n_tty_init() and redefine the N_TTY ldisc ops as file scope. Signed-off-by: Peter Hurley Signed-off-by: Greg Kroah-Hartman --- drivers/tty/n_tty.c | 12 ++++++++---- drivers/tty/tty_io.c | 2 +- drivers/tty/tty_ldisc.c | 6 ------ include/linux/tty.h | 3 +-- 4 files changed, 10 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/drivers/tty/n_tty.c b/drivers/tty/n_tty.c index 90eca2605fbf..dae628f19912 100644 --- a/drivers/tty/n_tty.c +++ b/drivers/tty/n_tty.c @@ -2497,7 +2497,7 @@ static void n_tty_fasync(struct tty_struct *tty, int on) } } -struct tty_ldisc_ops tty_ldisc_N_TTY = { +static struct tty_ldisc_ops n_tty_ops = { .magic = TTY_LDISC_MAGIC, .name = "n_tty", .open = n_tty_open, @@ -2518,14 +2518,18 @@ struct tty_ldisc_ops tty_ldisc_N_TTY = { * n_tty_inherit_ops - inherit N_TTY methods * @ops: struct tty_ldisc_ops where to save N_TTY methods * - * Enables a 'subclass' line discipline to 'inherit' N_TTY - * methods. + * Enables a 'subclass' line discipline to 'inherit' N_TTY methods. 
*/ void n_tty_inherit_ops(struct tty_ldisc_ops *ops) { - *ops = tty_ldisc_N_TTY; + *ops = n_tty_ops; ops->owner = NULL; ops->refcount = ops->flags = 0; } EXPORT_SYMBOL_GPL(n_tty_inherit_ops); + +void __init n_tty_init(void) +{ + tty_register_ldisc(N_TTY, &n_tty_ops); +} diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c index cb0a0c82279b..798acb690092 100644 --- a/drivers/tty/tty_io.c +++ b/drivers/tty/tty_io.c @@ -3576,7 +3576,7 @@ void __init console_init(void) initcall_t *call; /* Setup the default TTY line discipline. */ - tty_ldisc_begin(); + n_tty_init(); /* * set up the console device so that later boot sequences can diff --git a/drivers/tty/tty_ldisc.c b/drivers/tty/tty_ldisc.c index 4cb5e572c7b8..68947f6de5ad 100644 --- a/drivers/tty/tty_ldisc.c +++ b/drivers/tty/tty_ldisc.c @@ -824,9 +824,3 @@ void tty_ldisc_deinit(struct tty_struct *tty) tty_ldisc_put(tty->ldisc); tty->ldisc = NULL; } - -void tty_ldisc_begin(void) -{ - /* Setup the default TTY line discipline. */ - (void) tty_register_ldisc(N_TTY, &tty_ldisc_N_TTY); -} diff --git a/include/linux/tty.h b/include/linux/tty.h index 95e52c6b9696..f6c07fd12ef4 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -576,7 +576,6 @@ extern int tty_ldisc_setup(struct tty_struct *tty, struct tty_struct *o_tty); extern void tty_ldisc_release(struct tty_struct *tty); extern void tty_ldisc_init(struct tty_struct *tty); extern void tty_ldisc_deinit(struct tty_struct *tty); -extern void tty_ldisc_begin(void); static inline int tty_ldisc_receive_buf(struct tty_ldisc *ld, unsigned char *p, char *f, int count) @@ -593,8 +592,8 @@ static inline int tty_ldisc_receive_buf(struct tty_ldisc *ld, unsigned char *p, /* n_tty.c */ -extern struct tty_ldisc_ops tty_ldisc_N_TTY; extern void n_tty_inherit_ops(struct tty_ldisc_ops *ops); +extern void __init n_tty_init(void); /* tty_audit.c */ #ifdef CONFIG_AUDIT -- cgit v1.2.3 From de5583d670a8b10f157fdcc7afaddfd1ec423a69 Mon Sep 17 00:00:00 2001 From: Peter Hurley Date: Sat, 9 Jan 2016 21:35:20 -0800 Subject: tty: Remove declarations to non-existent functions tty_read_raw_data() and tty_signal() no longer exist; remove declarations. 
Signed-off-by: Peter Hurley Signed-off-by: Greg Kroah-Hartman --- include/linux/tty.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tty.h b/include/linux/tty.h index f6c07fd12ef4..26a9b4b3efe9 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -433,8 +433,6 @@ extern struct device *tty_register_device_attr(struct tty_driver *driver, void *drvdata, const struct attribute_group **attr_grp); extern void tty_unregister_device(struct tty_driver *driver, unsigned index); -extern int tty_read_raw_data(struct tty_struct *tty, unsigned char *bufp, - int buflen); extern void tty_write_message(struct tty_struct *tty, char *msg); extern int tty_send_xchar(struct tty_struct *tty, char ch); extern int tty_put_char(struct tty_struct *tty, unsigned char c); @@ -448,7 +446,6 @@ extern int tty_unthrottle_safe(struct tty_struct *tty); extern int tty_do_resize(struct tty_struct *tty, struct winsize *ws); extern int is_current_pgrp_orphaned(void); extern int is_ignored(int sig); -extern int tty_signal(int sig, struct tty_struct *tty); extern void tty_hangup(struct tty_struct *tty); extern void tty_vhangup(struct tty_struct *tty); extern int tty_hung_up_p(struct file *filp); -- cgit v1.2.3 From e802ca0e1892865acdde90f1aaed37333c5660c2 Mon Sep 17 00:00:00 2001 From: Peter Hurley Date: Sat, 9 Jan 2016 21:35:21 -0800 Subject: tty: Move tty_check_change() helper Move is_ignored() to drivers/tty/tty_io.c and re-declare in file scope. Signed-off-by: Peter Hurley Signed-off-by: Greg Kroah-Hartman --- drivers/tty/n_tty.c | 6 ------ drivers/tty/tty_io.c | 6 ++++++ include/linux/tty.h | 1 - 3 files changed, 6 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/drivers/tty/n_tty.c b/drivers/tty/n_tty.c index dae628f19912..f4555a261420 100644 --- a/drivers/tty/n_tty.c +++ b/drivers/tty/n_tty.c @@ -1763,12 +1763,6 @@ static int n_tty_receive_buf2(struct tty_struct *tty, const unsigned char *cp, return n_tty_receive_buf_common(tty, cp, fp, count, 1); } -int is_ignored(int sig) -{ - return (sigismember(¤t->blocked, sig) || - current->sighand->action[sig-1].sa.sa_handler == SIG_IGN); -} - /** * n_tty_set_termios - termios data changed * @tty: terminal diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c index 798acb690092..e8996fc3ff1c 100644 --- a/drivers/tty/tty_io.c +++ b/drivers/tty/tty_io.c @@ -381,6 +381,12 @@ struct tty_driver *tty_find_polling_driver(char *name, int *line) EXPORT_SYMBOL_GPL(tty_find_polling_driver); #endif +static int is_ignored(int sig) +{ + return (sigismember(¤t->blocked, sig) || + current->sighand->action[sig-1].sa.sa_handler == SIG_IGN); +} + /** * tty_check_change - check for POSIX terminal changes * @tty: tty to check diff --git a/include/linux/tty.h b/include/linux/tty.h index 26a9b4b3efe9..ba7120dccccb 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -445,7 +445,6 @@ extern int tty_throttle_safe(struct tty_struct *tty); extern int tty_unthrottle_safe(struct tty_struct *tty); extern int tty_do_resize(struct tty_struct *tty, struct winsize *ws); extern int is_current_pgrp_orphaned(void); -extern int is_ignored(int sig); extern void tty_hangup(struct tty_struct *tty); extern void tty_vhangup(struct tty_struct *tty); extern int tty_hung_up_p(struct file *filp); -- cgit v1.2.3 From 527ffc11e1c0aa049b3a831d6f07ed98e06a0819 Mon Sep 17 00:00:00 2001 From: Peter Hurley Date: Sat, 9 Jan 2016 21:35:22 -0800 Subject: tty: Remove unreferenced tty flags macro TTY_DEBUG The TTY_DEBUG macro is not used; remove. 
Signed-off-by: Peter Hurley Signed-off-by: Greg Kroah-Hartman --- include/linux/tty.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/tty.h b/include/linux/tty.h index ba7120dccccb..da16b5980c25 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -336,7 +336,6 @@ struct tty_file_private { #define TTY_IO_ERROR 1 /* Cause an I/O error (may be no ldisc too) */ #define TTY_OTHER_CLOSED 2 /* Other side (if any) has closed */ #define TTY_EXCLUSIVE 3 /* Exclusive open mode */ -#define TTY_DEBUG 4 /* Debugging */ #define TTY_DO_WRITE_WAKEUP 5 /* Call write_wakeup after queuing new */ #define TTY_OTHER_DONE 6 /* Closed pty has completed input processing */ #define TTY_LDISC_OPEN 11 /* Line discipline is open */ -- cgit v1.2.3 From 4a510969374ab8853451c337e43d28fb864e43fd Mon Sep 17 00:00:00 2001 From: Peter Hurley Date: Sat, 9 Jan 2016 21:35:23 -0800 Subject: tty: Make tty_files_lock per-tty Access to tty->tty_files list is always per-tty, never for all ttys simultaneously. Replace global tty_files_lock spinlock with per-tty ->files_lock. Initialize when the ->tty_files list is inited, in alloc_tty_struct(). Signed-off-by: Peter Hurley Signed-off-by: Greg Kroah-Hartman --- drivers/tty/tty_io.c | 25 ++++++++++++------------- include/linux/tty.h | 2 +- security/selinux/hooks.c | 4 ++-- 3 files changed, 15 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c index e8996fc3ff1c..c5b4274584dc 100644 --- a/drivers/tty/tty_io.c +++ b/drivers/tty/tty_io.c @@ -138,9 +138,6 @@ LIST_HEAD(tty_drivers); /* linked list of tty drivers */ /* Mutex to protect creating and releasing a tty */ DEFINE_MUTEX(tty_mutex); -/* Spinlock to protect the tty->tty_files list */ -DEFINE_SPINLOCK(tty_files_lock); - static ssize_t tty_read(struct file *, char __user *, size_t, loff_t *); static ssize_t tty_write(struct file *, const char __user *, size_t, loff_t *); ssize_t redirected_tty_write(struct file *, const char __user *, @@ -202,9 +199,9 @@ void tty_add_file(struct tty_struct *tty, struct file *file) priv->tty = tty; priv->file = file; - spin_lock(&tty_files_lock); + spin_lock(&tty->files_lock); list_add(&priv->list, &tty->tty_files); - spin_unlock(&tty_files_lock); + spin_unlock(&tty->files_lock); } /** @@ -225,10 +222,11 @@ void tty_free_file(struct file *file) static void tty_del_file(struct file *file) { struct tty_file_private *priv = file->private_data; + struct tty_struct *tty = priv->tty; - spin_lock(&tty_files_lock); + spin_lock(&tty->files_lock); list_del(&priv->list); - spin_unlock(&tty_files_lock); + spin_unlock(&tty->files_lock); tty_free_file(file); } @@ -286,11 +284,11 @@ static int check_tty_count(struct tty_struct *tty, const char *routine) struct list_head *p; int count = 0; - spin_lock(&tty_files_lock); + spin_lock(&tty->files_lock); list_for_each(p, &tty->tty_files) { count++; } - spin_unlock(&tty_files_lock); + spin_unlock(&tty->files_lock); if (tty->driver->type == TTY_DRIVER_TYPE_PTY && tty->driver->subtype == PTY_TYPE_SLAVE && tty->link && tty->link->count) @@ -713,7 +711,7 @@ static void __tty_hangup(struct tty_struct *tty, int exit_session) workqueue with the lock held */ check_tty_count(tty, "tty_hangup"); - spin_lock(&tty_files_lock); + spin_lock(&tty->files_lock); /* This breaks for file handles being sent over AF_UNIX sockets ? 
*/ list_for_each_entry(priv, &tty->tty_files, list) { filp = priv->file; if (filp->f_op->write == redirected_tty_write) cons_filp = filp; if (filp->f_op->write != tty_write) continue; closecount++; __tty_fasync(-1, filp, 0); /* can't block */ filp->f_op = &hung_up_tty_fops; } spin_unlock(&tty->files_lock); refs = tty_signal_session_leader(tty, exit_session); /* Account for the p->signal references we killed */ @@ -1637,9 +1635,9 @@ static void release_one_tty(struct work_struct *work) tty_driver_kref_put(driver); module_put(owner); - spin_lock(&tty_files_lock); + spin_lock(&tty->files_lock); list_del_init(&tty->tty_files); - spin_unlock(&tty_files_lock); + spin_unlock(&tty->files_lock); put_pid(tty->pgrp); put_pid(tty->session); @@ -3176,6 +3174,7 @@ struct tty_struct *alloc_tty_struct(struct tty_driver *driver, int idx) mutex_init(&tty->atomic_write_lock); spin_lock_init(&tty->ctrl_lock); spin_lock_init(&tty->flow_lock); + spin_lock_init(&tty->files_lock); INIT_LIST_HEAD(&tty->tty_files); INIT_WORK(&tty->SAK_work, do_SAK_work); diff --git a/include/linux/tty.h b/include/linux/tty.h index da16b5980c25..780adbfc6dce 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -302,6 +302,7 @@ struct tty_struct { struct work_struct hangup_work; void *disc_data; void *driver_data; + spinlock_t files_lock; /* protects tty_files list */ struct list_head tty_files; #define N_TTY_BUF_SIZE 4096 @@ -508,7 +509,6 @@ extern int tty_standard_install(struct tty_driver *driver, struct tty_struct *tty); extern struct mutex tty_mutex; -extern spinlock_t tty_files_lock; #define tty_is_writelocked(tty) (mutex_is_locked(&tty->atomic_write_lock)) diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index f8110cfd80ff..8010bc5391c3 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -2415,7 +2415,7 @@ static inline void flush_unauthorized_files(const struct cred *cred, tty = get_current_tty(); if (tty) { - spin_lock(&tty_files_lock); + spin_lock(&tty->files_lock); if (!list_empty(&tty->tty_files)) { struct tty_file_private *file_priv; @@ -2430,7 +2430,7 @@ static inline void flush_unauthorized_files(const struct cred *cred, if (file_path_has_perm(cred, file, FILE__READ | FILE__WRITE)) drop_tty = 1; } - spin_unlock(&tty_files_lock); + spin_unlock(&tty->files_lock); tty_kref_put(tty); } /* Reset controlling tty. */ -- cgit v1.2.3 From 309426ae69cdf35b0d72a8bd59a5081f8ddccddd Mon Sep 17 00:00:00 2001 From: Peter Hurley Date: Sat, 9 Jan 2016 22:55:27 -0800 Subject: tty: audit: Remove icanon mode from call chain The tty termios bits cannot change while n_tty_read() is in the i/o loop; the termios_rwsem ensures mutual exclusion with termios changes in n_tty_set_termios(). Check L_ICANON() directly and eliminate the icanon parameter. NB: tty_audit_add_data() => tty_audit_buf_get() => tty_audit_buf_alloc() is a single path; i.e., tty_audit_buf_get() and tty_audit_buf_alloc() have no other callers.
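As a sketch of the invariant this relies on (a reader-side fragment for illustration, not part of the patch):

	/* n_tty_read() holds termios_rwsem for reading across the i/o loop,
	 * so termios bits -- including ICANON -- cannot change underneath it
	 * and may be sampled directly:
	 */
	down_read(&tty->termios_rwsem);
	canon = !!L_ICANON(tty);
	/* ... copy data, tty_audit_add_data(tty, from, n), etc. ... */
	up_read(&tty->termios_rwsem);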
Signed-off-by: Peter Hurley Signed-off-by: Greg Kroah-Hartman --- drivers/tty/n_tty.c | 6 +++--- drivers/tty/tty_audit.c | 22 +++++++++------------- include/linux/tty.h | 4 ++-- 3 files changed, 14 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/drivers/tty/n_tty.c b/drivers/tty/n_tty.c index bacafda44bf3..4fbc5defbcd8 100644 --- a/drivers/tty/n_tty.c +++ b/drivers/tty/n_tty.c @@ -162,7 +162,7 @@ static int tty_copy_to_user(struct tty_struct *tty, void __user *to, int uncopied; if (n > size) { - tty_audit_add_data(tty, from, size, ldata->icanon); + tty_audit_add_data(tty, from, size); uncopied = copy_to_user(to, from, size); if (uncopied) return uncopied; @@ -171,7 +171,7 @@ static int tty_copy_to_user(struct tty_struct *tty, void __user *to, from = ldata->read_buf; } - tty_audit_add_data(tty, from, n, ldata->icanon); + tty_audit_add_data(tty, from, n); return copy_to_user(to, from, n); } @@ -1978,7 +1978,7 @@ static int copy_from_read_buf(struct tty_struct *tty, retval = copy_to_user(*b, from, n); n -= retval; is_eof = n == 1 && *from == EOF_CHAR(tty); - tty_audit_add_data(tty, from, n, ldata->icanon); + tty_audit_add_data(tty, from, n); smp_store_release(&ldata->read_tail, ldata->read_tail + n); /* Turn single EOF into zero-length read */ if (L_EXTPROC(tty) && ldata->icanon && is_eof && diff --git a/drivers/tty/tty_audit.c b/drivers/tty/tty_audit.c index ead924e4bd53..d2a004abeb5e 100644 --- a/drivers/tty/tty_audit.c +++ b/drivers/tty/tty_audit.c @@ -22,8 +22,7 @@ struct tty_audit_buf { unsigned char *data; /* Allocated size N_TTY_BUF_SIZE */ }; -static struct tty_audit_buf *tty_audit_buf_alloc(int major, int minor, - unsigned icanon) +static struct tty_audit_buf *tty_audit_buf_alloc(struct tty_struct *tty) { struct tty_audit_buf *buf; @@ -35,9 +34,9 @@ static struct tty_audit_buf *tty_audit_buf_alloc(int major, int minor, goto err_buf; atomic_set(&buf->count, 1); mutex_init(&buf->mutex); - buf->major = major; - buf->minor = minor; - buf->icanon = icanon; + buf->major = tty->driver->major; + buf->minor = tty->driver->minor_start + tty->index; + buf->icanon = !!L_ICANON(tty); buf->valid = 0; return buf; @@ -216,8 +215,7 @@ int tty_audit_push_current(void) * if TTY auditing is disabled or out of memory. Otherwise, return a new * reference to the buffer. */ -static struct tty_audit_buf *tty_audit_buf_get(struct tty_struct *tty, - unsigned icanon) +static struct tty_audit_buf *tty_audit_buf_get(struct tty_struct *tty) { struct tty_audit_buf *buf, *buf2; unsigned long flags; @@ -234,9 +232,7 @@ static struct tty_audit_buf *tty_audit_buf_get(struct tty_struct *tty, } spin_unlock_irqrestore(¤t->sighand->siglock, flags); - buf2 = tty_audit_buf_alloc(tty->driver->major, - tty->driver->minor_start + tty->index, - icanon); + buf2 = tty_audit_buf_alloc(tty); if (buf2 == NULL) { audit_log_lost("out of memory in TTY auditing"); return NULL; @@ -265,13 +261,13 @@ static struct tty_audit_buf *tty_audit_buf_get(struct tty_struct *tty, * * Audit @data of @size from @tty, if necessary. 
*/ -void tty_audit_add_data(struct tty_struct *tty, const void *data, - size_t size, unsigned icanon) +void tty_audit_add_data(struct tty_struct *tty, const void *data, size_t size) { struct tty_audit_buf *buf; int major, minor; int audit_log_tty_passwd; unsigned long flags; + unsigned int icanon = !!L_ICANON(tty); if (unlikely(size == 0)) return; @@ -286,7 +282,7 @@ void tty_audit_add_data(struct tty_struct *tty, const void *data, if (!audit_log_tty_passwd && icanon && !L_ECHO(tty)) return; - buf = tty_audit_buf_get(tty, icanon); + buf = tty_audit_buf_get(tty); if (!buf) return; diff --git a/include/linux/tty.h b/include/linux/tty.h index 780adbfc6dce..c011dc205e5c 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -593,7 +593,7 @@ extern void __init n_tty_init(void); /* tty_audit.c */ #ifdef CONFIG_AUDIT extern void tty_audit_add_data(struct tty_struct *tty, const void *data, - size_t size, unsigned icanon); + size_t size); extern void tty_audit_exit(void); extern void tty_audit_fork(struct signal_struct *sig); extern void tty_audit_tiocsti(struct tty_struct *tty, char ch); @@ -601,7 +601,7 @@ extern void tty_audit_push(struct tty_struct *tty); extern int tty_audit_push_current(void); #else static inline void tty_audit_add_data(struct tty_struct *tty, const void *data, - size_t size, unsigned icanon) + size_t size) { } static inline void tty_audit_tiocsti(struct tty_struct *tty, char ch) -- cgit v1.2.3 From b50819f437c094b4beb2e8684fbe12bbe79fb331 Mon Sep 17 00:00:00 2001 From: Peter Hurley Date: Sat, 9 Jan 2016 22:55:30 -0800 Subject: tty: audit: Ignore current association for audit push In canonical read mode, each line read and logged is pushed separately with tty_audit_push(). For all single-threaded processes and multi-threaded processes reading from only one tty, this patch has no effect; the last line read will still be the entry pushed to the audit log because the tty association cannot have changed between tty_audit_add_data() and tty_audit_push(). For multi-threaded processes reading from different ttys concurrently, the audit log will have mixed log entries anyway. Consider two ttys audited concurrently:

    CPU0                        CPU1
    ----------                  ------------
    tty_audit_add_data(ttyA)
                                tty_audit_add_data(ttyB)
    tty_audit_push()
                                tty_audit_add_data(ttyB)
                                tty_audit_push()

This patch will now cause the ttyB output to be split into separate audit log entries. However, this possibility is equally likely without this patch:

    CPU0                        CPU1
    ----------                  ------------
                                tty_audit_add_data(ttyB)
    tty_audit_add_data(ttyA)
    tty_audit_push()
                                tty_audit_add_data(ttyB)
                                tty_audit_push()

Mixed canonical and non-canonical reads have similar races.
Signed-off-by: Peter Hurley Signed-off-by: Greg Kroah-Hartman --- drivers/tty/n_tty.c | 2 +- drivers/tty/tty_audit.c | 11 +++-------- include/linux/tty.h | 4 ++-- 3 files changed, 6 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/drivers/tty/n_tty.c b/drivers/tty/n_tty.c index 4fbc5defbcd8..827206914b02 100644 --- a/drivers/tty/n_tty.c +++ b/drivers/tty/n_tty.c @@ -2072,7 +2072,7 @@ static int canon_copy_from_read_buf(struct tty_struct *tty, ldata->line_start = ldata->read_tail; else ldata->push = 0; - tty_audit_push(tty); + tty_audit_push(); } return 0; } diff --git a/drivers/tty/tty_audit.c b/drivers/tty/tty_audit.c index 5f65653cee48..5ae48396e265 100644 --- a/drivers/tty/tty_audit.c +++ b/drivers/tty/tty_audit.c @@ -313,9 +313,9 @@ void tty_audit_add_data(struct tty_struct *tty, const void *data, size_t size) /** * tty_audit_push - Push buffered data out * - * Make sure no audit data is pending for @tty on the current process. + * Make sure no audit data is pending on the current process. */ -void tty_audit_push(struct tty_struct *tty) +void tty_audit_push(void) { struct tty_audit_buf *buf; unsigned long flags; @@ -331,13 +331,8 @@ void tty_audit_push(struct tty_struct *tty) spin_unlock_irqrestore(&current->sighand->siglock, flags); if (buf) { - int major, minor; - - major = tty->driver->major; - minor = tty->driver->minor_start + tty->index; mutex_lock(&buf->mutex); - if (buf->major == major && buf->minor == minor) - tty_audit_buf_push(buf); + tty_audit_buf_push(buf); mutex_unlock(&buf->mutex); tty_audit_buf_put(buf); } diff --git a/include/linux/tty.h b/include/linux/tty.h index c011dc205e5c..83d74dcfb3c8 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -597,7 +597,7 @@ extern void tty_audit_add_data(struct tty_struct *tty, const void *data, extern void tty_audit_exit(void); extern void tty_audit_fork(struct signal_struct *sig); extern void tty_audit_tiocsti(struct tty_struct *tty, char ch); -extern void tty_audit_push(struct tty_struct *tty); +extern void tty_audit_push(void); extern int tty_audit_push_current(void); #else static inline void tty_audit_add_data(struct tty_struct *tty, const void *data, @@ -613,7 +613,7 @@ static inline void tty_audit_exit(void) static inline void tty_audit_fork(struct signal_struct *sig) { } -static inline void tty_audit_push(struct tty_struct *tty) +static inline void tty_audit_push(void) { } static inline int tty_audit_push_current(void) -- cgit v1.2.3 From 37282a77954aa2dbb339d15902290f25b868d2e8 Mon Sep 17 00:00:00 2001 From: Peter Hurley Date: Sat, 9 Jan 2016 22:55:31 -0800 Subject: tty: audit: Combine push functions tty_audit_push() and tty_audit_push_current() perform identical tasks; eliminate the tty_audit_push() implementation and the tty_audit_push_current() name. Signed-off-by: Peter Hurley Signed-off-by: Greg Kroah-Hartman --- drivers/tty/tty_audit.c | 35 +++-------------------------------- include/linux/tty.h | 8 ++------ kernel/audit.c | 2 +- 3 files changed, 6 insertions(+), 39 deletions(-) (limited to 'include/linux') diff --git a/drivers/tty/tty_audit.c b/drivers/tty/tty_audit.c index 5ae48396e265..6b82c3ce321f 100644 --- a/drivers/tty/tty_audit.c +++ b/drivers/tty/tty_audit.c @@ -172,12 +172,11 @@ void tty_audit_tiocsti(struct tty_struct *tty, char ch) } /** - * tty_audit_push_current - Flush current's pending audit data + * tty_audit_push - Flush current's pending audit data * - * Try to lock sighand and get a reference to the tty audit buffer if available.
- * Flush the buffer or return an appropriate error code. + * Returns 0 if success, -EPERM if tty audit is disabled */ -int tty_audit_push_current(void) +int tty_audit_push(void) { struct tty_audit_buf *buf = ERR_PTR(-EPERM); unsigned long flags; @@ -309,31 +308,3 @@ void tty_audit_add_data(struct tty_struct *tty, const void *data, size_t size) mutex_unlock(&buf->mutex); tty_audit_buf_put(buf); } - -/** - * tty_audit_push - Push buffered data out - * - * Make sure no audit data is pending on the current process. - */ -void tty_audit_push(void) -{ - struct tty_audit_buf *buf; - unsigned long flags; - - spin_lock_irqsave(&current->sighand->siglock, flags); - if (likely(!current->signal->audit_tty)) { - spin_unlock_irqrestore(&current->sighand->siglock, flags); - return; - } - buf = current->signal->tty_audit_buf; - if (buf) - atomic_inc(&buf->count); - spin_unlock_irqrestore(&current->sighand->siglock, flags); - - if (buf) { - mutex_lock(&buf->mutex); - tty_audit_buf_push(buf); - mutex_unlock(&buf->mutex); - tty_audit_buf_put(buf); - } -} diff --git a/include/linux/tty.h b/include/linux/tty.h index 83d74dcfb3c8..dea7d54d00d4 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -597,8 +597,7 @@ extern void tty_audit_add_data(struct tty_struct *tty, const void *data, extern void tty_audit_exit(void); extern void tty_audit_fork(struct signal_struct *sig); extern void tty_audit_tiocsti(struct tty_struct *tty, char ch); -extern void tty_audit_push(void); -extern int tty_audit_push_current(void); +extern int tty_audit_push(void); #else static inline void tty_audit_add_data(struct tty_struct *tty, const void *data, size_t size) @@ -613,10 +612,7 @@ static inline void tty_audit_exit(void) static inline void tty_audit_fork(struct signal_struct *sig) { } -static inline void tty_audit_push(void) -{ -} -static inline int tty_audit_push_current(void) +static inline int tty_audit_push(void) { return 0; } diff --git a/kernel/audit.c b/kernel/audit.c index 3a3e5deeda8d..610f221df069 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -920,7 +920,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) if (err == 1) { /* match or error */ err = 0; if (msg_type == AUDIT_USER_TTY) { - err = tty_audit_push_current(); + err = tty_audit_push(); if (err) break; } -- cgit v1.2.3 From 2e28d38ae1d9ced6ac2deb4001aca3f267304cdb Mon Sep 17 00:00:00 2001 From: Peter Hurley Date: Sat, 9 Jan 2016 22:55:33 -0800 Subject: tty: audit: Handle tty audit enable atomically The audit_tty and audit_tty_log_passwd fields are actually bool values, so merge them into a single memory location to access atomically.
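A hedged sketch of the encoding this uses (the flag names match the patch below; the helper function is invented for illustration): both booleans live in one word, so a single lockless load observes a coherent pair, and a clear bit can be tested with the bitwise complement:

    #include <linux/bits.h>
    #include <linux/compiler.h>

    #define AUDIT_TTY_ENABLE     BIT(0)
    #define AUDIT_TTY_LOG_PASSWD BIT(1)

    /* illustrative reader: one READ_ONCE() sees both flags together */
    static bool demo_tty_audit_enabled(unsigned int *flags_word)
    {
        unsigned int t = READ_ONCE(*flags_word);

        return !(~t & AUDIT_TTY_ENABLE);    /* clear bit => disabled */
    }

The writer side in the patch builds the new word with s.enabled | (-s.log_passwd & AUDIT_TTY_LOG_PASSWD): since s.log_passwd is 0 or 1, negating it yields all zeroes or all ones, which masks the LOG_PASSWD bit in, and xchg() then installs the word atomically while returning the old value for logging.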
NB: audit log operations may still occur after tty audit is disabled, which is consistent with the existing functionality. Signed-off-by: Peter Hurley Signed-off-by: Greg Kroah-Hartman --- drivers/tty/tty_audit.c | 53 ++++++++++++++++++++----------------------------- include/linux/audit.h | 4 ++++ include/linux/sched.h | 1 - kernel/audit.c | 25 +++++++++++------------ 4 files changed, 38 insertions(+), 45 deletions(-) (limited to 'include/linux') diff --git a/drivers/tty/tty_audit.c b/drivers/tty/tty_audit.c index 50380d87061d..3d90f88c5ff9 100644 --- a/drivers/tty/tty_audit.c +++ b/drivers/tty/tty_audit.c @@ -131,7 +131,6 @@ void tty_audit_exit(void) void tty_audit_fork(struct signal_struct *sig) { sig->audit_tty = current->signal->audit_tty; - sig->audit_tty_log_passwd = current->signal->audit_tty_log_passwd; } /** @@ -141,11 +140,9 @@ void tty_audit_tiocsti(struct tty_struct *tty, char ch) { struct tty_audit_buf *buf; dev_t dev; - int should_audit; unsigned long flags; spin_lock_irqsave(&current->sighand->siglock, flags); - should_audit = current->signal->audit_tty; buf = current->signal->tty_audit_buf; if (buf) atomic_inc(&buf->count); @@ -160,7 +157,7 @@ void tty_audit_tiocsti(struct tty_struct *tty, char ch) tty_audit_buf_put(buf); } - if (should_audit && audit_enabled) { + if (audit_enabled && (current->signal->audit_tty & AUDIT_TTY_ENABLE)) { kuid_t auid; unsigned int sessionid; @@ -177,29 +174,25 @@ void tty_audit_tiocsti(struct tty_struct *tty, char ch) */ int tty_audit_push(void) { - struct tty_audit_buf *buf = ERR_PTR(-EPERM); + struct tty_audit_buf *buf; unsigned long flags; + if (~current->signal->audit_tty & AUDIT_TTY_ENABLE) + return -EPERM; + spin_lock_irqsave(&current->sighand->siglock, flags); - if (current->signal->audit_tty) { - buf = current->signal->tty_audit_buf; - if (buf) - atomic_inc(&buf->count); - } + buf = current->signal->tty_audit_buf; + if (buf) + atomic_inc(&buf->count); spin_unlock_irqrestore(&current->sighand->siglock, flags); - /* - * Return 0 when signal->audit_tty set - * but current->signal->tty_audit_buf == NULL.
- */ - if (!buf || IS_ERR(buf)) - return PTR_ERR(buf); - - mutex_lock(&buf->mutex); - tty_audit_buf_push(buf); - mutex_unlock(&buf->mutex); + if (buf) { + mutex_lock(&buf->mutex); + tty_audit_buf_push(buf); + mutex_unlock(&buf->mutex); - tty_audit_buf_put(buf); + tty_audit_buf_put(buf); + } return 0; } @@ -218,8 +211,6 @@ static struct tty_audit_buf *tty_audit_buf_get(void) buf = NULL; buf2 = NULL; spin_lock_irqsave(&current->sighand->siglock, flags); - if (likely(!current->signal->audit_tty)) - goto out; buf = current->signal->tty_audit_buf; if (buf) { atomic_inc(&buf->count); @@ -233,9 +224,10 @@ return NULL; } - spin_lock_irqsave(&current->sighand->siglock, flags); - if (!current->signal->audit_tty) + if (~current->signal->audit_tty & AUDIT_TTY_ENABLE) goto out; + + spin_lock_irqsave(&current->sighand->siglock, flags); buf = current->signal->tty_audit_buf; if (!buf) { current->signal->tty_audit_buf = buf2; @@ -259,9 +251,8 @@ void tty_audit_add_data(struct tty_struct *tty, const void *data, size_t size) { struct tty_audit_buf *buf; - int audit_log_tty_passwd; - unsigned long flags; unsigned int icanon = !!L_ICANON(tty); + unsigned int audit_tty; dev_t dev; if (unlikely(size == 0)) return; @@ -271,10 +262,10 @@ void tty_audit_add_data(struct tty_struct *tty, const void *data, size_t size) && tty->driver->subtype == PTY_TYPE_MASTER) return; - spin_lock_irqsave(&current->sighand->siglock, flags); - audit_log_tty_passwd = current->signal->audit_tty_log_passwd; - spin_unlock_irqrestore(&current->sighand->siglock, flags); - if (!audit_log_tty_passwd && icanon && !L_ECHO(tty)) + audit_tty = READ_ONCE(current->signal->audit_tty); + if (~audit_tty & AUDIT_TTY_ENABLE) + return; + if ((~audit_tty & AUDIT_TTY_LOG_PASSWD) && icanon && !L_ECHO(tty)) return; buf = tty_audit_buf_get(); diff --git a/include/linux/audit.h b/include/linux/audit.h index b40ed5df5542..e38e3fc13ea8 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -109,6 +109,10 @@ extern int audit_classify_compat_syscall(int abi, unsigned syscall); /* maximized args number that audit_socketcall can process */ #define AUDITSC_ARGS 6 +/* bit values for ->signal->audit_tty */ +#define AUDIT_TTY_ENABLE BIT(0) +#define AUDIT_TTY_LOG_PASSWD BIT(1) + struct filename; extern void audit_log_session_info(struct audit_buffer *ab); diff --git a/include/linux/sched.h b/include/linux/sched.h index a10494a94cc3..a389222e9703 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -775,7 +775,6 @@ struct signal_struct { #endif #ifdef CONFIG_AUDIT unsigned audit_tty; - unsigned audit_tty_log_passwd; struct tty_audit_buf *tty_audit_buf; #endif diff --git a/kernel/audit.c b/kernel/audit.c index 610f221df069..2651e423b2dc 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -1030,20 +1030,19 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) break; case AUDIT_TTY_GET: { struct audit_tty_status s; - struct task_struct *tsk = current; + unsigned int t; - spin_lock(&tsk->sighand->siglock); - s.enabled = tsk->signal->audit_tty; - s.log_passwd = tsk->signal->audit_tty_log_passwd; - spin_unlock(&tsk->sighand->siglock); + t = READ_ONCE(current->signal->audit_tty); + s.enabled = t & AUDIT_TTY_ENABLE; + s.log_passwd = !!(t & AUDIT_TTY_LOG_PASSWD); audit_send_reply(skb, seq, AUDIT_TTY_GET, 0, 0, &s, sizeof(s)); break; } case AUDIT_TTY_SET: { struct audit_tty_status s, old; - struct task_struct *tsk = current; struct audit_buffer *ab; + unsigned int t;
memset(&s, 0, sizeof(s)); /* guard against past and future API changes */ @@ -1053,14 +1052,14 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) (s.log_passwd != 0 && s.log_passwd != 1)) err = -EINVAL; - spin_lock(&tsk->sighand->siglock); - old.enabled = tsk->signal->audit_tty; - old.log_passwd = tsk->signal->audit_tty_log_passwd; - if (!err) { - tsk->signal->audit_tty = s.enabled; - tsk->signal->audit_tty_log_passwd = s.log_passwd; + if (err) + t = READ_ONCE(current->signal->audit_tty); + else { + t = s.enabled | (-s.log_passwd & AUDIT_TTY_LOG_PASSWD); + t = xchg(&current->signal->audit_tty, t); } - spin_unlock(&tsk->sighand->siglock); + old.enabled = t & AUDIT_TTY_ENABLE; + old.log_passwd = !!(t & AUDIT_TTY_LOG_PASSWD); audit_log_common_recv_msg(&ab, AUDIT_CONFIG_CHANGE); audit_log_format(ab, " op=tty_set old-enabled=%d new-enabled=%d" -- cgit v1.2.3 From bee6741ca022f051ea1b46e16fb2ff0097643181 Mon Sep 17 00:00:00 2001 From: Peter Hurley Date: Sat, 9 Jan 2016 21:45:09 -0800 Subject: tty, n_tty: Remove fasync() ldisc notification Only the N_TTY line discipline implements the signal-driven i/o notification enabled/disabled by fcntl(F_SETFL, O_ASYNC). The ldisc fasync() notification is sent to the ldisc when the enable state has changed (the tty core is notified via the fasync() VFS file operation). The N_TTY line discipline used the enable state to change the wakeup condition (minimum_to_wake = 1) for notifying the signal handler i/o is available. However, just the presence of data is sufficient and necessary to signal i/o is available, so changing minimum_to_wake is unnecessary (and creates a race condition with read() and poll() which may be concurrently updating minimum_to_wake). Furthermore, since the kill_fasync() VFS helper performs no action if the fasync list is empty, calling unconditionally is preferred; if signal-driven i/o has just been disabled, no signal will be sent by kill_fasync() anyway, so notification of the change via the ldisc fasync() method is superfluous.
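As a hedged sketch of why no enable-state bookkeeping is needed (the wrapper function is hypothetical; kill_fasync() is the real VFS helper):

    #include <linux/fs.h>

    /* wake any O_ASYNC subscribers; safe to call unconditionally */
    static void demo_notify_readable(struct fasync_struct **fa)
    {
        /*
         * kill_fasync() is a no-op when the list is empty, so the
         * caller needs no "is signal-driven i/o enabled?" state.
         */
        kill_fasync(fa, SIGIO, POLL_IN);
    }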
Signed-off-by: Peter Hurley Signed-off-by: Greg Kroah-Hartman --- drivers/tty/n_tty.c | 5 ----- drivers/tty/tty_io.c | 8 -------- include/linux/tty_ldisc.h | 6 ------ 3 files changed, 19 deletions(-) (limited to 'include/linux') diff --git a/drivers/tty/n_tty.c b/drivers/tty/n_tty.c index d04f398d5de3..a93e4c7e82ae 100644 --- a/drivers/tty/n_tty.c +++ b/drivers/tty/n_tty.c @@ -2448,10 +2448,6 @@ static int n_tty_ioctl(struct tty_struct *tty, struct file *file, } } -static void n_tty_fasync(struct tty_struct *tty, int on) -{ -} - static struct tty_ldisc_ops n_tty_ops = { .magic = TTY_LDISC_MAGIC, .name = "n_tty", @@ -2465,7 +2461,6 @@ static struct tty_ldisc_ops n_tty_ops = { .poll = n_tty_poll, .receive_buf = n_tty_receive_buf, .write_wakeup = n_tty_write_wakeup, - .fasync = n_tty_fasync, .receive_buf2 = n_tty_receive_buf2, }; diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c index c5b4274584dc..7533ab1abe72 100644 --- a/drivers/tty/tty_io.c +++ b/drivers/tty/tty_io.c @@ -2219,7 +2219,6 @@ static unsigned int tty_poll(struct file *filp, poll_table *wait) static int __tty_fasync(int fd, struct file *filp, int on) { struct tty_struct *tty = file_tty(filp); - struct tty_ldisc *ldisc; unsigned long flags; int retval = 0; @@ -2230,13 +2229,6 @@ static int __tty_fasync(int fd, struct file *filp, int on) if (retval <= 0) goto out; - ldisc = tty_ldisc_ref(tty); - if (ldisc) { - if (ldisc->ops->fasync) - ldisc->ops->fasync(tty, on); - tty_ldisc_deref(ldisc); - } - if (on) { enum pid_type type; struct pid *pid; diff --git a/include/linux/tty_ldisc.h b/include/linux/tty_ldisc.h index 6101ab8dc148..3971cf0eb467 100644 --- a/include/linux/tty_ldisc.h +++ b/include/linux/tty_ldisc.h @@ -98,11 +98,6 @@ * seek to perform this action quickly but should wait until * any pending driver I/O is completed. * - * void (*fasync)(struct tty_struct *, int on) - * - * Notify line discipline when signal-driven I/O is enabled or - * disabled. - * * void (*dcd_change)(struct tty_struct *tty, unsigned int status) * * Tells the discipline that the DCD pin has changed its status. @@ -202,7 +197,6 @@ struct tty_ldisc_ops { char *fp, int count); void (*write_wakeup)(struct tty_struct *); void (*dcd_change)(struct tty_struct *, unsigned int); - void (*fasync)(struct tty_struct *tty, int on); int (*receive_buf2)(struct tty_struct *, const unsigned char *cp, char *fp, int count); -- cgit v1.2.3 From f33798deecbd59a2955f40ac0ae2bc7dff54c069 Mon Sep 17 00:00:00 2001 From: Peter Hurley Date: Sun, 10 Jan 2016 20:36:12 -0800 Subject: tty: Fix GPF in flush_to_ldisc(), part 2 commit 9ce119f318ba ("tty: Fix GPF in flush_to_ldisc()") fixed a GPF caused by a line discipline which does not define a receive_buf() method. However, the vt driver (and speakup driver also) pushes selection data directly to the line discipline receive_buf() method via tty_ldisc_receive_buf(). Fix the same problem in tty_ldisc_receive_buf(). 
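The bug pattern, reduced to a hedged stand-alone sketch (demo types, not the tty structures): an ops table may leave optional callbacks NULL, so every direct call site must guard them:

    /* hypothetical ops table with an optional callback */
    struct demo_ldisc_ops {
        /* may be NULL when only a receive_buf2() variant exists */
        void (*receive_buf)(void *ctx, const unsigned char *p, int n);
    };

    static void demo_push(const struct demo_ldisc_ops *ops, void *ctx,
                          const unsigned char *p, int n)
    {
        if (n && ops->receive_buf)      /* the missing check caused the GPF */
            ops->receive_buf(ctx, p, n);
    }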
Cc: Signed-off-by: Peter Hurley Signed-off-by: Greg Kroah-Hartman --- include/linux/tty.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/tty.h b/include/linux/tty.h index dea7d54d00d4..03e4015fa033 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -579,7 +579,7 @@ static inline int tty_ldisc_receive_buf(struct tty_ldisc *ld, unsigned char *p, count = ld->ops->receive_buf2(ld->tty, p, f, count); else { count = min_t(int, count, ld->tty->receive_room); - if (count) + if (count && ld->ops->receive_buf) ld->ops->receive_buf(ld->tty, p, f, count); } return count; -- cgit v1.2.3 From 8d082cd300ab422b7ee9f4629a1c470e4f0d90d5 Mon Sep 17 00:00:00 2001 From: Peter Hurley Date: Sun, 10 Jan 2016 20:36:13 -0800 Subject: tty: Unify receive_buf() code paths Instead of two distinct code branches for receive_buf() handling, use tty_ldisc_receive_buf() as the single code path. Signed-off-by: Peter Hurley Signed-off-by: Greg Kroah-Hartman --- drivers/tty/tty_buffer.c | 39 ++++++++++++++++++++++++++++----------- include/linux/tty.h | 16 ++-------------- 2 files changed, 30 insertions(+), 25 deletions(-) (limited to 'include/linux') diff --git a/drivers/tty/tty_buffer.c b/drivers/tty/tty_buffer.c index 3cd31e0d4bd9..a946e49a2626 100644 --- a/drivers/tty/tty_buffer.c +++ b/drivers/tty/tty_buffer.c @@ -435,25 +435,42 @@ int tty_prepare_flip_string(struct tty_port *port, unsigned char **chars, } EXPORT_SYMBOL_GPL(tty_prepare_flip_string); +/** + * tty_ldisc_receive_buf - forward data to line discipline + * @ld: line discipline to process input + * @p: char buffer + * @f: TTY_* flags buffer + * @count: number of bytes to process + * + * Callers other than flush_to_ldisc() need to exclude the kworker + * from concurrent use of the line discipline, see paste_selection(). 
+ * + * Returns the number of bytes not processed + */ +int tty_ldisc_receive_buf(struct tty_ldisc *ld, unsigned char *p, + char *f, int count) +{ + if (ld->ops->receive_buf2) + count = ld->ops->receive_buf2(ld->tty, p, f, count); + else { + count = min_t(int, count, ld->tty->receive_room); + if (count && ld->ops->receive_buf) + ld->ops->receive_buf(ld->tty, p, f, count); + } + return count; +} +EXPORT_SYMBOL_GPL(tty_ldisc_receive_buf); static int -receive_buf(struct tty_struct *tty, struct tty_buffer *head, int count) +receive_buf(struct tty_ldisc *ld, struct tty_buffer *head, int count) { - struct tty_ldisc *disc = tty->ldisc; unsigned char *p = char_buf_ptr(head, head->read); char *f = NULL; if (~head->flags & TTYB_NORMAL) f = flag_buf_ptr(head, head->read); - if (disc->ops->receive_buf2) - count = disc->ops->receive_buf2(tty, p, f, count); - else { - count = min_t(int, count, tty->receive_room); - if (count && disc->ops->receive_buf) - disc->ops->receive_buf(tty, p, f, count); - } - return count; + return tty_ldisc_receive_buf(ld, p, f, count); } /** @@ -514,7 +531,7 @@ static void flush_to_ldisc(struct work_struct *work) continue; } - count = receive_buf(tty, head, count); + count = receive_buf(disc, head, count); if (!count) break; head->read += count; diff --git a/include/linux/tty.h b/include/linux/tty.h index 03e4015fa033..3b09f235db66 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -571,20 +571,8 @@ extern int tty_ldisc_setup(struct tty_struct *tty, struct tty_struct *o_tty); extern void tty_ldisc_release(struct tty_struct *tty); extern void tty_ldisc_init(struct tty_struct *tty); extern void tty_ldisc_deinit(struct tty_struct *tty); - -static inline int tty_ldisc_receive_buf(struct tty_ldisc *ld, unsigned char *p, - char *f, int count) -{ - if (ld->ops->receive_buf2) - count = ld->ops->receive_buf2(ld->tty, p, f, count); - else { - count = min_t(int, count, ld->tty->receive_room); - if (count && ld->ops->receive_buf) - ld->ops->receive_buf(ld->tty, p, f, count); - } - return count; -} - +extern int tty_ldisc_receive_buf(struct tty_ldisc *ld, unsigned char *p, + char *f, int count); /* n_tty.c */ extern void n_tty_inherit_ops(struct tty_ldisc_ops *ops); -- cgit v1.2.3 From a657eecd022d82c5b1795243f54389112f42c7f9 Mon Sep 17 00:00:00 2001 From: Peter Hurley Date: Sun, 10 Jan 2016 14:51:39 -0800 Subject: isdn: Remove ASYNC_CLOSING The tty core no longer provides ASYNC_CLOSING. Use a private flag for the same purpose, which is to disable AT-emulator output (why this is necessary is not clear).
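A hedged sketch of the replacement pattern (invented demo struct, not the isdn driver): the driver keeps its own closing flag under its own lock instead of borrowing a flag word that the tty core no longer maintains:

    #include <linux/spinlock.h>

    struct demo_modem {
        spinlock_t lock;
        int closing;            /* nonzero while close is in progress */
    };

    static bool demo_may_emit_output(struct demo_modem *m)
    {
        unsigned long flags;
        bool ok;

        spin_lock_irqsave(&m->lock, flags);
        ok = !m->closing;       /* suppress emulator output during close */
        spin_unlock_irqrestore(&m->lock, flags);
        return ok;
    }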
Cc: Karsten Keil Signed-off-by: Peter Hurley Signed-off-by: Greg Kroah-Hartman --- drivers/isdn/i4l/isdn_tty.c | 12 ++++++------ include/linux/isdn.h | 1 + 2 files changed, 7 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/drivers/isdn/i4l/isdn_tty.c b/drivers/isdn/i4l/isdn_tty.c index 2175225af742..947d5c978b8f 100644 --- a/drivers/isdn/i4l/isdn_tty.c +++ b/drivers/isdn/i4l/isdn_tty.c @@ -1572,7 +1572,7 @@ isdn_tty_close(struct tty_struct *tty, struct file *filp) #endif return; } - port->flags |= ASYNC_CLOSING; + info->closing = 1; tty->closing = 1; /* @@ -1603,6 +1603,7 @@ isdn_tty_close(struct tty_struct *tty, struct file *filp) info->ncarrier = 0; tty_port_close_end(port, tty); + info->closing = 0; #ifdef ISDN_DEBUG_MODEM_OPEN printk(KERN_DEBUG "isdn_tty_close normal exit\n"); #endif @@ -2236,7 +2237,7 @@ isdn_tty_at_cout(char *msg, modem_info *info) l = strlen(msg); spin_lock_irqsave(&info->readlock, flags); - if (port->flags & ASYNC_CLOSING) { + if (info->closing) { spin_unlock_irqrestore(&info->readlock, flags); return; } @@ -2386,13 +2387,12 @@ isdn_tty_modem_result(int code, modem_info *info) case RESULT_NO_CARRIER: #ifdef ISDN_DEBUG_MODEM_HUP printk(KERN_DEBUG "modem_result: NO CARRIER %d %d\n", - (info->port.flags & ASYNC_CLOSING), - (!info->port.tty)); + info->closing, !info->port.tty); #endif m->mdmreg[REG_RINGCNT] = 0; del_timer(&info->nc_timer); info->ncarrier = 0; - if ((info->port.flags & ASYNC_CLOSING) || (!info->port.tty)) + if (info->closing || !info->port.tty) return; #ifdef CONFIG_ISDN_AUDIO @@ -2525,7 +2525,7 @@ isdn_tty_modem_result(int code, modem_info *info) } } if (code == RESULT_NO_CARRIER) { - if ((info->port.flags & ASYNC_CLOSING) || (!info->port.tty)) + if (info->closing || (!info->port.tty)) return; if (info->port.flags & ASYNC_CHECK_CD) diff --git a/include/linux/isdn.h b/include/linux/isdn.h index 1e9a0f2a8626..df97c8444f5d 100644 --- a/include/linux/isdn.h +++ b/include/linux/isdn.h @@ -319,6 +319,7 @@ typedef struct modem_info { int online; /* 1 = B-Channel is up, drop data */ /* 2 = B-Channel is up, deliver d.*/ int dialing; /* Dial in progress or ATA */ + int closing; int rcvsched; /* Receive needs schedule */ int isdn_driver; /* Index to isdn-driver */ int isdn_channel; /* Index to isdn-channel */ -- cgit v1.2.3 From 50359819794b4a16ae35051cd80f2dab025f6019 Mon Sep 17 00:00:00 2001 From: Heiko Stuebner Date: Thu, 21 Jan 2016 21:53:09 +0100 Subject: clk-divider: make sure read-only dividers do not write to their register Commit e6d5e7d90be9 ("clk-divider: Fix READ_ONLY when divider > 1") removed the special ops struct for read-only clocks and instead opted to handle them inside the regular ops. On the rk3368 this results in breakage as aclkm now gets set a value. While it is the same divider value, the A53 core still doesn't like it, which can result in the cpu ending up in a hang. The reason is that "ACLKENM asserts one clock cycle before the rising edge of ACLKM" and the clock should only be touched when STANDBYWFIL2 is asserted. To fix this, reintroduce the read-only ops but do include the round_rate callback. That way no writes that may be unsafe are done to the divider register in any case. The Rockchip use of the clk_divider_ops is adapted to this split again, as is the nxp lpc18xx-ccu driver that has been added since the original commit. On lpc18xx-ccu the divider seems to always be read-only, so it now only uses the new ops.
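The registration-time selection reduces to a hedged one-liner (the helper is invented for illustration; the ops tables are the ones added below, where clk_divider_ro_ops carries only recalc_rate and round_rate, so nothing on the set_rate path can ever write the register):

    #include <linux/clk-provider.h>

    /* sketch: pick the ops table once, when the divider is registered */
    static const struct clk_ops *demo_pick_div_ops(unsigned long div_flags)
    {
        return (div_flags & CLK_DIVIDER_READ_ONLY) ? &clk_divider_ro_ops
                                                   : &clk_divider_ops;
    }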
Fixes: e6d5e7d90be9 ("clk-divider: Fix READ_ONLY when divider > 1") Reported-by: Zhang Qing Signed-off-by: Heiko Stuebner Signed-off-by: Stephen Boyd --- drivers/clk/clk-divider.c | 11 ++++++++++- drivers/clk/nxp/clk-lpc18xx-ccu.c | 2 +- drivers/clk/rockchip/clk.c | 4 +++- include/linux/clk-provider.h | 1 + 4 files changed, 15 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/clk/clk-divider.c b/drivers/clk/clk-divider.c index ded3ff4b91b9..aa1dacdaa39d 100644 --- a/drivers/clk/clk-divider.c +++ b/drivers/clk/clk-divider.c @@ -423,6 +423,12 @@ const struct clk_ops clk_divider_ops = { }; EXPORT_SYMBOL_GPL(clk_divider_ops); +const struct clk_ops clk_divider_ro_ops = { + .recalc_rate = clk_divider_recalc_rate, + .round_rate = clk_divider_round_rate, +}; +EXPORT_SYMBOL_GPL(clk_divider_ro_ops); + static struct clk *_register_divider(struct device *dev, const char *name, const char *parent_name, unsigned long flags, void __iomem *reg, u8 shift, u8 width, @@ -446,7 +452,10 @@ static struct clk *_register_divider(struct device *dev, const char *name, return ERR_PTR(-ENOMEM); init.name = name; - init.ops = &clk_divider_ops; + if (clk_divider_flags & CLK_DIVIDER_READ_ONLY) + init.ops = &clk_divider_ro_ops; + else + init.ops = &clk_divider_ops; init.flags = flags | CLK_IS_BASIC; init.parent_names = (parent_name ? &parent_name: NULL); init.num_parents = (parent_name ? 1 : 0); diff --git a/drivers/clk/nxp/clk-lpc18xx-ccu.c b/drivers/clk/nxp/clk-lpc18xx-ccu.c index 13aabbb3acbe..558da89555af 100644 --- a/drivers/clk/nxp/clk-lpc18xx-ccu.c +++ b/drivers/clk/nxp/clk-lpc18xx-ccu.c @@ -222,7 +222,7 @@ static void lpc18xx_ccu_register_branch_gate_div(struct lpc18xx_clk_branch *bran div->width = 1; div_hw = &div->hw; - div_ops = &clk_divider_ops; + div_ops = &clk_divider_ro_ops; } branch->gate.reg = branch->offset + reg_base; diff --git a/drivers/clk/rockchip/clk.c b/drivers/clk/rockchip/clk.c index d9a0b5d4d47f..f7e8693ad28b 100644 --- a/drivers/clk/rockchip/clk.c +++ b/drivers/clk/rockchip/clk.c @@ -90,7 +90,9 @@ static struct clk *rockchip_clk_register_branch(const char *name, div->width = div_width; div->lock = lock; div->table = div_table; - div_ops = &clk_divider_ops; + div_ops = (div_flags & CLK_DIVIDER_READ_ONLY) + ? &clk_divider_ro_ops + : &clk_divider_ops; } clk = clk_register_composite(NULL, name, parent_names, num_parents, diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h index 1143e38555a4..408a60dca353 100644 --- a/include/linux/clk-provider.h +++ b/include/linux/clk-provider.h @@ -385,6 +385,7 @@ struct clk_divider { #define CLK_DIVIDER_MAX_AT_ZERO BIT(6) extern const struct clk_ops clk_divider_ops; +extern const struct clk_ops clk_divider_ro_ops; unsigned long divider_recalc_rate(struct clk_hw *hw, unsigned long parent_rate, unsigned int val, const struct clk_div_table *table, -- cgit v1.2.3 From 5fd9c05c846db98319e75496612da24435cee208 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Fri, 8 Jan 2016 23:51:46 +0800 Subject: clk: move the common clock's to_clk_*(_hw) macros to clk-provider.h to_clk_*(_hw) macros have been repeatedly defined in many places. This patch moves all the to_clk_*(_hw) definitions in the common clock framework to public header clk-provider.h, and drops the local definitions.
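For readers new to the idiom, a hedged stand-alone sketch of what every to_clk_*() macro does (demo names, not part of the patch): container_of() subtracts the member offset to recover the provider's wrapper around the embedded clk_hw handle:

    #include <linux/clk-provider.h>
    #include <linux/errno.h>
    #include <linux/kernel.h>

    struct demo_gate {
        void __iomem *reg;
        struct clk_hw hw;       /* embedded framework handle */
    };

    #define to_demo_gate(_hw) container_of(_hw, struct demo_gate, hw)

    /* clk_ops callbacks receive the clk_hw and recover the wrapper */
    static int demo_gate_enable(struct clk_hw *hw)
    {
        struct demo_gate *gate = to_demo_gate(hw);

        return gate->reg ? 0 : -EINVAL;
    }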
Signed-off-by: Geliang Tang Signed-off-by: Stephen Boyd --- drivers/clk/clk-composite.c | 2 -- drivers/clk/clk-divider.c | 2 -- drivers/clk/clk-fixed-factor.c | 2 -- drivers/clk/clk-fixed-rate.c | 2 -- drivers/clk/clk-fractional-divider.c | 2 -- drivers/clk/clk-gate.c | 2 -- drivers/clk/clk-gpio.c | 2 -- drivers/clk/clk-multiplier.c | 2 -- drivers/clk/clk-mux.c | 2 -- drivers/clk/imx/clk-busy.c | 4 ++-- drivers/clk/imx/clk-fixup-div.c | 5 ++--- drivers/clk/imx/clk-fixup-mux.c | 2 -- drivers/clk/imx/clk-gate-exclusive.c | 2 +- drivers/clk/mediatek/clk-gate.c | 8 ++++---- drivers/clk/mediatek/clk-gate.h | 2 +- drivers/clk/mvebu/common.c | 2 -- drivers/clk/mvebu/kirkwood.c | 2 -- drivers/clk/mxs/clk-div.c | 2 +- drivers/clk/nxp/clk-lpc18xx-ccu.c | 2 -- drivers/clk/st/clkgen-mux.c | 9 ++++----- drivers/clk/ti/composite.c | 2 -- drivers/clk/ti/divider.c | 2 -- drivers/clk/ti/gate.c | 2 -- drivers/clk/ti/mux.c | 2 -- include/linux/clk-provider.h | 18 ++++++++++++++++++ 25 files changed, 33 insertions(+), 51 deletions(-) (limited to 'include/linux') diff --git a/drivers/clk/clk-composite.c b/drivers/clk/clk-composite.c index 4735de0660cc..1f903e1f86a2 100644 --- a/drivers/clk/clk-composite.c +++ b/drivers/clk/clk-composite.c @@ -19,8 +19,6 @@ #include #include -#define to_clk_composite(_hw) container_of(_hw, struct clk_composite, hw) - static u8 clk_composite_get_parent(struct clk_hw *hw) { struct clk_composite *composite = to_clk_composite(hw); diff --git a/drivers/clk/clk-divider.c b/drivers/clk/clk-divider.c index aa1dacdaa39d..7d62dc30e969 100644 --- a/drivers/clk/clk-divider.c +++ b/drivers/clk/clk-divider.c @@ -28,8 +28,6 @@ * parent - fixed parent. No clk_set_parent support */ -#define to_clk_divider(_hw) container_of(_hw, struct clk_divider, hw) - #define div_mask(width) ((1 << (width)) - 1) static unsigned int _get_table_maxdiv(const struct clk_div_table *table, diff --git a/drivers/clk/clk-fixed-factor.c b/drivers/clk/clk-fixed-factor.c index 83de57aeceea..f0ddf37d5e15 100644 --- a/drivers/clk/clk-fixed-factor.c +++ b/drivers/clk/clk-fixed-factor.c @@ -23,8 +23,6 @@ * parent - fixed parent. No clk_set_parent support */ -#define to_clk_fixed_factor(_hw) container_of(_hw, struct clk_fixed_factor, hw) - static unsigned long clk_factor_recalc_rate(struct clk_hw *hw, unsigned long parent_rate) { diff --git a/drivers/clk/clk-fixed-rate.c b/drivers/clk/clk-fixed-rate.c index f85ec8d1711f..e156beb871f0 100644 --- a/drivers/clk/clk-fixed-rate.c +++ b/drivers/clk/clk-fixed-rate.c @@ -26,8 +26,6 @@ * parent - fixed parent. No clk_set_parent support */ -#define to_clk_fixed_rate(_hw) container_of(_hw, struct clk_fixed_rate, hw) - static unsigned long clk_fixed_rate_recalc_rate(struct clk_hw *hw, unsigned long parent_rate) { diff --git a/drivers/clk/clk-fractional-divider.c b/drivers/clk/clk-fractional-divider.c index 5c4955e33f7a..1abcd76b4993 100644 --- a/drivers/clk/clk-fractional-divider.c +++ b/drivers/clk/clk-fractional-divider.c @@ -16,8 +16,6 @@ #include #include -#define to_clk_fd(_hw) container_of(_hw, struct clk_fractional_divider, hw) - static unsigned long clk_fd_recalc_rate(struct clk_hw *hw, unsigned long parent_rate) { diff --git a/drivers/clk/clk-gate.c b/drivers/clk/clk-gate.c index de0b322f5f58..d0d8ec8e1f1b 100644 --- a/drivers/clk/clk-gate.c +++ b/drivers/clk/clk-gate.c @@ -26,8 +26,6 @@ * parent - fixed parent. 
No clk_set_parent support */ -#define to_clk_gate(_hw) container_of(_hw, struct clk_gate, hw) - /* * It works on following logic: * diff --git a/drivers/clk/clk-gpio.c b/drivers/clk/clk-gpio.c index 19fed65587e8..cbbea2985cc9 100644 --- a/drivers/clk/clk-gpio.c +++ b/drivers/clk/clk-gpio.c @@ -31,8 +31,6 @@ * parent - fixed parent. No clk_set_parent support */ -#define to_clk_gpio(_hw) container_of(_hw, struct clk_gpio, hw) - static int clk_gpio_gate_enable(struct clk_hw *hw) { struct clk_gpio *clk = to_clk_gpio(hw); diff --git a/drivers/clk/clk-multiplier.c b/drivers/clk/clk-multiplier.c index fe7806506bf3..9e449c7b751c 100644 --- a/drivers/clk/clk-multiplier.c +++ b/drivers/clk/clk-multiplier.c @@ -14,8 +14,6 @@ #include #include -#define to_clk_multiplier(_hw) container_of(_hw, struct clk_multiplier, hw) - static unsigned long __get_mult(struct clk_multiplier *mult, unsigned long rate, unsigned long parent_rate) diff --git a/drivers/clk/clk-mux.c b/drivers/clk/clk-mux.c index 5ed03c8a8df9..252188fd8bcd 100644 --- a/drivers/clk/clk-mux.c +++ b/drivers/clk/clk-mux.c @@ -26,8 +26,6 @@ * parent - parent is adjustable through clk_set_parent */ -#define to_clk_mux(_hw) container_of(_hw, struct clk_mux, hw) - static u8 clk_mux_get_parent(struct clk_hw *hw) { struct clk_mux *mux = to_clk_mux(hw); diff --git a/drivers/clk/imx/clk-busy.c b/drivers/clk/imx/clk-busy.c index 4bb1bc419b79..5cc99590f9a3 100644 --- a/drivers/clk/imx/clk-busy.c +++ b/drivers/clk/imx/clk-busy.c @@ -38,7 +38,7 @@ struct clk_busy_divider { static inline struct clk_busy_divider *to_clk_busy_divider(struct clk_hw *hw) { - struct clk_divider *div = container_of(hw, struct clk_divider, hw); + struct clk_divider *div = to_clk_divider(hw); return container_of(div, struct clk_busy_divider, div); } @@ -123,7 +123,7 @@ struct clk_busy_mux { static inline struct clk_busy_mux *to_clk_busy_mux(struct clk_hw *hw) { - struct clk_mux *mux = container_of(hw, struct clk_mux, hw); + struct clk_mux *mux = to_clk_mux(hw); return container_of(mux, struct clk_busy_mux, mux); } diff --git a/drivers/clk/imx/clk-fixup-div.c b/drivers/clk/imx/clk-fixup-div.c index 21db020b1f2d..ce5722732715 100644 --- a/drivers/clk/imx/clk-fixup-div.c +++ b/drivers/clk/imx/clk-fixup-div.c @@ -15,7 +15,6 @@ #include #include "clk.h" -#define to_clk_div(_hw) container_of(_hw, struct clk_divider, hw) #define div_mask(d) ((1 << (d->width)) - 1) /** @@ -35,7 +34,7 @@ struct clk_fixup_div { static inline struct clk_fixup_div *to_clk_fixup_div(struct clk_hw *hw) { - struct clk_divider *divider = to_clk_div(hw); + struct clk_divider *divider = to_clk_divider(hw); return container_of(divider, struct clk_fixup_div, divider); } @@ -60,7 +59,7 @@ static int clk_fixup_div_set_rate(struct clk_hw *hw, unsigned long rate, unsigned long parent_rate) { struct clk_fixup_div *fixup_div = to_clk_fixup_div(hw); - struct clk_divider *div = to_clk_div(hw); + struct clk_divider *div = to_clk_divider(hw); unsigned int divider, value; unsigned long flags = 0; u32 val; diff --git a/drivers/clk/imx/clk-fixup-mux.c b/drivers/clk/imx/clk-fixup-mux.c index 0d40b35c557c..c9b327e0a8dd 100644 --- a/drivers/clk/imx/clk-fixup-mux.c +++ b/drivers/clk/imx/clk-fixup-mux.c @@ -15,8 +15,6 @@ #include #include "clk.h" -#define to_clk_mux(_hw) container_of(_hw, struct clk_mux, hw) - /** * struct clk_fixup_mux - imx integer fixup multiplexer clock * @mux: the parent class diff --git a/drivers/clk/imx/clk-gate-exclusive.c b/drivers/clk/imx/clk-gate-exclusive.c index c12f5f2e04dc..3bd9dee618b2 100644 --- 
a/drivers/clk/imx/clk-gate-exclusive.c +++ b/drivers/clk/imx/clk-gate-exclusive.c @@ -31,7 +31,7 @@ struct clk_gate_exclusive { static int clk_gate_exclusive_enable(struct clk_hw *hw) { - struct clk_gate *gate = container_of(hw, struct clk_gate, hw); + struct clk_gate *gate = to_clk_gate(hw); struct clk_gate_exclusive *exgate = container_of(gate, struct clk_gate_exclusive, gate); u32 val = readl(gate->reg); diff --git a/drivers/clk/mediatek/clk-gate.c b/drivers/clk/mediatek/clk-gate.c index 576bdb7c98b8..2a76901bf04b 100644 --- a/drivers/clk/mediatek/clk-gate.c +++ b/drivers/clk/mediatek/clk-gate.c @@ -25,7 +25,7 @@ static int mtk_cg_bit_is_cleared(struct clk_hw *hw) { - struct mtk_clk_gate *cg = to_clk_gate(hw); + struct mtk_clk_gate *cg = to_mtk_clk_gate(hw); u32 val; regmap_read(cg->regmap, cg->sta_ofs, &val); @@ -37,7 +37,7 @@ static int mtk_cg_bit_is_cleared(struct clk_hw *hw) static int mtk_cg_bit_is_set(struct clk_hw *hw) { - struct mtk_clk_gate *cg = to_clk_gate(hw); + struct mtk_clk_gate *cg = to_mtk_clk_gate(hw); u32 val; regmap_read(cg->regmap, cg->sta_ofs, &val); @@ -49,14 +49,14 @@ static int mtk_cg_bit_is_set(struct clk_hw *hw) static void mtk_cg_set_bit(struct clk_hw *hw) { - struct mtk_clk_gate *cg = to_clk_gate(hw); + struct mtk_clk_gate *cg = to_mtk_clk_gate(hw); regmap_write(cg->regmap, cg->set_ofs, BIT(cg->bit)); } static void mtk_cg_clr_bit(struct clk_hw *hw) { - struct mtk_clk_gate *cg = to_clk_gate(hw); + struct mtk_clk_gate *cg = to_mtk_clk_gate(hw); regmap_write(cg->regmap, cg->clr_ofs, BIT(cg->bit)); } diff --git a/drivers/clk/mediatek/clk-gate.h b/drivers/clk/mediatek/clk-gate.h index 11e25c992948..b1821603b887 100644 --- a/drivers/clk/mediatek/clk-gate.h +++ b/drivers/clk/mediatek/clk-gate.h @@ -29,7 +29,7 @@ struct mtk_clk_gate { u8 bit; }; -static inline struct mtk_clk_gate *to_clk_gate(struct clk_hw *hw) +static inline struct mtk_clk_gate *to_mtk_clk_gate(struct clk_hw *hw) { return container_of(hw, struct mtk_clk_gate, hw); } diff --git a/drivers/clk/mvebu/common.c b/drivers/clk/mvebu/common.c index 28aac67e7b92..daa6ebdac131 100644 --- a/drivers/clk/mvebu/common.c +++ b/drivers/clk/mvebu/common.c @@ -199,8 +199,6 @@ struct clk_gating_ctrl { u32 saved_reg; }; -#define to_clk_gate(_hw) container_of(_hw, struct clk_gate, hw) - static struct clk_gating_ctrl *ctrl; static struct clk *clk_gating_get_src( diff --git a/drivers/clk/mvebu/kirkwood.c b/drivers/clk/mvebu/kirkwood.c index 99550f25975e..a2a8d614039d 100644 --- a/drivers/clk/mvebu/kirkwood.c +++ b/drivers/clk/mvebu/kirkwood.c @@ -256,8 +256,6 @@ static const struct clk_muxing_soc_desc kirkwood_mux_desc[] __initconst = { 11, 1, 0 }, }; -#define to_clk_mux(_hw) container_of(_hw, struct clk_mux, hw) - static struct clk *clk_muxing_get_src( struct of_phandle_args *clkspec, void *data) { diff --git a/drivers/clk/mxs/clk-div.c b/drivers/clk/mxs/clk-div.c index 049ee27d5a22..f75e989c578f 100644 --- a/drivers/clk/mxs/clk-div.c +++ b/drivers/clk/mxs/clk-div.c @@ -33,7 +33,7 @@ struct clk_div { static inline struct clk_div *to_clk_div(struct clk_hw *hw) { - struct clk_divider *divider = container_of(hw, struct clk_divider, hw); + struct clk_divider *divider = to_clk_divider(hw); return container_of(divider, struct clk_div, divider); } diff --git a/drivers/clk/nxp/clk-lpc18xx-ccu.c b/drivers/clk/nxp/clk-lpc18xx-ccu.c index 558da89555af..f7136b94fd0e 100644 --- a/drivers/clk/nxp/clk-lpc18xx-ccu.c +++ b/drivers/clk/nxp/clk-lpc18xx-ccu.c @@ -28,8 +28,6 @@ #define CCU_BRANCH_IS_BUS BIT(0) #define CCU_BRANCH_HAVE_DIV2 
BIT(1) -#define to_clk_gate(_hw) container_of(_hw, struct clk_gate, hw) - struct lpc18xx_branch_clk_data { const char **name; int num; diff --git a/drivers/clk/st/clkgen-mux.c b/drivers/clk/st/clkgen-mux.c index 5dc5ce217960..0d9a74b66ea3 100644 --- a/drivers/clk/st/clkgen-mux.c +++ b/drivers/clk/st/clkgen-mux.c @@ -822,11 +822,10 @@ err: if (!clk_data->clks[i]) continue; - composite = container_of(__clk_get_hw(clk_data->clks[i]), - struct clk_composite, hw); - kfree(container_of(composite->gate_hw, struct clk_gate, hw)); - kfree(container_of(composite->rate_hw, struct clk_divider, hw)); - kfree(container_of(composite->mux_hw, struct clk_mux, hw)); + composite = to_clk_composite(__clk_get_hw(clk_data->clks[i])); + kfree(to_clk_gate(composite->gate_hw)); + kfree(to_clk_divider(composite->rate_hw)); + kfree(to_clk_mux(composite->mux_hw)); } kfree(clk_data->clks); diff --git a/drivers/clk/ti/composite.c b/drivers/clk/ti/composite.c index dbef218fe5ec..43345c417815 100644 --- a/drivers/clk/ti/composite.c +++ b/drivers/clk/ti/composite.c @@ -28,8 +28,6 @@ #undef pr_fmt #define pr_fmt(fmt) "%s: " fmt, __func__ -#define to_clk_divider(_hw) container_of(_hw, struct clk_divider, hw) - static unsigned long ti_composite_recalc_rate(struct clk_hw *hw, unsigned long parent_rate) { diff --git a/drivers/clk/ti/divider.c b/drivers/clk/ti/divider.c index df2558350fc1..b4e5de16e561 100644 --- a/drivers/clk/ti/divider.c +++ b/drivers/clk/ti/divider.c @@ -26,8 +26,6 @@ #undef pr_fmt #define pr_fmt(fmt) "%s: " fmt, __func__ -#define to_clk_divider(_hw) container_of(_hw, struct clk_divider, hw) - #define div_mask(d) ((1 << ((d)->width)) - 1) static unsigned int _get_table_maxdiv(const struct clk_div_table *table) diff --git a/drivers/clk/ti/gate.c b/drivers/clk/ti/gate.c index 5429d3534363..bc05f276f32b 100644 --- a/drivers/clk/ti/gate.c +++ b/drivers/clk/ti/gate.c @@ -24,8 +24,6 @@ #include "clock.h" -#define to_clk_divider(_hw) container_of(_hw, struct clk_divider, hw) - #undef pr_fmt #define pr_fmt(fmt) "%s: " fmt, __func__ diff --git a/drivers/clk/ti/mux.c b/drivers/clk/ti/mux.c index dab9ba88b9d6..618ded96ace3 100644 --- a/drivers/clk/ti/mux.c +++ b/drivers/clk/ti/mux.c @@ -26,8 +26,6 @@ #undef pr_fmt #define pr_fmt(fmt) "%s: " fmt, __func__ -#define to_clk_mux(_hw) container_of(_hw, struct clk_mux, hw) - static u8 ti_clk_mux_get_parent(struct clk_hw *hw) { struct clk_mux *mux = to_clk_mux(hw); diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h index 408a60dca353..33dc814d0f43 100644 --- a/include/linux/clk-provider.h +++ b/include/linux/clk-provider.h @@ -276,6 +276,8 @@ struct clk_fixed_rate { u8 flags; }; +#define to_clk_fixed_rate(_hw) container_of(_hw, struct clk_fixed_rate, hw) + extern const struct clk_ops clk_fixed_rate_ops; struct clk *clk_register_fixed_rate(struct device *dev, const char *name, const char *parent_name, unsigned long flags, @@ -314,6 +316,8 @@ struct clk_gate { spinlock_t *lock; }; +#define to_clk_gate(_hw) container_of(_hw, struct clk_gate, hw) + #define CLK_GATE_SET_TO_DISABLE BIT(0) #define CLK_GATE_HIWORD_MASK BIT(1) @@ -376,6 +380,8 @@ struct clk_divider { spinlock_t *lock; }; +#define to_clk_divider(_hw) container_of(_hw, struct clk_divider, hw) + #define CLK_DIVIDER_ONE_BASED BIT(0) #define CLK_DIVIDER_POWER_OF_TWO BIT(1) #define CLK_DIVIDER_ALLOW_ZERO BIT(2) @@ -441,6 +447,8 @@ struct clk_mux { spinlock_t *lock; }; +#define to_clk_mux(_hw) container_of(_hw, struct clk_mux, hw) + #define CLK_MUX_INDEX_ONE BIT(0) #define CLK_MUX_INDEX_BIT BIT(1) 
#define CLK_MUX_HIWORD_MASK BIT(2) @@ -484,6 +492,8 @@ struct clk_fixed_factor { unsigned int div; }; +#define to_clk_fixed_factor(_hw) container_of(_hw, struct clk_fixed_factor, hw) + extern const struct clk_ops clk_fixed_factor_ops; struct clk *clk_register_fixed_factor(struct device *dev, const char *name, const char *parent_name, unsigned long flags, @@ -515,6 +525,8 @@ struct clk_fractional_divider { spinlock_t *lock; }; +#define to_clk_fd(_hw) container_of(_hw, struct clk_fractional_divider, hw) + extern const struct clk_ops clk_fractional_divider_ops; struct clk *clk_register_fractional_divider(struct device *dev, const char *name, const char *parent_name, unsigned long flags, @@ -551,6 +563,8 @@ struct clk_multiplier { spinlock_t *lock; }; +#define to_clk_multiplier(_hw) container_of(_hw, struct clk_multiplier, hw) + #define CLK_MULTIPLIER_ZERO_BYPASS BIT(0) #define CLK_MULTIPLIER_ROUND_CLOSEST BIT(1) @@ -580,6 +594,8 @@ struct clk_composite { const struct clk_ops *gate_ops; }; +#define to_clk_composite(_hw) container_of(_hw, struct clk_composite, hw) + struct clk *clk_register_composite(struct device *dev, const char *name, const char * const *parent_names, int num_parents, struct clk_hw *mux_hw, const struct clk_ops *mux_ops, @@ -602,6 +618,8 @@ struct clk_gpio { struct gpio_desc *gpiod; }; +#define to_clk_gpio(_hw) container_of(_hw, struct clk_gpio, hw) + extern const struct clk_ops clk_gpio_gate_ops; struct clk *clk_register_gpio_gate(struct device *dev, const char *name, const char *parent_name, unsigned gpio, bool active_low, -- cgit v1.2.3 From cbf9591f665d44f40795afe2eee4fcbcd32575d6 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 6 Jan 2016 13:25:09 +0900 Subject: clk: add clk_unregister_fixed_factor() Allow unregistering a fixed factor clock. Signed-off-by: Masahiro Yamada Signed-off-by: Stephen Boyd --- drivers/clk/clk-fixed-factor.c | 13 +++++++++++++ include/linux/clk-provider.h | 1 + 2 files changed, 14 insertions(+) (limited to 'include/linux') diff --git a/drivers/clk/clk-fixed-factor.c b/drivers/clk/clk-fixed-factor.c index f0ddf37d5e15..053448e2453d 100644 --- a/drivers/clk/clk-fixed-factor.c +++ b/drivers/clk/clk-fixed-factor.c @@ -100,6 +100,19 @@ struct clk *clk_register_fixed_factor(struct device *dev, const char *name, } EXPORT_SYMBOL_GPL(clk_register_fixed_factor); +void clk_unregister_fixed_factor(struct clk *clk) +{ + struct clk_hw *hw; + + hw = __clk_get_hw(clk); + if (!hw) + return; + + clk_unregister(clk); + kfree(to_clk_fixed_factor(hw)); +} +EXPORT_SYMBOL_GPL(clk_unregister_fixed_factor); + #ifdef CONFIG_OF /** * of_fixed_factor_clk_setup() - Setup function for simple fixed factor clock diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h index 33dc814d0f43..3641eecf462a 100644 --- a/include/linux/clk-provider.h +++ b/include/linux/clk-provider.h @@ -498,6 +498,7 @@ extern const struct clk_ops clk_fixed_factor_ops; struct clk *clk_register_fixed_factor(struct device *dev, const char *name, const char *parent_name, unsigned long flags, unsigned int mult, unsigned int div); +void clk_unregister_fixed_factor(struct clk *clk); /** * struct clk_fractional_divider - adjustable fractional divider clock -- cgit v1.2.3 From 0b225e41e369a7e03411bb67988513302a10382f Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 6 Jan 2016 13:25:10 +0900 Subject: clk: add clk_unregister_fixed_rate() Allow unregistering a fixed rate clock.
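A hedged usage sketch with invented driver names (the clock name and rate are made up): the new helpers pair with the corresponding register calls and also free the provider-private wrapper that plain clk_unregister() knows nothing about:

    #include <linux/clk-provider.h>
    #include <linux/err.h>

    static struct clk *demo_clk;

    static int demo_setup(struct device *dev)
    {
        /* hypothetical 24 MHz root oscillator, no parent */
        demo_clk = clk_register_fixed_rate(dev, "demo_osc", NULL, 0,
                                           24000000);
        return PTR_ERR_OR_ZERO(demo_clk);
    }

    static void demo_teardown(void)
    {
        /* tears down the core clk, then kfree()s the clk_fixed_rate */
        clk_unregister_fixed_rate(demo_clk);
    }

Note the ordering inside the helpers below: __clk_get_hw() is fetched first, because after clk_unregister() the struct clk can no longer be dereferenced.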
Signed-off-by: Masahiro Yamada Signed-off-by: Stephen Boyd --- drivers/clk/clk-fixed-rate.c | 13 +++++++++++++ include/linux/clk-provider.h | 2 +- 2 files changed, 14 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/clk/clk-fixed-rate.c b/drivers/clk/clk-fixed-rate.c index e156beb871f0..6858bfc548a9 100644 --- a/drivers/clk/clk-fixed-rate.c +++ b/drivers/clk/clk-fixed-rate.c @@ -104,6 +104,19 @@ struct clk *clk_register_fixed_rate(struct device *dev, const char *name, } EXPORT_SYMBOL_GPL(clk_register_fixed_rate); +void clk_unregister_fixed_rate(struct clk *clk) +{ + struct clk_hw *hw; + + hw = __clk_get_hw(clk); + if (!hw) + return; + + clk_unregister(clk); + kfree(to_clk_fixed_rate(hw)); +} +EXPORT_SYMBOL_GPL(clk_unregister_fixed_rate); + #ifdef CONFIG_OF /** * of_fixed_clk_setup() - Setup function for simple fixed rate clock diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h index 3641eecf462a..fabe5bedbba6 100644 --- a/include/linux/clk-provider.h +++ b/include/linux/clk-provider.h @@ -285,7 +285,7 @@ struct clk *clk_register_fixed_rate(struct device *dev, const char *name, struct clk *clk_register_fixed_rate_with_accuracy(struct device *dev, const char *name, const char *parent_name, unsigned long flags, unsigned long fixed_rate, unsigned long fixed_accuracy); - +void clk_unregister_fixed_rate(struct clk *clk); void of_fixed_clk_setup(struct device_node *np); /** -- cgit v1.2.3 From 58f1369216367d27e047bead7221ff977431acaa Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 30 Jan 2016 16:45:47 -0500 Subject: SUNRPC: Remove unused function rpc_task_reset_client Signed-off-by: Trond Myklebust --- include/linux/sunrpc/clnt.h | 1 - net/sunrpc/clnt.c | 8 -------- 2 files changed, 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 131032f15cc1..0c5c2cbe96c9 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -139,7 +139,6 @@ struct rpc_clnt *rpc_create_xprt(struct rpc_create_args *args, struct rpc_xprt *xprt); struct rpc_clnt *rpc_bind_new_program(struct rpc_clnt *, const struct rpc_program *, u32); -void rpc_task_reset_client(struct rpc_task *task, struct rpc_clnt *clnt); struct rpc_clnt *rpc_clone_client(struct rpc_clnt *); struct rpc_clnt *rpc_clone_client_set_auth(struct rpc_clnt *, rpc_authflavor_t); diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index b7f21044f4d8..2b4ad7aa40c6 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -900,14 +900,6 @@ void rpc_task_set_client(struct rpc_task *task, struct rpc_clnt *clnt) } } -void rpc_task_reset_client(struct rpc_task *task, struct rpc_clnt *clnt) -{ - rpc_task_release_client(task); - rpc_task_set_client(task, clnt); -} -EXPORT_SYMBOL_GPL(rpc_task_reset_client); - - static void rpc_task_set_rpc_message(struct rpc_task *task, const struct rpc_message *msg) { -- cgit v1.2.3 From 5edd1051f4f3bd7cddcc91e346fdd22eee639c72 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 25 Feb 2015 14:49:16 -0500 Subject: SUNRPC: Reorder rpc_task to put waitqueue related info in same cachelines Try to group all the data required by the waitqueues, their timers and timer callbacks into the same cachelines for performance. 
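The layout claims that follow come from pahole; assuming a DWARF-annotated build, they should be reproducible with an invocation along the lines of pahole -C rpc_task vmlinux (the exact options depend on the installed dwarves version).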
With this reordering, "pahole" reports the following structure on x86_64:

    struct rpc_task {
        atomic_t                    tk_count;           /*   0    4 */
        int                         tk_status;          /*   4    4 */
        struct list_head            tk_task;            /*   8   16 */
        void (*tk_callback)(struct rpc_task *);         /*  24    8 */
        void (*tk_action)(struct rpc_task *);           /*  32    8 */
        long unsigned int           tk_timeout;         /*  40    8 */
        long unsigned int           tk_runstate;        /*  48    8 */
        struct rpc_wait_queue *     tk_waitqueue;       /*  56    8 */
        /* --- cacheline 1 boundary (64 bytes) --- */
        union {
            struct work_struct      tk_work;            /*       64 */
            struct rpc_wait         tk_wait;            /*       56 */
        } u;                                            /*  64   64 */
        /* --- cacheline 2 boundary (128 bytes) --- */
        struct rpc_message          tk_msg;             /* 128   32 */
        void *                      tk_calldata;        /* 160    8 */
        const struct rpc_call_ops * tk_ops;             /* 168    8 */
        struct rpc_clnt *           tk_client;          /* 176    8 */
        struct rpc_rqst *           tk_rqstp;           /* 184    8 */
        /* --- cacheline 3 boundary (192 bytes) --- */
        struct workqueue_struct *   tk_workqueue;       /* 192    8 */
        ktime_t                     tk_start;           /* 200    8 */
        pid_t                       tk_owner;           /* 208    4 */
        short unsigned int          tk_flags;           /* 212    2 */
        short unsigned int          tk_timeouts;        /* 214    2 */
        short unsigned int          tk_pid;             /* 216    2 */
        unsigned char               tk_priority:2;      /* 218: 6 1 */
        unsigned char               tk_garb_retry:2;    /* 218: 4 1 */
        unsigned char               tk_cred_retry:2;    /* 218: 2 1 */
        unsigned char               tk_rebind_retry:2;  /* 218: 0 1 */

        /* size: 224, cachelines: 4, members: 24 */
        /* padding: 5 */
        /* last cacheline: 32 bytes */
    };

whereas on i386, it reports everything fitting into the 1st cacheline. Signed-off-by: Trond Myklebust --- include/linux/sunrpc/sched.h | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index d703f0ef37d8..ee0fbcf9b02e 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -42,40 +42,41 @@ struct rpc_wait { */ struct rpc_task { atomic_t tk_count; /* Reference count */ + int tk_status; /* result of last operation */ struct list_head tk_task; /* global list of tasks */ - struct rpc_clnt * tk_client; /* RPC client */ - struct rpc_rqst * tk_rqstp; /* RPC request */ - - /* - * RPC call state - */ - struct rpc_message tk_msg; /* RPC call info */ /* * callback to be executed after waking up * action next procedure for async tasks - * tk_ops caller callbacks */ void (*tk_callback)(struct rpc_task *); void (*tk_action)(struct rpc_task *); - const struct rpc_call_ops *tk_ops; - void * tk_calldata; unsigned long tk_timeout; /* timeout for rpc_sleep() */ unsigned long tk_runstate; /* Task run status */ - struct workqueue_struct *tk_workqueue; /* Normally rpciod, but could - * be any workqueue - */ + struct rpc_wait_queue *tk_waitqueue; /* RPC wait queue we're on */ union { struct work_struct tk_work; /* Async task work queue */ struct rpc_wait tk_wait; /* RPC wait */ } u; + /* + * RPC call state + */ + struct rpc_message tk_msg; /* RPC call info */ + void * tk_calldata; /* Caller private data */ + const struct rpc_call_ops *tk_ops; /* Caller callbacks */ + + struct rpc_clnt * tk_client; /* RPC client */ + struct rpc_rqst * tk_rqstp; /* RPC request */ + + struct workqueue_struct *tk_workqueue; /* Normally rpciod, but could + * be any workqueue + */ ktime_t tk_start; /* RPC task init timestamp */ pid_t tk_owner; /* Process id for batching tasks */ - int tk_status; /* result of last operation */ unsigned short tk_flags; /* misc flags */ unsigned short tk_timeouts; /* maj timeouts */ -- cgit v1.2.3 From 30c5116b113689c87a711a0963753adadd702c04 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 24 Feb 2015
20:31:39 -0500 Subject: SUNRPC: Uninline xprt_get(); It isn't performance critical. Also allow callers to pass NULL arguments to xprt_get() and xprt_put(). Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprt.h | 16 +++------------- net/sunrpc/xprt.c | 24 +++++++++++++++++++++--- 2 files changed, 24 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 69ef5b3ab038..1bdb59a2efe8 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -166,7 +167,7 @@ enum xprt_transports { }; struct rpc_xprt { - atomic_t count; /* Reference count */ + struct kref kref; /* Reference count */ struct rpc_xprt_ops * ops; /* transport methods */ const struct rpc_timeout *timeout; /* timeout parms */ @@ -318,24 +319,13 @@ int xprt_adjust_timeout(struct rpc_rqst *req); void xprt_release_xprt(struct rpc_xprt *xprt, struct rpc_task *task); void xprt_release_xprt_cong(struct rpc_xprt *xprt, struct rpc_task *task); void xprt_release(struct rpc_task *task); +struct rpc_xprt * xprt_get(struct rpc_xprt *xprt); void xprt_put(struct rpc_xprt *xprt); struct rpc_xprt * xprt_alloc(struct net *net, size_t size, unsigned int num_prealloc, unsigned int max_req); void xprt_free(struct rpc_xprt *); -/** - * xprt_get - return a reference to an RPC transport. - * @xprt: pointer to the transport - * - */ -static inline struct rpc_xprt *xprt_get(struct rpc_xprt *xprt) -{ - if (atomic_inc_not_zero(&xprt->count)) - return xprt; - return NULL; -} - static inline __be32 *xprt_skip_transport_header(struct rpc_xprt *xprt, __be32 *p) { return p + xprt->tsh_size; diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 37edea6fa92d..d8fd84c0cbba 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -1307,7 +1307,7 @@ void xprt_release(struct rpc_task *task) static void xprt_init(struct rpc_xprt *xprt, struct net *net) { - atomic_set(&xprt->count, 1); + kref_init(&xprt->kref); spin_lock_init(&xprt->transport_lock); spin_lock_init(&xprt->reserve_lock); @@ -1415,6 +1415,24 @@ static void xprt_destroy(struct rpc_xprt *xprt) xprt->ops->destroy(xprt); } +static void xprt_destroy_kref(struct kref *kref) +{ + xprt_destroy(container_of(kref, struct rpc_xprt, kref)); +} + +/** + * xprt_get - return a reference to an RPC transport. + * @xprt: pointer to the transport + * + */ +struct rpc_xprt *xprt_get(struct rpc_xprt *xprt) +{ + if (xprt != NULL && kref_get_unless_zero(&xprt->kref)) + return xprt; + return NULL; +} +EXPORT_SYMBOL_GPL(xprt_get); + /** * xprt_put - release a reference to an RPC transport. * @xprt: pointer to the transport @@ -1422,7 +1440,7 @@ static void xprt_destroy(struct rpc_xprt *xprt) */ void xprt_put(struct rpc_xprt *xprt) { - if (atomic_dec_and_test(&xprt->count)) - xprt_destroy(xprt); + if (xprt != NULL) + kref_put(&xprt->kref, xprt_destroy_kref); } EXPORT_SYMBOL_GPL(xprt_put); -- cgit v1.2.3 From fda1bfef9e465b28260d27cd9e538dd601c4cdc1 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 14 Feb 2015 17:48:49 -0500 Subject: SUNRPC: Make freeing of struct xprt rcu-safe Have it call kfree_rcu() to ensure that we can use it on rcu-protected lists. 
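A hedged stand-alone sketch of the pattern (demo struct; the real change embeds the rcu_head in struct rpc_xprt): kfree_rcu() defers the kfree() past an RCU grace period using storage inside the object itself, so readers under rcu_read_lock() never touch freed memory:

    #include <linux/rcupdate.h>
    #include <linux/slab.h>

    struct demo_obj {
        int payload;
        struct rcu_head rcu;    /* storage for the deferred free */
    };

    static void demo_release(struct demo_obj *obj)
    {
        /* no callback needed; kfree_rcu() uses the rcu member's offset */
        kfree_rcu(obj, rcu);
    }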
Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprt.h | 1 + net/sunrpc/xprt.c | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 1bdb59a2efe8..83218129ff28 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -257,6 +257,7 @@ struct rpc_xprt { struct dentry *debugfs; /* debugfs directory */ atomic_t inject_disconnect; #endif + struct rcu_head rcu; }; #if defined(CONFIG_SUNRPC_BACKCHANNEL) diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index d8fd84c0cbba..605858699f6c 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -48,6 +48,7 @@ #include #include #include +#include #include @@ -1166,7 +1167,7 @@ void xprt_free(struct rpc_xprt *xprt) { put_net(xprt->xprt_net); xprt_free_all_slots(xprt); - kfree(xprt); + kfree_rcu(xprt, rcu); } EXPORT_SYMBOL_GPL(xprt_free); -- cgit v1.2.3 From 70d2a3cf2f4ae2e93b7a661842d84c2b5132cee7 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 1 Feb 2016 22:06:56 +0000 Subject: efi: Remove redundant efi_set_variable_nonblocking() prototype There is no need for a separate nonblocking prototype definition for the SetVariable() UEFI Runtime Service, since it is identical to the blocking version. Signed-off-by: Ard Biesheuvel Signed-off-by: Matt Fleming Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/1454364428-494-3-git-send-email-matt@codeblueprint.co.uk Signed-off-by: Ingo Molnar --- include/linux/efi.h | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/efi.h b/include/linux/efi.h index 569b5a866bb1..8706e0aabedc 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -507,10 +507,6 @@ typedef efi_status_t efi_get_next_variable_t (unsigned long *name_size, efi_char typedef efi_status_t efi_set_variable_t (efi_char16_t *name, efi_guid_t *vendor, u32 attr, unsigned long data_size, void *data); -typedef efi_status_t -efi_set_variable_nonblocking_t(efi_char16_t *name, efi_guid_t *vendor, - u32 attr, unsigned long data_size, void *data); - typedef efi_status_t efi_get_next_high_mono_count_t (u32 *count); typedef void efi_reset_system_t (int reset_type, efi_status_t status, unsigned long data_size, efi_char16_t *data); @@ -851,7 +847,7 @@ extern struct efi { efi_get_variable_t *get_variable; efi_get_next_variable_t *get_next_variable; efi_set_variable_t *set_variable; - efi_set_variable_nonblocking_t *set_variable_nonblocking; + efi_set_variable_t *set_variable_nonblocking; efi_query_variable_info_t *query_variable_info; efi_update_capsule_t *update_capsule; efi_query_capsule_caps_t *query_capsule_caps; @@ -1091,7 +1087,7 @@ struct efivar_operations { efi_get_variable_t *get_variable; efi_get_next_variable_t *get_next_variable; efi_set_variable_t *set_variable; - efi_set_variable_nonblocking_t *set_variable_nonblocking; + efi_set_variable_t *set_variable_nonblocking; efi_query_variable_store_t *query_variable_store; }; -- cgit v1.2.3 From d3cac1f83c631b9fe5edaebcba49f6989bfff089 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 1 Feb 2016 22:06:57 +0000 Subject: efi/runtime-wrappers: Add a nonblocking version of QueryVariableInfo() This introduces a new runtime wrapper for the QueryVariableInfo() UEFI Runtime Service, which gives up immediately rather than spins on failure to grab the 
efi_runtime spinlock. This is required in the non-blocking path of the efi-pstore code. Signed-off-by: Ard Biesheuvel Signed-off-by: Matt Fleming Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/1454364428-494-4-git-send-email-matt@codeblueprint.co.uk Signed-off-by: Ingo Molnar --- drivers/firmware/efi/runtime-wrappers.c | 22 ++++++++++++++++++++++ include/linux/efi.h | 1 + 2 files changed, 23 insertions(+) (limited to 'include/linux') diff --git a/drivers/firmware/efi/runtime-wrappers.c b/drivers/firmware/efi/runtime-wrappers.c index 228bbf910461..e9f2867f0d91 100644 --- a/drivers/firmware/efi/runtime-wrappers.c +++ b/drivers/firmware/efi/runtime-wrappers.c @@ -230,6 +230,27 @@ static efi_status_t virt_efi_query_variable_info(u32 attr, return status; } +static efi_status_t +virt_efi_query_variable_info_nonblocking(u32 attr, + u64 *storage_space, + u64 *remaining_space, + u64 *max_variable_size) +{ + unsigned long flags; + efi_status_t status; + + if (efi.runtime_version < EFI_2_00_SYSTEM_TABLE_REVISION) + return EFI_UNSUPPORTED; + + if (!spin_trylock_irqsave(&efi_runtime_lock, flags)) + return EFI_NOT_READY; + + status = efi_call_virt(query_variable_info, attr, storage_space, + remaining_space, max_variable_size); + spin_unlock_irqrestore(&efi_runtime_lock, flags); + return status; +} + static efi_status_t virt_efi_get_next_high_mono_count(u32 *count) { unsigned long flags; @@ -300,6 +321,7 @@ void efi_native_runtime_setup(void) efi.get_next_high_mono_count = virt_efi_get_next_high_mono_count; efi.reset_system = virt_efi_reset_system; efi.query_variable_info = virt_efi_query_variable_info; + efi.query_variable_info_nonblocking = virt_efi_query_variable_info_nonblocking; efi.update_capsule = virt_efi_update_capsule; efi.query_capsule_caps = virt_efi_query_capsule_caps; } diff --git a/include/linux/efi.h b/include/linux/efi.h index 8706e0aabedc..ad1e177ba48e 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -849,6 +849,7 @@ extern struct efi { efi_set_variable_t *set_variable; efi_set_variable_t *set_variable_nonblocking; efi_query_variable_info_t *query_variable_info; + efi_query_variable_info_t *query_variable_info_nonblocking; efi_update_capsule_t *update_capsule; efi_query_capsule_caps_t *query_capsule_caps; efi_get_next_high_mono_count_t *get_next_high_mono_count; -- cgit v1.2.3 From ca0e30dcaa53a3fcb2dfdf74252d30bc40603eea Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 1 Feb 2016 22:06:58 +0000 Subject: efi: Add nonblocking option to efi_query_variable_store() The function efi_query_variable_store() may be invoked by efivar_entry_set_nonblocking(), which itself takes care to only call a non-blocking version of the SetVariable() runtime wrapper. However, efi_query_variable_store() may call the SetVariable() wrapper directly, as well as the wrapper for QueryVariableInfo(), both of which could deadlock in the same way we are trying to prevent by calling efivar_entry_set_nonblocking() in the first place. So instead, modify efi_query_variable_store() to use the non-blocking variants of QueryVariableInfo() (and give up rather than free up space if the available space is below EFI_MIN_RESERVE) if invoked with the 'nonblocking' argument set to true. Signed-off-by: Ard Biesheuvel Signed-off-by: Matt Fleming Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. 
Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/1454364428-494-5-git-send-email-matt@codeblueprint.co.uk Signed-off-by: Ingo Molnar --- arch/x86/platform/efi/quirks.c | 33 ++++++++++++++++++++++++++++++++- drivers/firmware/efi/vars.c | 16 ++++++++++++++-- include/linux/efi.h | 12 +++++++++--- 3 files changed, 55 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/platform/efi/quirks.c b/arch/x86/platform/efi/quirks.c index 453504662a33..2326bf51978f 100644 --- a/arch/x86/platform/efi/quirks.c +++ b/arch/x86/platform/efi/quirks.c @@ -56,6 +56,33 @@ void efi_delete_dummy_variable(void) 0, NULL); } +/* + * In the nonblocking case we do not attempt to perform garbage + * collection if we do not have enough free space. Rather, we do the + * bare minimum check and give up immediately if the available space + * is below EFI_MIN_RESERVE. + * + * This function is intended to be small and simple because it is + * invoked from crash handler paths. + */ +static efi_status_t +query_variable_store_nonblocking(u32 attributes, unsigned long size) +{ + efi_status_t status; + u64 storage_size, remaining_size, max_size; + + status = efi.query_variable_info_nonblocking(attributes, &storage_size, + &remaining_size, + &max_size); + if (status != EFI_SUCCESS) + return status; + + if (remaining_size - size < EFI_MIN_RESERVE) + return EFI_OUT_OF_RESOURCES; + + return EFI_SUCCESS; +} + /* * Some firmware implementations refuse to boot if there's insufficient space * in the variable store. Ensure that we never use more than a safe limit. @@ -63,7 +90,8 @@ void efi_delete_dummy_variable(void) * Return EFI_SUCCESS if it is safe to write 'size' bytes to the variable * store. 
*/ -efi_status_t efi_query_variable_store(u32 attributes, unsigned long size) +efi_status_t efi_query_variable_store(u32 attributes, unsigned long size, + bool nonblocking) { efi_status_t status; u64 storage_size, remaining_size, max_size; @@ -71,6 +99,9 @@ efi_status_t efi_query_variable_store(u32 attributes, unsigned long size) if (!(attributes & EFI_VARIABLE_NON_VOLATILE)) return 0; + if (nonblocking) + return query_variable_store_nonblocking(attributes, size); + status = efi.query_variable_info(attributes, &storage_size, &remaining_size, &max_size); if (status != EFI_SUCCESS) diff --git a/drivers/firmware/efi/vars.c b/drivers/firmware/efi/vars.c index 70a0fb10517f..d2a49626a335 100644 --- a/drivers/firmware/efi/vars.c +++ b/drivers/firmware/efi/vars.c @@ -234,7 +234,18 @@ check_var_size(u32 attributes, unsigned long size) if (!fops->query_variable_store) return EFI_UNSUPPORTED; - return fops->query_variable_store(attributes, size); + return fops->query_variable_store(attributes, size, false); +} + +static efi_status_t +check_var_size_nonblocking(u32 attributes, unsigned long size) +{ + const struct efivar_operations *fops = __efivars->ops; + + if (!fops->query_variable_store) + return EFI_UNSUPPORTED; + + return fops->query_variable_store(attributes, size, true); } static int efi_status_to_err(efi_status_t status) @@ -615,7 +626,8 @@ efivar_entry_set_nonblocking(efi_char16_t *name, efi_guid_t vendor, if (!spin_trylock_irqsave(&__efivars->lock, flags)) return -EBUSY; - status = check_var_size(attributes, size + ucs2_strsize(name, 1024)); + status = check_var_size_nonblocking(attributes, + size + ucs2_strsize(name, 1024)); if (status != EFI_SUCCESS) { spin_unlock_irqrestore(&__efivars->lock, flags); return -ENOSPC; diff --git a/include/linux/efi.h b/include/linux/efi.h index ad1e177ba48e..09f1559e7525 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -525,7 +525,9 @@ typedef efi_status_t efi_query_capsule_caps_t(efi_capsule_header_t **capsules, unsigned long count, u64 *max_size, int *reset_type); -typedef efi_status_t efi_query_variable_store_t(u32 attributes, unsigned long size); +typedef efi_status_t efi_query_variable_store_t(u32 attributes, + unsigned long size, + bool nonblocking); void efi_native_runtime_setup(void); @@ -881,13 +883,17 @@ extern void efi_enter_virtual_mode (void); /* switch EFI to virtual mode, if pos #ifdef CONFIG_X86 extern void efi_late_init(void); extern void efi_free_boot_services(void); -extern efi_status_t efi_query_variable_store(u32 attributes, unsigned long size); +extern efi_status_t efi_query_variable_store(u32 attributes, + unsigned long size, + bool nonblocking); extern void efi_find_mirror(void); #else static inline void efi_late_init(void) {} static inline void efi_free_boot_services(void) {} -static inline efi_status_t efi_query_variable_store(u32 attributes, unsigned long size) +static inline efi_status_t efi_query_variable_store(u32 attributes, + unsigned long size, + bool nonblocking) { return EFI_SUCCESS; } -- cgit v1.2.3 From c016ca08f89c6c78ed815f025262bdb87aba3f4c Mon Sep 17 00:00:00 2001 From: Robert Elliott Date: Mon, 1 Feb 2016 22:07:06 +0000 Subject: efi: Add NV memory attribute Add the NV memory attribute introduced in UEFI 2.5 and add a column for it in the types and attributes string used when printing the UEFI memory map. 
old: efi: mem61: [type=14 | | | | | | | |WB|WT|WC|UC] range=[0x0000000880000000-0x0000000c7fffffff) (16384MB) new: efi: mem61: [type=14 | | |NV| | | | | |WB|WT|WC|UC] range=[0x0000000880000000-0x0000000c7fffffff) (16384MB) Signed-off-by: Robert Elliott Signed-off-by: Matt Fleming Reviewed-by: Laszlo Ersek Cc: Andy Lutomirski Cc: Ard Biesheuvel Cc: Borislav Petkov Cc: Brian Gerst Cc: Dan Williams Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Ross Zwisler Cc: Taku Izumi Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/1454364428-494-13-git-send-email-matt@codeblueprint.co.uk Signed-off-by: Ingo Molnar --- drivers/firmware/efi/efi.c | 5 ++++- include/linux/efi.h | 1 + 2 files changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c index 20451c290233..f4370485c26a 100644 --- a/drivers/firmware/efi/efi.c +++ b/drivers/firmware/efi/efi.c @@ -582,13 +582,16 @@ char * __init efi_md_typeattr_format(char *buf, size_t size, if (attr & ~(EFI_MEMORY_UC | EFI_MEMORY_WC | EFI_MEMORY_WT | EFI_MEMORY_WB | EFI_MEMORY_UCE | EFI_MEMORY_RO | EFI_MEMORY_WP | EFI_MEMORY_RP | EFI_MEMORY_XP | + EFI_MEMORY_NV | EFI_MEMORY_RUNTIME | EFI_MEMORY_MORE_RELIABLE)) snprintf(pos, size, "|attr=0x%016llx]", (unsigned long long)attr); else - snprintf(pos, size, "|%3s|%2s|%2s|%2s|%2s|%2s|%3s|%2s|%2s|%2s|%2s]", + snprintf(pos, size, + "|%3s|%2s|%2s|%2s|%2s|%2s|%2s|%3s|%2s|%2s|%2s|%2s]", attr & EFI_MEMORY_RUNTIME ? "RUN" : "", attr & EFI_MEMORY_MORE_RELIABLE ? "MR" : "", + attr & EFI_MEMORY_NV ? "NV" : "", attr & EFI_MEMORY_XP ? "XP" : "", attr & EFI_MEMORY_RP ? "RP" : "", attr & EFI_MEMORY_WP ? "WP" : "", diff --git a/include/linux/efi.h b/include/linux/efi.h index 09f1559e7525..3c6cbbdae4aa 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -97,6 +97,7 @@ typedef struct { #define EFI_MEMORY_WP ((u64)0x0000000000001000ULL) /* write-protect */ #define EFI_MEMORY_RP ((u64)0x0000000000002000ULL) /* read-protect */ #define EFI_MEMORY_XP ((u64)0x0000000000004000ULL) /* execute-protect */ +#define EFI_MEMORY_NV ((u64)0x0000000000008000ULL) /* non-volatile */ #define EFI_MEMORY_MORE_RELIABLE \ ((u64)0x0000000000010000ULL) /* higher reliability */ #define EFI_MEMORY_RO ((u64)0x0000000000020000ULL) /* read-only */ -- cgit v1.2.3 From 5363de75307e333d89df7531f9dd8310d973ecdb Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Mon, 25 Jan 2016 20:30:30 +0100 Subject: usb: core: switch bus numbering to using idr USB bus numbering is based on directly dealing with bitmaps and defines a separate list of busses. This can be simplified and unified by using existing idr functionality. Signed-off-by: Heiner Kallweit Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/devices.c | 10 ++-------- drivers/usb/core/hcd.c | 21 ++++++--------------- drivers/usb/core/usb.c | 1 + drivers/usb/host/r8a66597-hcd.c | 9 ++------- drivers/usb/mon/mon_main.c | 5 ++--- include/linux/usb.h | 1 - include/linux/usb/hcd.h | 3 ++- 7 files changed, 15 insertions(+), 35 deletions(-) (limited to 'include/linux') diff --git a/drivers/usb/core/devices.c b/drivers/usb/core/devices.c index b35a6a52210f..6118a04f0b84 100644 --- a/drivers/usb/core/devices.c +++ b/drivers/usb/core/devices.c @@ -110,13 +110,6 @@ static const char format_endpt[] = /* E: Ad=xx(s) Atr=xx(ssss) MxPS=dddd Ivl=D?s */ "E: Ad=%02x(%c) Atr=%02x(%-4s) MxPS=%4d Ivl=%d%cs\n"; - -/* - * Need access to the driver and USB bus lists. 
- * extern struct list_head usb_bus_list; - * However, these will come from functions that return ptrs to each of them. - */ - /* * Wait for an connect/disconnect event to happen. We initialize * the event counter with an odd number, and each event will increment @@ -618,6 +611,7 @@ static ssize_t usb_device_read(struct file *file, char __user *buf, struct usb_bus *bus; ssize_t ret, total_written = 0; loff_t skip_bytes = *ppos; + int id; if (*ppos < 0) return -EINVAL; @@ -628,7 +622,7 @@ static ssize_t usb_device_read(struct file *file, char __user *buf, mutex_lock(&usb_bus_list_lock); /* print devices for all busses */ - list_for_each_entry(bus, &usb_bus_list, bus_list) { + idr_for_each_entry(&usb_bus_idr, bus, id) { /* recurse through all children of the root hub */ if (!bus_to_hcd(bus)->rh_registered) continue; diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c index 232c8c93dd3a..cf3eb22dbeb4 100644 --- a/drivers/usb/core/hcd.c +++ b/drivers/usb/core/hcd.c @@ -90,12 +90,11 @@ unsigned long usb_hcds_loaded; EXPORT_SYMBOL_GPL(usb_hcds_loaded); /* host controllers we manage */ -LIST_HEAD (usb_bus_list); -EXPORT_SYMBOL_GPL (usb_bus_list); +DEFINE_IDR (usb_bus_idr); +EXPORT_SYMBOL_GPL (usb_bus_idr); /* used when allocating bus numbers */ #define USB_MAXBUS 64 -static DECLARE_BITMAP(busmap, USB_MAXBUS); /* used when updating list of hcds */ DEFINE_MUTEX(usb_bus_list_lock); /* exported only for usbfs */ @@ -996,8 +995,6 @@ static void usb_bus_init (struct usb_bus *bus) bus->bandwidth_int_reqs = 0; bus->bandwidth_isoc_reqs = 0; mutex_init(&bus->usb_address0_mutex); - - INIT_LIST_HEAD (&bus->bus_list); } /*-------------------------------------------------------------------------*/ @@ -1018,16 +1015,12 @@ static int usb_register_bus(struct usb_bus *bus) int busnum; mutex_lock(&usb_bus_list_lock); - busnum = find_next_zero_bit(busmap, USB_MAXBUS, 1); - if (busnum >= USB_MAXBUS) { - printk (KERN_ERR "%s: too many buses\n", usbcore_name); + busnum = idr_alloc(&usb_bus_idr, bus, 1, USB_MAXBUS, GFP_KERNEL); + if (busnum < 0) { + pr_err("%s: failed to get bus number\n", usbcore_name); goto error_find_busnum; } - set_bit(busnum, busmap); bus->busnum = busnum; - - /* Add it to the local list of buses */ - list_add (&bus->bus_list, &usb_bus_list); mutex_unlock(&usb_bus_list_lock); usb_notify_add_bus(bus); @@ -1059,12 +1052,10 @@ static void usb_deregister_bus (struct usb_bus *bus) * itself up */ mutex_lock(&usb_bus_list_lock); - list_del (&bus->bus_list); + idr_remove(&usb_bus_idr, bus->busnum); mutex_unlock(&usb_bus_list_lock); usb_notify_remove_bus(bus); - - clear_bit(bus->busnum, busmap); } /** diff --git a/drivers/usb/core/usb.c b/drivers/usb/core/usb.c index 16ade41759cd..524c9822d2bb 100644 --- a/drivers/usb/core/usb.c +++ b/drivers/usb/core/usb.c @@ -1115,6 +1115,7 @@ static void __exit usb_exit(void) bus_unregister(&usb_bus_type); usb_acpi_unregister(); usb_debugfs_cleanup(); + idr_destroy(&usb_bus_idr); } subsys_initcall(usb_init); diff --git a/drivers/usb/host/r8a66597-hcd.c b/drivers/usb/host/r8a66597-hcd.c index 4cbd0633c5c2..1ef887361ac0 100644 --- a/drivers/usb/host/r8a66597-hcd.c +++ b/drivers/usb/host/r8a66597-hcd.c @@ -2099,13 +2099,8 @@ static void r8a66597_check_detect_child(struct r8a66597 *r8a66597, memset(now_map, 0, sizeof(now_map)); - list_for_each_entry(bus, &usb_bus_list, bus_list) { - if (!bus->root_hub) - continue; - - if (bus->busnum != hcd->self.busnum) - continue; - + bus = idr_find(&usb_bus_idr, hcd->self.busnum); + if (bus && bus->root_hub) { 
collect_usb_address_map(bus->root_hub, now_map); update_usb_address_map(r8a66597, bus->root_hub, now_map); } diff --git a/drivers/usb/mon/mon_main.c b/drivers/usb/mon/mon_main.c index fec3f1128fdc..9b87efb0e50d 100644 --- a/drivers/usb/mon/mon_main.c +++ b/drivers/usb/mon/mon_main.c @@ -349,7 +349,7 @@ struct mon_bus *mon_bus_lookup(unsigned int num) static int __init mon_init(void) { struct usb_bus *ubus; - int rc; + int rc, id; if ((rc = mon_text_init()) != 0) goto err_text; @@ -366,9 +366,8 @@ static int __init mon_init(void) // MOD_INC_USE_COUNT(which_module?); mutex_lock(&usb_bus_list_lock); - list_for_each_entry (ubus, &usb_bus_list, bus_list) { + idr_for_each_entry(&usb_bus_idr, ubus, id) mon_bus_init(ubus); - } usb_register_notify(&mon_nb); mutex_unlock(&usb_bus_list_lock); return 0; diff --git a/include/linux/usb.h b/include/linux/usb.h index 89533ba38691..0d348fa84a66 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -375,7 +375,6 @@ struct usb_bus { struct usb_devmap devmap; /* device address allocation map */ struct usb_device *root_hub; /* Root hub */ struct usb_bus *hs_companion; /* Companion EHCI bus, if any */ - struct list_head bus_list; /* list of busses */ struct mutex usb_address0_mutex; /* unaddressed device mutex */ diff --git a/include/linux/usb/hcd.h b/include/linux/usb/hcd.h index 4dcf8446dbcd..c293dd044599 100644 --- a/include/linux/usb/hcd.h +++ b/include/linux/usb/hcd.h @@ -23,6 +23,7 @@ #include #include +#include #define MAX_TOPO_LEVEL 6 @@ -630,7 +631,7 @@ extern void usb_set_device_state(struct usb_device *udev, /* exported only within usbcore */ -extern struct list_head usb_bus_list; +extern struct idr usb_bus_idr; extern struct mutex usb_bus_list_lock; extern wait_queue_head_t usb_kill_urb_queue; -- cgit v1.2.3 From 7dd9cba5bb90ffa9c60c1533b715dc91c5082cd9 Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Thu, 21 Jan 2016 15:18:47 +0100 Subject: usb: sysfs: make locking interruptible Commit 232275a ("USB: fix substandard locking for the sysfs files") introduced needed locking into sysfs operations on USB devices. It, however, uses uninterruptible sleep, and if error handling is underway, sleep lengths of tens of seconds are possible in extreme cases. Unless we are removing the device we should use interruptible sleep.
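The conversion applies one pattern throughout sysfs.c: take the device lock interruptibly and return -EINTR if a signal arrives before the lock is acquired. As a rough sketch of that pattern (example_show is a made-up attribute, not one from the patch):

static ssize_t example_show(struct device *dev,
			    struct device_attribute *attr, char *buf)
{
	struct usb_device *udev = to_usb_device(dev);
	int rc;

	rc = usb_lock_device_interruptible(udev);
	if (rc < 0)
		return -EINTR;	/* a signal interrupted the wait */
	rc = sprintf(buf, "%d\n", udev->devnum);
	usb_unlock_device(udev);
	return rc;
}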
Signed-off-by: Oliver Neukum Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/sysfs.c | 69 ++++++++++++++++++++++++++++++++++-------------- include/linux/device.h | 5 ++++ include/linux/usb.h | 7 ++--- 3 files changed, 58 insertions(+), 23 deletions(-) (limited to 'include/linux') diff --git a/drivers/usb/core/sysfs.c b/drivers/usb/core/sysfs.c index f195320d35c0..7a6209314997 100644 --- a/drivers/usb/core/sysfs.c +++ b/drivers/usb/core/sysfs.c @@ -23,10 +23,12 @@ static ssize_t field##_show(struct device *dev, \ { \ struct usb_device *udev; \ struct usb_host_config *actconfig; \ - ssize_t rc = 0; \ + ssize_t rc; \ \ udev = to_usb_device(dev); \ - usb_lock_device(udev); \ + rc = usb_lock_device_interruptible(udev); \ + if (rc < 0) \ + return -EINTR; \ actconfig = udev->actconfig; \ if (actconfig) \ rc = sprintf(buf, format_string, \ @@ -47,10 +49,12 @@ static ssize_t bMaxPower_show(struct device *dev, { struct usb_device *udev; struct usb_host_config *actconfig; - ssize_t rc = 0; + ssize_t rc; udev = to_usb_device(dev); - usb_lock_device(udev); + rc = usb_lock_device_interruptible(udev); + if (rc < 0) + return -EINTR; actconfig = udev->actconfig; if (actconfig) rc = sprintf(buf, "%dmA\n", usb_get_max_power(udev, actconfig)); @@ -64,10 +68,12 @@ static ssize_t configuration_show(struct device *dev, { struct usb_device *udev; struct usb_host_config *actconfig; - ssize_t rc = 0; + ssize_t rc; udev = to_usb_device(dev); - usb_lock_device(udev); + rc = usb_lock_device_interruptible(udev); + if (rc < 0) + return -EINTR; actconfig = udev->actconfig; if (actconfig && actconfig->string) rc = sprintf(buf, "%s\n", actconfig->string); @@ -84,11 +90,13 @@ static ssize_t bConfigurationValue_store(struct device *dev, const char *buf, size_t count) { struct usb_device *udev = to_usb_device(dev); - int config, value; + int config, value, rc; if (sscanf(buf, "%d", &config) != 1 || config < -1 || config > 255) return -EINVAL; - usb_lock_device(udev); + rc = usb_lock_device_interruptible(udev); + if (rc < 0) + return -EINTR; value = usb_set_configuration(udev, config); usb_unlock_device(udev); return (value < 0) ? 
value : count; @@ -105,7 +113,9 @@ static ssize_t name##_show(struct device *dev, \ int retval; \ \ udev = to_usb_device(dev); \ - usb_lock_device(udev); \ + retval = usb_lock_device_interruptible(udev); \ + if (retval < 0) \ + return -EINTR; \ retval = sprintf(buf, "%s\n", udev->name); \ usb_unlock_device(udev); \ return retval; \ @@ -227,11 +237,13 @@ static ssize_t avoid_reset_quirk_store(struct device *dev, const char *buf, size_t count) { struct usb_device *udev = to_usb_device(dev); - int val; + int val, rc; if (sscanf(buf, "%d", &val) != 1 || val < 0 || val > 1) return -EINVAL; - usb_lock_device(udev); + rc = usb_lock_device_interruptible(udev); + if (rc < 0) + return -EINTR; if (val) udev->quirks |= USB_QUIRK_RESET; else @@ -297,7 +309,7 @@ static ssize_t persist_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct usb_device *udev = to_usb_device(dev); - int value; + int value, rc; /* Hubs are always enabled for USB_PERSIST */ if (udev->descriptor.bDeviceClass == USB_CLASS_HUB) @@ -306,7 +318,9 @@ static ssize_t persist_store(struct device *dev, struct device_attribute *attr, if (sscanf(buf, "%d", &value) != 1) return -EINVAL; - usb_lock_device(udev); + rc = usb_lock_device_interruptible(udev); + if (rc < 0) + return -EINTR; udev->persist_enabled = !!value; usb_unlock_device(udev); return count; @@ -423,13 +437,16 @@ static ssize_t level_store(struct device *dev, struct device_attribute *attr, int len = count; char *cp; int rc = count; + int rv; warn_level(); cp = memchr(buf, '\n', count); if (cp) len = cp - buf; - usb_lock_device(udev); + rv = usb_lock_device_interruptible(udev); + if (rv < 0) + return -EINTR; if (len == sizeof on_string - 1 && strncmp(buf, on_string, len) == 0) @@ -469,7 +486,9 @@ static ssize_t usb2_hardware_lpm_store(struct device *dev, bool value; int ret; - usb_lock_device(udev); + ret = usb_lock_device_interruptible(udev); + if (ret < 0) + return -EINTR; ret = strtobool(buf, &value); @@ -539,8 +558,11 @@ static ssize_t usb3_hardware_lpm_u1_show(struct device *dev, { struct usb_device *udev = to_usb_device(dev); const char *p; + int rc; - usb_lock_device(udev); + rc = usb_lock_device_interruptible(udev); + if (rc < 0) + return -EINTR; if (udev->usb3_lpm_u1_enabled) p = "enabled"; @@ -558,8 +580,11 @@ static ssize_t usb3_hardware_lpm_u2_show(struct device *dev, { struct usb_device *udev = to_usb_device(dev); const char *p; + int rc; - usb_lock_device(udev); + rc = usb_lock_device_interruptible(udev); + if (rc < 0) + return -EINTR; if (udev->usb3_lpm_u2_enabled) p = "enabled"; @@ -818,14 +843,16 @@ read_descriptors(struct file *filp, struct kobject *kobj, struct usb_device *udev = to_usb_device(dev); size_t nleft = count; size_t srclen, n; - int cfgno; + int cfgno, rc; void *src; /* The binary attribute begins with the device descriptor. * Following that are the raw descriptor entries for all the * configurations (config plus subsidiary descriptors). 
*/ - usb_lock_device(udev); + rc = usb_lock_device_interruptible(udev); + if (rc < 0) + return -EINTR; for (cfgno = -1; cfgno < udev->descriptor.bNumConfigurations && nleft > 0; ++cfgno) { if (cfgno < 0) { @@ -972,7 +999,9 @@ static ssize_t supports_autosuspend_show(struct device *dev, { int s; - device_lock(dev); + s = device_lock_interruptible(dev); + if (s < 0) + return -EINTR; /* Devices will be autosuspended even when an interface isn't claimed */ s = (!dev->driver || to_usb_driver(dev->driver)->supports_autosuspend); device_unlock(dev); diff --git a/include/linux/device.h b/include/linux/device.h index 6d6f1fec092f..2d0e6e541d52 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -958,6 +958,11 @@ static inline void device_lock(struct device *dev) mutex_lock(&dev->mutex); } +static inline int device_lock_interruptible(struct device *dev) +{ + return mutex_lock_interruptible(&dev->mutex); +} + static inline int device_trylock(struct device *dev) { return mutex_trylock(&dev->mutex); diff --git a/include/linux/usb.h b/include/linux/usb.h index 0d348fa84a66..dc0ea0de8a81 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -641,9 +641,10 @@ extern struct usb_device *usb_hub_find_child(struct usb_device *hdev, if (!child) continue; else /* USB device locking */ -#define usb_lock_device(udev) device_lock(&(udev)->dev) -#define usb_unlock_device(udev) device_unlock(&(udev)->dev) -#define usb_trylock_device(udev) device_trylock(&(udev)->dev) +#define usb_lock_device(udev) device_lock(&(udev)->dev) +#define usb_unlock_device(udev) device_unlock(&(udev)->dev) +#define usb_lock_device_interruptible(udev) device_lock_interruptible(&(udev)->dev) +#define usb_trylock_device(udev) device_trylock(&(udev)->dev) extern int usb_lock_device_for_reset(struct usb_device *udev, const struct usb_interface *iface); -- cgit v1.2.3 From 1b83349fddd1efa814a83735e44e684a139d2425 Mon Sep 17 00:00:00 2001 From: Antonio Ospite Date: Wed, 27 Jan 2016 15:05:49 +0100 Subject: usb/storage: misc fixes to comments in include/linux/usb/storage.h The fixes are: - fix indentation of a comment - fix a typo in the comment about bulk_cb_wrap.Length - make comment about US_BULK_CB_SIGN look like the one about US_BULK_CS_SIGN below - make the comment about bulk_cs_wrap.Signature look more like the one about bulk_cb_wrap.Signature Signed-off-by: Antonio Ospite Cc: Greg Kroah-Hartman Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/storage.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/usb/storage.h b/include/linux/usb/storage.h index cb33fff2ba0b..305ee8db7faf 100644 --- a/include/linux/usb/storage.h +++ b/include/linux/usb/storage.h @@ -45,9 +45,9 @@ #define USB_PR_DEVICE 0xff /* Use device's value */ - /* - * Bulk only data structures - */ +/* + * Bulk only data structures + */ /* command block wrapper */ struct bulk_cb_wrap { @@ -56,18 +56,18 @@ struct bulk_cb_wrap { __le32 DataTransferLength; /* size of data */ __u8 Flags; /* direction in bit 0 */ __u8 Lun; /* LUN normally 0 */ - __u8 Length; /* of of the CDB */ + __u8 Length; /* length of the CDB */ __u8 CDB[16]; /* max command */ }; #define US_BULK_CB_WRAP_LEN 31 -#define US_BULK_CB_SIGN 0x43425355 /*spells out USBC */ +#define US_BULK_CB_SIGN 0x43425355 /* spells out 'USBC' */ #define US_BULK_FLAG_IN (1 << 7) #define US_BULK_FLAG_OUT 0 /* command status wrapper */ struct bulk_cs_wrap { - __le32 Signature; /* should = 'USBS' */ + __le32 Signature; /* contains 'USBS' 
*/ __u32 Tag; /* same as original command */ __le32 Residue; /* amount not transferred */ __u8 Status; /* see below */ -- cgit v1.2.3 From de1df26b7cef702a32ae876ed45c1112f523df48 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 5 Feb 2016 02:37:42 +0100 Subject: cpufreq: Clean up default and fallback governor setup The preprocessor magic used for setting the default cpufreq governor (and for using the performance governor as a fallback one for that matter) is really nasty, so replace it with __weak functions and overrides. Signed-off-by: Rafael J. Wysocki Acked-by: Saravana Kannan Acked-by: Viresh Kumar --- drivers/cpufreq/cpufreq.c | 37 +++++++++++++++++++--------------- drivers/cpufreq/cpufreq_conservative.c | 10 +++++---- drivers/cpufreq/cpufreq_ondemand.c | 36 ++++++++++++++++----------------- drivers/cpufreq/cpufreq_performance.c | 18 +++++++++++++---- drivers/cpufreq/cpufreq_powersave.c | 10 +++++---- drivers/cpufreq/cpufreq_userspace.c | 10 +++++---- include/linux/cpufreq.h | 25 ++--------------------- 7 files changed, 73 insertions(+), 73 deletions(-) (limited to 'include/linux') diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index e979ec78b695..34b17447e0d1 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -959,6 +959,11 @@ static int cpufreq_add_dev_interface(struct cpufreq_policy *policy) return cpufreq_add_dev_symlink(policy); } +__weak struct cpufreq_governor *cpufreq_default_governor(void) +{ + return NULL; +} + static int cpufreq_init_policy(struct cpufreq_policy *policy) { struct cpufreq_governor *gov = NULL; @@ -968,11 +973,14 @@ static int cpufreq_init_policy(struct cpufreq_policy *policy) /* Update governor of new_policy to the governor used before hotplug */ gov = find_governor(policy->last_governor); - if (gov) + if (gov) { pr_debug("Restoring governor %s for cpu %d\n", policy->governor->name, policy->cpu); - else - gov = CPUFREQ_DEFAULT_GOVERNOR; + } else { + gov = cpufreq_default_governor(); + if (!gov) + return -ENODATA; + } new_policy.governor = gov; @@ -1920,21 +1928,16 @@ int cpufreq_driver_target(struct cpufreq_policy *policy, } EXPORT_SYMBOL_GPL(cpufreq_driver_target); +__weak struct cpufreq_governor *cpufreq_fallback_governor(void) +{ + return NULL; +} + static int __cpufreq_governor(struct cpufreq_policy *policy, unsigned int event) { int ret; - /* Only must be defined when default governor is known to have latency - restrictions, like e.g. conservative or ondemand. 
- That this is the case is already ensured in Kconfig - */ -#ifdef CONFIG_CPU_FREQ_GOV_PERFORMANCE - struct cpufreq_governor *gov = &cpufreq_gov_performance; -#else - struct cpufreq_governor *gov = NULL; -#endif - /* Don't start any governor operations if we are entering suspend */ if (cpufreq_suspended) return 0; @@ -1948,12 +1951,14 @@ static int __cpufreq_governor(struct cpufreq_policy *policy, if (policy->governor->max_transition_latency && policy->cpuinfo.transition_latency > policy->governor->max_transition_latency) { - if (!gov) - return -EINVAL; - else { + struct cpufreq_governor *gov = cpufreq_fallback_governor(); + + if (gov) { pr_warn("%s governor failed, too long transition latency of HW, fallback to %s governor\n", policy->governor->name, gov->name); policy->governor = gov; + } else { + return -EINVAL; } } diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c index 606ad74abe6e..8504a70a4785 100644 --- a/drivers/cpufreq/cpufreq_conservative.c +++ b/drivers/cpufreq/cpufreq_conservative.c @@ -26,10 +26,7 @@ static DEFINE_PER_CPU(struct cs_cpu_dbs_info_s, cs_cpu_dbs_info); static int cs_cpufreq_governor_dbs(struct cpufreq_policy *policy, unsigned int event); -#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_CONSERVATIVE -static -#endif -struct cpufreq_governor cpufreq_gov_conservative = { +static struct cpufreq_governor cpufreq_gov_conservative = { .name = "conservative", .governor = cs_cpufreq_governor_dbs, .max_transition_latency = TRANSITION_LATENCY_LIMIT, @@ -399,6 +396,11 @@ MODULE_DESCRIPTION("'cpufreq_conservative' - A dynamic cpufreq governor for " MODULE_LICENSE("GPL"); #ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_CONSERVATIVE +struct cpufreq_governor *cpufreq_default_governor(void) +{ + return &cpufreq_gov_conservative; +} + fs_initcall(cpufreq_gov_dbs_init); #else module_init(cpufreq_gov_dbs_init); diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index eae51070c034..929e193ac1c1 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -31,9 +31,7 @@ static DEFINE_PER_CPU(struct od_cpu_dbs_info_s, od_cpu_dbs_info); static struct od_ops od_ops; -#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND static struct cpufreq_governor cpufreq_gov_ondemand; -#endif static unsigned int default_powersave_bias; @@ -554,6 +552,19 @@ static struct common_dbs_data od_dbs_cdata = { .mutex = __MUTEX_INITIALIZER(od_dbs_cdata.mutex), }; +static int od_cpufreq_governor_dbs(struct cpufreq_policy *policy, + unsigned int event) +{ + return cpufreq_governor_dbs(policy, &od_dbs_cdata, event); +} + +static struct cpufreq_governor cpufreq_gov_ondemand = { + .name = "ondemand", + .governor = od_cpufreq_governor_dbs, + .max_transition_latency = TRANSITION_LATENCY_LIMIT, + .owner = THIS_MODULE, +}; + static void od_set_powersave_bias(unsigned int powersave_bias) { struct cpufreq_policy *policy; @@ -605,22 +616,6 @@ void od_unregister_powersave_bias_handler(void) } EXPORT_SYMBOL_GPL(od_unregister_powersave_bias_handler); -static int od_cpufreq_governor_dbs(struct cpufreq_policy *policy, - unsigned int event) -{ - return cpufreq_governor_dbs(policy, &od_dbs_cdata, event); -} - -#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND -static -#endif -struct cpufreq_governor cpufreq_gov_ondemand = { - .name = "ondemand", - .governor = od_cpufreq_governor_dbs, - .max_transition_latency = TRANSITION_LATENCY_LIMIT, - .owner = THIS_MODULE, -}; - static int __init cpufreq_gov_dbs_init(void) { return 
cpufreq_register_governor(&cpufreq_gov_ondemand); @@ -638,6 +633,11 @@ MODULE_DESCRIPTION("'cpufreq_ondemand' - A dynamic cpufreq governor for " MODULE_LICENSE("GPL"); #ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND +struct cpufreq_governor *cpufreq_default_governor(void) +{ + return &cpufreq_gov_ondemand; +} + fs_initcall(cpufreq_gov_dbs_init); #else module_init(cpufreq_gov_dbs_init); diff --git a/drivers/cpufreq/cpufreq_performance.c b/drivers/cpufreq/cpufreq_performance.c index cf117deb39b1..af9f4b96f5a8 100644 --- a/drivers/cpufreq/cpufreq_performance.c +++ b/drivers/cpufreq/cpufreq_performance.c @@ -33,10 +33,7 @@ static int cpufreq_governor_performance(struct cpufreq_policy *policy, return 0; } -#ifdef CONFIG_CPU_FREQ_GOV_PERFORMANCE_MODULE -static -#endif -struct cpufreq_governor cpufreq_gov_performance = { +static struct cpufreq_governor cpufreq_gov_performance = { .name = "performance", .governor = cpufreq_governor_performance, .owner = THIS_MODULE, @@ -52,6 +49,19 @@ static void __exit cpufreq_gov_performance_exit(void) cpufreq_unregister_governor(&cpufreq_gov_performance); } +#ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE +struct cpufreq_governor *cpufreq_default_governor(void) +{ + return &cpufreq_gov_performance; +} +#endif +#ifndef CONFIG_CPU_FREQ_GOV_PERFORMANCE_MODULE +struct cpufreq_governor *cpufreq_fallback_governor(void) +{ + return &cpufreq_gov_performance; +} +#endif + MODULE_AUTHOR("Dominik Brodowski "); MODULE_DESCRIPTION("CPUfreq policy governor 'performance'"); MODULE_LICENSE("GPL"); diff --git a/drivers/cpufreq/cpufreq_powersave.c b/drivers/cpufreq/cpufreq_powersave.c index e3b874c235ea..b8b400232a74 100644 --- a/drivers/cpufreq/cpufreq_powersave.c +++ b/drivers/cpufreq/cpufreq_powersave.c @@ -33,10 +33,7 @@ static int cpufreq_governor_powersave(struct cpufreq_policy *policy, return 0; } -#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_POWERSAVE -static -#endif -struct cpufreq_governor cpufreq_gov_powersave = { +static struct cpufreq_governor cpufreq_gov_powersave = { .name = "powersave", .governor = cpufreq_governor_powersave, .owner = THIS_MODULE, @@ -57,6 +54,11 @@ MODULE_DESCRIPTION("CPUfreq policy governor 'powersave'"); MODULE_LICENSE("GPL"); #ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_POWERSAVE +struct cpufreq_governor *cpufreq_default_governor(void) +{ + return &cpufreq_gov_powersave; +} + fs_initcall(cpufreq_gov_powersave_init); #else module_init(cpufreq_gov_powersave_init); diff --git a/drivers/cpufreq/cpufreq_userspace.c b/drivers/cpufreq/cpufreq_userspace.c index 4dbf1db16aca..4d16f45ee1da 100644 --- a/drivers/cpufreq/cpufreq_userspace.c +++ b/drivers/cpufreq/cpufreq_userspace.c @@ -89,10 +89,7 @@ static int cpufreq_governor_userspace(struct cpufreq_policy *policy, return rc; } -#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE -static -#endif -struct cpufreq_governor cpufreq_gov_userspace = { +static struct cpufreq_governor cpufreq_gov_userspace = { .name = "userspace", .governor = cpufreq_governor_userspace, .store_setspeed = cpufreq_set, @@ -116,6 +113,11 @@ MODULE_DESCRIPTION("CPUfreq policy governor 'userspace'"); MODULE_LICENSE("GPL"); #ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE +struct cpufreq_governor *cpufreq_default_governor(void) +{ + return &cpufreq_gov_userspace; +} + fs_initcall(cpufreq_gov_userspace_init); #else module_init(cpufreq_gov_userspace_init); diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 88a4215125bc..d0bf555b6bbf 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -464,29 +464,8 @@ int 
__cpufreq_driver_target(struct cpufreq_policy *policy, int cpufreq_register_governor(struct cpufreq_governor *governor); void cpufreq_unregister_governor(struct cpufreq_governor *governor); -/* CPUFREQ DEFAULT GOVERNOR */ -/* - * Performance governor is fallback governor if any other gov failed to auto - * load due latency restrictions - */ -#ifdef CONFIG_CPU_FREQ_GOV_PERFORMANCE -extern struct cpufreq_governor cpufreq_gov_performance; -#endif -#ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE -#define CPUFREQ_DEFAULT_GOVERNOR (&cpufreq_gov_performance) -#elif defined(CONFIG_CPU_FREQ_DEFAULT_GOV_POWERSAVE) -extern struct cpufreq_governor cpufreq_gov_powersave; -#define CPUFREQ_DEFAULT_GOVERNOR (&cpufreq_gov_powersave) -#elif defined(CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE) -extern struct cpufreq_governor cpufreq_gov_userspace; -#define CPUFREQ_DEFAULT_GOVERNOR (&cpufreq_gov_userspace) -#elif defined(CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND) -extern struct cpufreq_governor cpufreq_gov_ondemand; -#define CPUFREQ_DEFAULT_GOVERNOR (&cpufreq_gov_ondemand) -#elif defined(CONFIG_CPU_FREQ_DEFAULT_GOV_CONSERVATIVE) -extern struct cpufreq_governor cpufreq_gov_conservative; -#define CPUFREQ_DEFAULT_GOVERNOR (&cpufreq_gov_conservative) -#endif +struct cpufreq_governor *cpufreq_default_governor(void); +struct cpufreq_governor *cpufreq_fallback_governor(void); /********************************************************************* * FREQUENCY TABLE HELPERS * -- cgit v1.2.3 From b7aad8e2685b0aa58295bc4250c8476c9c7193eb Mon Sep 17 00:00:00 2001 From: Felipe Balbi Date: Wed, 30 Dec 2015 09:55:45 +0900 Subject: extcon: palmas: Add the support for VBUS detection by using GPIO This patch adds support for VBUS detection by using a GPIO pin. Signed-off-by: Felipe Balbi Signed-off-by: Chanwoo Choi Acked-by: Lee Jones --- drivers/extcon/extcon-palmas.c | 50 ++++++++++++++++++++++++++++++++++++++++++ include/linux/mfd/palmas.h | 3 +++ 2 files changed, 53 insertions(+) (limited to 'include/linux') diff --git a/drivers/extcon/extcon-palmas.c b/drivers/extcon/extcon-palmas.c index 93c30a885740..885ee95a6a7b 100644 --- a/drivers/extcon/extcon-palmas.c +++ b/drivers/extcon/extcon-palmas.c @@ -216,11 +216,23 @@ static int palmas_usb_probe(struct platform_device *pdev) return PTR_ERR(palmas_usb->id_gpiod); } + palmas_usb->vbus_gpiod = devm_gpiod_get_optional(&pdev->dev, "vbus", + GPIOD_IN); + if (IS_ERR(palmas_usb->vbus_gpiod)) { + dev_err(&pdev->dev, "failed to get vbus gpio\n"); + return PTR_ERR(palmas_usb->vbus_gpiod); + } + if (palmas_usb->enable_id_detection && palmas_usb->id_gpiod) { palmas_usb->enable_id_detection = false; palmas_usb->enable_gpio_id_detection = true; } + if (palmas_usb->enable_vbus_detection && palmas_usb->vbus_gpiod) { + palmas_usb->enable_vbus_detection = false; + palmas_usb->enable_gpio_vbus_detection = true; + } + if (palmas_usb->enable_gpio_id_detection) { u32 debounce; @@ -311,6 +323,40 @@ static int palmas_usb_probe(struct platform_device *pdev) palmas_usb->vbus_irq, status); return status; } + } else if (palmas_usb->enable_gpio_vbus_detection) { + /* remux GPIO_1 as VBUSDET */ + status = palmas_update_bits(palmas, + PALMAS_PU_PD_OD_BASE, + PALMAS_PRIMARY_SECONDARY_PAD1, + PALMAS_PRIMARY_SECONDARY_PAD1_GPIO_1_MASK, + (1 << PALMAS_PRIMARY_SECONDARY_PAD1_GPIO_1_SHIFT)); + if (status < 0) { + dev_err(&pdev->dev, "can't remux GPIO1\n"); + return status; + } + + palmas_usb->vbus_otg_irq = regmap_irq_get_virq(palmas->irq_data, + PALMAS_VBUS_OTG_IRQ); + palmas_usb->gpio_vbus_irq =
gpiod_to_irq(palmas_usb->vbus_gpiod); + if (palmas_usb->gpio_vbus_irq < 0) { + dev_err(&pdev->dev, "failed to get vbus irq\n"); + return palmas_usb->gpio_vbus_irq; + } + status = devm_request_threaded_irq(&pdev->dev, + palmas_usb->gpio_vbus_irq, + NULL, + palmas_vbus_irq_handler, + IRQF_TRIGGER_FALLING | + IRQF_TRIGGER_RISING | + IRQF_ONESHOT | + IRQF_EARLY_RESUME, + "palmas_usb_vbus", + palmas_usb); + if (status < 0) { + dev_err(&pdev->dev, + "failed to request handler for vbus irq\n"); + return status; + } } palmas_enable_irq(palmas_usb); @@ -337,6 +383,8 @@ static int palmas_usb_suspend(struct device *dev) if (device_may_wakeup(dev)) { if (palmas_usb->enable_vbus_detection) enable_irq_wake(palmas_usb->vbus_irq); + if (palmas_usb->enable_gpio_vbus_detection) + enable_irq_wake(palmas_usb->gpio_vbus_irq); if (palmas_usb->enable_id_detection) enable_irq_wake(palmas_usb->id_irq); if (palmas_usb->enable_gpio_id_detection) @@ -352,6 +400,8 @@ static int palmas_usb_resume(struct device *dev) if (device_may_wakeup(dev)) { if (palmas_usb->enable_vbus_detection) disable_irq_wake(palmas_usb->vbus_irq); + if (palmas_usb->enable_gpio_vbus_detection) + disable_irq_wake(palmas_usb->gpio_vbus_irq); if (palmas_usb->enable_id_detection) disable_irq_wake(palmas_usb->id_irq); if (palmas_usb->enable_gpio_id_detection) diff --git a/include/linux/mfd/palmas.h b/include/linux/mfd/palmas.h index c800dbc42079..5c9a1d44c125 100644 --- a/include/linux/mfd/palmas.h +++ b/include/linux/mfd/palmas.h @@ -580,7 +580,9 @@ struct palmas_usb { int vbus_irq; int gpio_id_irq; + int gpio_vbus_irq; struct gpio_desc *id_gpiod; + struct gpio_desc *vbus_gpiod; unsigned long sw_debounce_jiffies; struct delayed_work wq_detectid; @@ -589,6 +591,7 @@ struct palmas_usb { bool enable_vbus_detection; bool enable_id_detection; bool enable_gpio_id_detection; + bool enable_gpio_vbus_detection; }; #define comparator_to_palmas(x) container_of((x), struct palmas_usb, comparator) -- cgit v1.2.3 From 7e6213f4345c3798b7fb7af41d221e2fd77ec6a6 Mon Sep 17 00:00:00 2001 From: Paul Kocialkowski Date: Fri, 5 Feb 2016 19:42:19 +0100 Subject: regulator: lp872x: Add enable GPIO pin support LP872x regulators are made active via the EN pin, which might be hooked to a GPIO. This adds support for driving the GPIO high when the driver is in use. Signed-off-by: Paul Kocialkowski Signed-off-by: Mark Brown --- .../devicetree/bindings/regulator/lp872x.txt | 1 + drivers/regulator/lp872x.c | 34 ++++++++++++++++++++++ include/linux/regulator/lp872x.h | 5 ++++ 3 files changed, 40 insertions(+) (limited to 'include/linux') diff --git a/Documentation/devicetree/bindings/regulator/lp872x.txt b/Documentation/devicetree/bindings/regulator/lp872x.txt index 78183182dad9..ca58a68ffdf1 100644 --- a/Documentation/devicetree/bindings/regulator/lp872x.txt +++ b/Documentation/devicetree/bindings/regulator/lp872x.txt @@ -28,6 +28,7 @@ Optional properties: - ti,dvs-gpio: GPIO specifier for external DVS pin control of LP872x devices. - ti,dvs-vsel: DVS selector. 0 = SEL_V1, 1 = SEL_V2. - ti,dvs-state: initial DVS pin state. 0 = DVS_LOW, 1 = DVS_HIGH. + - enable-gpios: GPIO specifier for EN pin control of LP872x devices. Sub nodes for regulator_init_data LP8720 has maximum 6 nodes. 
(child name: ldo1 ~ 5 and buck) diff --git a/drivers/regulator/lp872x.c b/drivers/regulator/lp872x.c index 21c49d871aba..38992112fd6e 100644 --- a/drivers/regulator/lp872x.c +++ b/drivers/regulator/lp872x.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -757,6 +758,33 @@ set_default_dvs_mode: default_dvs_mode[lp->chipid]); } +static int lp872x_hw_enable(struct lp872x *lp) +{ + int ret, gpio; + + if (!lp->pdata) + return -EINVAL; + + gpio = lp->pdata->enable_gpio; + if (!gpio_is_valid(gpio)) + return 0; + + /* Always set enable GPIO high. */ + ret = devm_gpio_request_one(lp->dev, gpio, GPIOF_OUT_INIT_HIGH, "LP872X EN"); + if (ret) { + dev_err(lp->dev, "gpio request err: %d\n", ret); + return ret; + } + + /* Each chip has a different enable delay. */ + if (lp->chipid == LP8720) + usleep_range(LP8720_ENABLE_DELAY, 1.5 * LP8720_ENABLE_DELAY); + else + usleep_range(LP8725_ENABLE_DELAY, 1.5 * LP8725_ENABLE_DELAY); + + return 0; +} + static int lp872x_config(struct lp872x *lp) { struct lp872x_platform_data *pdata = lp->pdata; @@ -875,6 +903,8 @@ static struct lp872x_platform_data of_property_read_u8(np, "ti,dvs-state", &dvs_state); pdata->dvs->init_state = dvs_state ? DVS_HIGH : DVS_LOW; + pdata->enable_gpio = of_get_named_gpio(np, "enable-gpios", 0); + if (of_get_child_count(np) == 0) goto out; @@ -948,6 +978,10 @@ static int lp872x_probe(struct i2c_client *cl, const struct i2c_device_id *id) lp->chipid = id->driver_data; i2c_set_clientdata(cl, lp); + ret = lp872x_hw_enable(lp); + if (ret) + return ret; + ret = lp872x_config(lp); if (ret) return ret; diff --git a/include/linux/regulator/lp872x.h b/include/linux/regulator/lp872x.h index 132e05c46661..6029279f4eed 100644 --- a/include/linux/regulator/lp872x.h +++ b/include/linux/regulator/lp872x.h @@ -18,6 +18,9 @@ #define LP872X_MAX_REGULATORS 9 +#define LP8720_ENABLE_DELAY 200 +#define LP8725_ENABLE_DELAY 30000 + enum lp872x_regulator_id { LP8720_ID_BASE, LP8720_ID_LDO1 = LP8720_ID_BASE, @@ -79,12 +82,14 @@ struct lp872x_regulator_data { * @update_config : if LP872X_GENERAL_CFG register is updated, set true * @regulator_data : platform regulator id and init data * @dvs : dvs data for buck voltage control + * @enable_gpio : gpio pin number for enable control */ struct lp872x_platform_data { u8 general_config; bool update_config; struct lp872x_regulator_data regulator_data[LP872X_MAX_REGULATORS]; struct lp872x_dvs *dvs; + int enable_gpio; }; #endif -- cgit v1.2.3 From 5bbe029ff7bc8ff82ef31d3480d68c95642a3c7a Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Fri, 5 Feb 2016 14:57:47 -0600 Subject: PCI: Move pci_set_flags() from asm-generic/pci-bridge.h to linux/pci.h The PCI flag management constants and functions were previously declared in include/asm-generic/pci-bridge.h. But they are not specific to bridges, and arches did not include pci-bridge.h consistently. 
Move the following interfaces and related constants to include/linux/pci.h and remove pci-bridge.h: pci_set_flags() pci_add_flags() pci_clear_flags() pci_has_flag() This fixes these warnings when building for some arches: drivers/pci/host/pcie-designware.c:562:20: error: 'PCI_PROBE_ONLY' undeclared (first use in this function) drivers/pci/host/pcie-designware.c:562:7: error: implicit declaration of function 'pci_has_flag' [-Werror=implicit-function-declaration] Signed-off-by: Bjorn Helgaas --- drivers/pci/probe.c | 7 +++++ include/asm-generic/pci-bridge.h | 65 ---------------------------------------- include/linux/pci.h | 22 ++++++++++++++ 3 files changed, 29 insertions(+), 65 deletions(-) (limited to 'include/linux') diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index 6d7ab9bb0d5a..ead1ac1dc1e3 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -1803,6 +1803,13 @@ static int only_one_child(struct pci_bus *bus) return 0; if (pci_pcie_type(parent) == PCI_EXP_TYPE_ROOT_PORT) return 1; + + /* + * PCIe downstream ports are bridges that normally lead to only a + * device 0, but if PCI_SCAN_ALL_PCIE_DEVS is set, scan all + * possible devices, not just device 0. See PCIe spec r3.0, + * sec 7.3.1. + */ if (parent->has_secondary_link && !pci_has_flag(PCI_SCAN_ALL_PCIE_DEVS)) return 1; diff --git a/include/asm-generic/pci-bridge.h b/include/asm-generic/pci-bridge.h index 20db2e5a0a69..1b1d82f66d57 100644 --- a/include/asm-generic/pci-bridge.h +++ b/include/asm-generic/pci-bridge.h @@ -6,69 +6,4 @@ */ #ifndef _ASM_GENERIC_PCI_BRIDGE_H #define _ASM_GENERIC_PCI_BRIDGE_H - -#ifdef __KERNEL__ - -enum { - /* Force re-assigning all resources (ignore firmware - * setup completely) - */ - PCI_REASSIGN_ALL_RSRC = 0x00000001, - - /* Re-assign all bus numbers */ - PCI_REASSIGN_ALL_BUS = 0x00000002, - - /* Do not try to assign, just use existing setup */ - PCI_PROBE_ONLY = 0x00000004, - - /* Don't bother with ISA alignment unless the bridge has - * ISA forwarding enabled - */ - PCI_CAN_SKIP_ISA_ALIGN = 0x00000008, - - /* Enable domain numbers in /proc */ - PCI_ENABLE_PROC_DOMAINS = 0x00000010, - /* ... except for domain 0 */ - PCI_COMPAT_DOMAIN_0 = 0x00000020, - - /* PCIe downstream ports are bridges that normally lead to only a - * device 0, but if this is set, we scan all possible devices, not - * just device 0. 
- */ - PCI_SCAN_ALL_PCIE_DEVS = 0x00000040, -}; - -#ifdef CONFIG_PCI -extern unsigned int pci_flags; - -static inline void pci_set_flags(int flags) -{ - pci_flags = flags; -} - -static inline void pci_add_flags(int flags) -{ - pci_flags |= flags; -} - -static inline void pci_clear_flags(int flags) -{ - pci_flags &= ~flags; -} - -static inline int pci_has_flag(int flag) -{ - return pci_flags & flag; -} -#else -static inline void pci_set_flags(int flags) { } -static inline void pci_add_flags(int flags) { } -static inline void pci_clear_flags(int flags) { } -static inline int pci_has_flag(int flag) -{ - return 0; -} -#endif /* CONFIG_PCI */ - -#endif /* __KERNEL__ */ #endif /* _ASM_GENERIC_PCI_BRIDGE_H */ diff --git a/include/linux/pci.h b/include/linux/pci.h index 27df4a6585da..3d371c150753 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -746,9 +746,26 @@ struct pci_driver { .vendor = PCI_VENDOR_ID_##vend, .device = (dev), \ .subvendor = PCI_ANY_ID, .subdevice = PCI_ANY_ID, 0, 0 +enum { + PCI_REASSIGN_ALL_RSRC = 0x00000001, /* ignore firmware setup */ + PCI_REASSIGN_ALL_BUS = 0x00000002, /* reassign all bus numbers */ + PCI_PROBE_ONLY = 0x00000004, /* use existing setup */ + PCI_CAN_SKIP_ISA_ALIGN = 0x00000008, /* don't do ISA alignment */ + PCI_ENABLE_PROC_DOMAINS = 0x00000010, /* enable domains in /proc */ + PCI_COMPAT_DOMAIN_0 = 0x00000020, /* ... except domain 0 */ + PCI_SCAN_ALL_PCIE_DEVS = 0x00000040, /* scan all, not just dev 0 */ +}; + /* these external functions are only available when PCI support is enabled */ #ifdef CONFIG_PCI +extern unsigned int pci_flags; + +static inline void pci_set_flags(int flags) { pci_flags = flags; } +static inline void pci_add_flags(int flags) { pci_flags |= flags; } +static inline void pci_clear_flags(int flags) { pci_flags &= ~flags; } +static inline int pci_has_flag(int flag) { return pci_flags & flag; } + void pcie_bus_configure_settings(struct pci_bus *bus); enum pcie_bus_config_types { @@ -1405,6 +1422,11 @@ void pci_register_set_vga_state(arch_set_vga_state_t func); #else /* CONFIG_PCI is not enabled */ +static inline void pci_set_flags(int flags) { } +static inline void pci_add_flags(int flags) { } +static inline void pci_clear_flags(int flags) { } +static inline int pci_has_flag(int flag) { return 0; } + /* * If the system does not have PCI, clearly these return errors. Define * these as simple inline functions to avoid hair in drivers. -- cgit v1.2.3 From 80b14d5e61ca6d08e46b4fc72baf6e4f738b30ce Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 14 Feb 2015 20:31:59 -0500 Subject: SUNRPC: Add a structure to track multiple transports In order to support multipathing/trunking we will need the ability to track multiple transports. This patch sets up a basic structure for doing so. 
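As an illustration of how the new iterator API below is meant to be consumed (a hedged sketch, assuming xps points to a valid rpc_xprt_switch and that xprt_iter_get_next() returns a referenced transport; not code from this patch), a caller would walk the tracked transports and drop each reference when done:

#include <linux/sunrpc/xprt.h>
#include <linux/sunrpc/xprtmultipath.h>

static void example_walk_xprts(struct rpc_xprt_switch *xps)
{
	struct rpc_xprt_iter xpi;
	struct rpc_xprt *xprt;

	xprt_iter_init(&xpi, xps);
	while ((xprt = xprt_iter_get_next(&xpi)) != NULL) {
		/* ... use the transport ... */
		xprt_put(xprt);	/* drop the reference taken by get_next */
	}
	xprt_iter_destroy(&xpi);
}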
Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprt.h | 5 + include/linux/sunrpc/xprtmultipath.h | 69 +++++ net/sunrpc/Makefile | 3 +- net/sunrpc/xprt.c | 1 + net/sunrpc/xprtmultipath.c | 475 +++++++++++++++++++++++++++++++++++ 5 files changed, 552 insertions(+), 1 deletion(-) create mode 100644 include/linux/sunrpc/xprtmultipath.h create mode 100644 net/sunrpc/xprtmultipath.c (limited to 'include/linux') diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 83218129ff28..fb0d212e0d3a 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -197,6 +197,11 @@ struct rpc_xprt { transport */ unsigned int bind_index; /* bind function index */ + /* + * Multipath + */ + struct list_head xprt_switch; + /* * Connection of transports */ diff --git a/include/linux/sunrpc/xprtmultipath.h b/include/linux/sunrpc/xprtmultipath.h new file mode 100644 index 000000000000..5a9acffa41be --- /dev/null +++ b/include/linux/sunrpc/xprtmultipath.h @@ -0,0 +1,69 @@ +/* + * RPC client multipathing definitions + * + * Copyright (c) 2015, 2016, Primary Data, Inc. All rights reserved. + * + * Trond Myklebust + */ +#ifndef _NET_SUNRPC_XPRTMULTIPATH_H +#define _NET_SUNRPC_XPRTMULTIPATH_H + +struct rpc_xprt_iter_ops; +struct rpc_xprt_switch { + spinlock_t xps_lock; + struct kref xps_kref; + + unsigned int xps_nxprts; + struct list_head xps_xprt_list; + + struct net * xps_net; + + const struct rpc_xprt_iter_ops *xps_iter_ops; + + struct rcu_head xps_rcu; +}; + +struct rpc_xprt_iter { + struct rpc_xprt_switch __rcu *xpi_xpswitch; + struct rpc_xprt * xpi_cursor; + + const struct rpc_xprt_iter_ops *xpi_ops; +}; + + +struct rpc_xprt_iter_ops { + void (*xpi_rewind)(struct rpc_xprt_iter *); + struct rpc_xprt *(*xpi_xprt)(struct rpc_xprt_iter *); + struct rpc_xprt *(*xpi_next)(struct rpc_xprt_iter *); +}; + +extern struct rpc_xprt_switch *xprt_switch_alloc(struct rpc_xprt *xprt, + gfp_t gfp_flags); + +extern struct rpc_xprt_switch *xprt_switch_get(struct rpc_xprt_switch *xps); +extern void xprt_switch_put(struct rpc_xprt_switch *xps); + +extern void rpc_xprt_switch_set_roundrobin(struct rpc_xprt_switch *xps); + +extern void rpc_xprt_switch_add_xprt(struct rpc_xprt_switch *xps, + struct rpc_xprt *xprt); +extern void rpc_xprt_switch_remove_xprt(struct rpc_xprt_switch *xps, + struct rpc_xprt *xprt); + +extern void xprt_iter_init(struct rpc_xprt_iter *xpi, + struct rpc_xprt_switch *xps); + +extern void xprt_iter_init_listall(struct rpc_xprt_iter *xpi, + struct rpc_xprt_switch *xps); + +extern void xprt_iter_destroy(struct rpc_xprt_iter *xpi); + +extern struct rpc_xprt_switch *xprt_iter_xchg_switch( + struct rpc_xprt_iter *xpi, + struct rpc_xprt_switch *newswitch); + +extern struct rpc_xprt *xprt_iter_xprt(struct rpc_xprt_iter *xpi); +extern struct rpc_xprt *xprt_iter_get_xprt(struct rpc_xprt_iter *xpi); +extern struct rpc_xprt *xprt_iter_get_next(struct rpc_xprt_iter *xpi); + +#endif diff --git a/net/sunrpc/Makefile b/net/sunrpc/Makefile index b512fbd9d79a..ea7ffa12e0f9 100644 --- a/net/sunrpc/Makefile +++ b/net/sunrpc/Makefile @@ -12,7 +12,8 @@ sunrpc-y := clnt.o xprt.o socklib.o xprtsock.o sched.o \ svc.o svcsock.o svcauth.o svcauth_unix.o \ addr.o rpcb_clnt.o timer.o xdr.o \ sunrpc_syms.o cache.o rpc_pipe.o \ - svc_xprt.o + svc_xprt.o \ + xprtmultipath.o sunrpc-$(CONFIG_SUNRPC_DEBUG) += debugfs.o sunrpc-$(CONFIG_SUNRPC_BACKCHANNEL) += backchannel_rqst.o sunrpc-$(CONFIG_PROC_FS) += stats.o diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 605858699f6c..323b332f8f7c 100644 
--- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -1319,6 +1319,7 @@ static void xprt_init(struct rpc_xprt *xprt, struct net *net) spin_lock_init(&xprt->bc_pa_lock); INIT_LIST_HEAD(&xprt->bc_pa_list); #endif /* CONFIG_SUNRPC_BACKCHANNEL */ + INIT_LIST_HEAD(&xprt->xprt_switch); xprt->last_used = jiffies; xprt->cwnd = RPC_INITCWND; diff --git a/net/sunrpc/xprtmultipath.c b/net/sunrpc/xprtmultipath.c new file mode 100644 index 000000000000..e7fd76975d86 --- /dev/null +++ b/net/sunrpc/xprtmultipath.c @@ -0,0 +1,475 @@ +/* + * Multipath support for RPC + * + * Copyright (c) 2015, 2016, Primary Data, Inc. All rights reserved. + * + * Trond Myklebust + * + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +typedef struct rpc_xprt *(*xprt_switch_find_xprt_t)(struct list_head *head, + const struct rpc_xprt *cur); + +static const struct rpc_xprt_iter_ops rpc_xprt_iter_singular; +static const struct rpc_xprt_iter_ops rpc_xprt_iter_roundrobin; +static const struct rpc_xprt_iter_ops rpc_xprt_iter_listall; + +static void xprt_switch_add_xprt_locked(struct rpc_xprt_switch *xps, + struct rpc_xprt *xprt) +{ + if (unlikely(xprt_get(xprt) == NULL)) + return; + list_add_tail_rcu(&xprt->xprt_switch, &xps->xps_xprt_list); + smp_wmb(); + if (xps->xps_nxprts == 0) + xps->xps_net = xprt->xprt_net; + xps->xps_nxprts++; +} + +/** + * rpc_xprt_switch_add_xprt - Add a new rpc_xprt to an rpc_xprt_switch + * @xps: pointer to struct rpc_xprt_switch + * @xprt: pointer to struct rpc_xprt + * + * Adds xprt to the end of the list of struct rpc_xprt in xps. + */ +void rpc_xprt_switch_add_xprt(struct rpc_xprt_switch *xps, + struct rpc_xprt *xprt) +{ + if (xprt == NULL) + return; + spin_lock(&xps->xps_lock); + if (xps->xps_net == xprt->xprt_net || xps->xps_net == NULL) + xprt_switch_add_xprt_locked(xps, xprt); + spin_unlock(&xps->xps_lock); +} + +static void xprt_switch_remove_xprt_locked(struct rpc_xprt_switch *xps, + struct rpc_xprt *xprt) +{ + if (unlikely(xprt == NULL)) + return; + xps->xps_nxprts--; + if (xps->xps_nxprts == 0) + xps->xps_net = NULL; + smp_wmb(); + list_del_rcu(&xprt->xprt_switch); +} + +/** + * rpc_xprt_switch_remove_xprt - Removes an rpc_xprt from a rpc_xprt_switch + * @xps: pointer to struct rpc_xprt_switch + * @xprt: pointer to struct rpc_xprt + * + * Removes xprt from the list of struct rpc_xprt in xps. + */ +void rpc_xprt_switch_remove_xprt(struct rpc_xprt_switch *xps, + struct rpc_xprt *xprt) +{ + spin_lock(&xps->xps_lock); + xprt_switch_remove_xprt_locked(xps, xprt); + spin_unlock(&xps->xps_lock); + xprt_put(xprt); +} + +/** + * xprt_switch_alloc - Allocate a new struct rpc_xprt_switch + * @xprt: pointer to struct rpc_xprt + * @gfp_flags: allocation flags + * + * On success, returns an initialised struct rpc_xprt_switch, containing + * the entry xprt. Returns NULL on failure. 
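As a usage sketch (not part of this patch; the function name, the two-transport setup and the omitted error handling are illustrative), the API declared in xprtmultipath.h composes as follows: allocate a switch around one transport, add further transports, then walk the set once with the listall iterator.

static void example_walk_transports(struct rpc_xprt *xprt1,
				    struct rpc_xprt *xprt2)
{
	struct rpc_xprt_switch *xps;
	struct rpc_xprt_iter xpi;
	struct rpc_xprt *pos;

	xps = xprt_switch_alloc(xprt1, GFP_KERNEL);
	if (xps == NULL)
		return;
	/* transports must live in the same network namespace as the switch */
	rpc_xprt_switch_add_xprt(xps, xprt2);

	/* walk the list once; each entry is returned with a reference held */
	xprt_iter_init_listall(&xpi, xps);
	while ((pos = xprt_iter_get_next(&xpi)) != NULL) {
		/* ... inspect or ping pos here ... */
		xprt_put(pos);
	}
	xprt_iter_destroy(&xpi);
	xprt_switch_put(xps);
}

The iterator takes its own reference to the switch in __xprt_iter_init(), so the caller's reference may be dropped as soon as the iterator is initialised.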
+ */ +struct rpc_xprt_switch *xprt_switch_alloc(struct rpc_xprt *xprt, + gfp_t gfp_flags) +{ + struct rpc_xprt_switch *xps; + + xps = kmalloc(sizeof(*xps), gfp_flags); + if (xps != NULL) { + spin_lock_init(&xps->xps_lock); + kref_init(&xps->xps_kref); + xps->xps_nxprts = 0; + INIT_LIST_HEAD(&xps->xps_xprt_list); + xps->xps_iter_ops = &rpc_xprt_iter_singular; + xprt_switch_add_xprt_locked(xps, xprt); + } + + return xps; +} + +static void xprt_switch_free_entries(struct rpc_xprt_switch *xps) +{ + spin_lock(&xps->xps_lock); + while (!list_empty(&xps->xps_xprt_list)) { + struct rpc_xprt *xprt; + + xprt = list_first_entry(&xps->xps_xprt_list, + struct rpc_xprt, xprt_switch); + xprt_switch_remove_xprt_locked(xps, xprt); + spin_unlock(&xps->xps_lock); + xprt_put(xprt); + spin_lock(&xps->xps_lock); + } + spin_unlock(&xps->xps_lock); +} + +static void xprt_switch_free(struct kref *kref) +{ + struct rpc_xprt_switch *xps = container_of(kref, + struct rpc_xprt_switch, xps_kref); + + xprt_switch_free_entries(xps); + kfree_rcu(xps, xps_rcu); +} + +/** + * xprt_switch_get - Return a reference to a rpc_xprt_switch + * @xps: pointer to struct rpc_xprt_switch + * + * Returns a reference to xps unless the refcount is already zero. + */ +struct rpc_xprt_switch *xprt_switch_get(struct rpc_xprt_switch *xps) +{ + if (xps != NULL && kref_get_unless_zero(&xps->xps_kref)) + return xps; + return NULL; +} + +/** + * xprt_switch_put - Release a reference to a rpc_xprt_switch + * @xps: pointer to struct rpc_xprt_switch + * + * Release the reference to xps, and free it once the refcount is zero. + */ +void xprt_switch_put(struct rpc_xprt_switch *xps) +{ + if (xps != NULL) + kref_put(&xps->xps_kref, xprt_switch_free); +} + +/** + * rpc_xprt_switch_set_roundrobin - Set a round-robin policy on rpc_xprt_switch + * @xps: pointer to struct rpc_xprt_switch + * + * Sets a round-robin default policy for iterators acting on xps. 
+ */ +void rpc_xprt_switch_set_roundrobin(struct rpc_xprt_switch *xps) +{ + if (READ_ONCE(xps->xps_iter_ops) != &rpc_xprt_iter_roundrobin) + WRITE_ONCE(xps->xps_iter_ops, &rpc_xprt_iter_roundrobin); +} + +static +const struct rpc_xprt_iter_ops *xprt_iter_ops(const struct rpc_xprt_iter *xpi) +{ + if (xpi->xpi_ops != NULL) + return xpi->xpi_ops; + return rcu_dereference(xpi->xpi_xpswitch)->xps_iter_ops; +} + +static +void xprt_iter_no_rewind(struct rpc_xprt_iter *xpi) +{ +} + +static +void xprt_iter_default_rewind(struct rpc_xprt_iter *xpi) +{ + WRITE_ONCE(xpi->xpi_cursor, NULL); +} + +static +struct rpc_xprt *xprt_switch_find_first_entry(struct list_head *head) +{ + return list_first_or_null_rcu(head, struct rpc_xprt, xprt_switch); +} + +static +struct rpc_xprt *xprt_iter_first_entry(struct rpc_xprt_iter *xpi) +{ + struct rpc_xprt_switch *xps = rcu_dereference(xpi->xpi_xpswitch); + + if (xps == NULL) + return NULL; + return xprt_switch_find_first_entry(&xps->xps_xprt_list); +} + +static +struct rpc_xprt *xprt_switch_find_current_entry(struct list_head *head, + const struct rpc_xprt *cur) +{ + struct rpc_xprt *pos; + + list_for_each_entry_rcu(pos, head, xprt_switch) { + if (cur == pos) + return pos; + } + return NULL; +} + +static +struct rpc_xprt *xprt_iter_current_entry(struct rpc_xprt_iter *xpi) +{ + struct rpc_xprt_switch *xps = rcu_dereference(xpi->xpi_xpswitch); + struct list_head *head; + + if (xps == NULL) + return NULL; + head = &xps->xps_xprt_list; + if (xpi->xpi_cursor == NULL || xps->xps_nxprts < 2) + return xprt_switch_find_first_entry(head); + return xprt_switch_find_current_entry(head, xpi->xpi_cursor); +} + +static +struct rpc_xprt *xprt_switch_find_next_entry(struct list_head *head, + const struct rpc_xprt *cur) +{ + struct rpc_xprt *pos, *prev = NULL; + + list_for_each_entry_rcu(pos, head, xprt_switch) { + if (cur == prev) + return pos; + prev = pos; + } + return NULL; +} + +static +struct rpc_xprt *xprt_switch_set_next_cursor(struct list_head *head, + struct rpc_xprt **cursor, + xprt_switch_find_xprt_t find_next) +{ + struct rpc_xprt *cur, *pos, *old; + + cur = READ_ONCE(*cursor); + for (;;) { + old = cur; + pos = find_next(head, old); + if (pos == NULL) + break; + cur = cmpxchg_relaxed(cursor, old, pos); + if (cur == old) + break; + } + return pos; +} + +static +struct rpc_xprt *xprt_iter_next_entry_multiple(struct rpc_xprt_iter *xpi, + xprt_switch_find_xprt_t find_next) +{ + struct rpc_xprt_switch *xps = rcu_dereference(xpi->xpi_xpswitch); + struct list_head *head; + + if (xps == NULL) + return NULL; + head = &xps->xps_xprt_list; + if (xps->xps_nxprts < 2) + return xprt_switch_find_first_entry(head); + return xprt_switch_set_next_cursor(head, &xpi->xpi_cursor, find_next); +} + +static +struct rpc_xprt *xprt_switch_find_next_entry_roundrobin(struct list_head *head, + const struct rpc_xprt *cur) +{ + struct rpc_xprt *ret; + + ret = xprt_switch_find_next_entry(head, cur); + if (ret != NULL) + return ret; + return xprt_switch_find_first_entry(head); +} + +static +struct rpc_xprt *xprt_iter_next_entry_roundrobin(struct rpc_xprt_iter *xpi) +{ + return xprt_iter_next_entry_multiple(xpi, + xprt_switch_find_next_entry_roundrobin); +} + +static +struct rpc_xprt *xprt_iter_next_entry_all(struct rpc_xprt_iter *xpi) +{ + return xprt_iter_next_entry_multiple(xpi, xprt_switch_find_next_entry); +} + +/* + * xprt_iter_rewind - Resets the xprt iterator + * @xpi: pointer to rpc_xprt_iter + * + * Resets xpi to ensure that it points to the first entry in the list + * of transports. 
+ */ +static +void xprt_iter_rewind(struct rpc_xprt_iter *xpi) +{ + rcu_read_lock(); + xprt_iter_ops(xpi)->xpi_rewind(xpi); + rcu_read_unlock(); +} + +static void __xprt_iter_init(struct rpc_xprt_iter *xpi, + struct rpc_xprt_switch *xps, + const struct rpc_xprt_iter_ops *ops) +{ + rcu_assign_pointer(xpi->xpi_xpswitch, xprt_switch_get(xps)); + xpi->xpi_cursor = NULL; + xpi->xpi_ops = ops; +} + +/** + * xprt_iter_init - Initialise an xprt iterator + * @xpi: pointer to rpc_xprt_iter + * @xps: pointer to rpc_xprt_switch + * + * Initialises the iterator to use the default iterator ops + * as set in xps. This function is mainly intended for internal + * use in the rpc_client. + */ +void xprt_iter_init(struct rpc_xprt_iter *xpi, + struct rpc_xprt_switch *xps) +{ + __xprt_iter_init(xpi, xps, NULL); +} + +/** + * xprt_iter_init_listall - Initialise an xprt iterator + * @xpi: pointer to rpc_xprt_iter + * @xps: pointer to rpc_xprt_switch + * + * Initialises the iterator to iterate once through the entire list + * of entries in xps. + */ +void xprt_iter_init_listall(struct rpc_xprt_iter *xpi, + struct rpc_xprt_switch *xps) +{ + __xprt_iter_init(xpi, xps, &rpc_xprt_iter_listall); +} + +/** + * xprt_iter_xchg_switch - Atomically swap out the rpc_xprt_switch + * @xpi: pointer to rpc_xprt_iter + * @xps: pointer to a new rpc_xprt_switch or NULL + * + * Swaps out the existing xpi->xpi_xpswitch with a new value. + */ +struct rpc_xprt_switch *xprt_iter_xchg_switch(struct rpc_xprt_iter *xpi, + struct rpc_xprt_switch *newswitch) +{ + struct rpc_xprt_switch __rcu *oldswitch; + + /* Atomically swap out the old xpswitch */ + oldswitch = xchg(&xpi->xpi_xpswitch, RCU_INITIALIZER(newswitch)); + if (newswitch != NULL) + xprt_iter_rewind(xpi); + return rcu_dereference_protected(oldswitch, true); +} + +/** + * xprt_iter_destroy - Destroys the xprt iterator + * @xpi pointer to rpc_xprt_iter + */ +void xprt_iter_destroy(struct rpc_xprt_iter *xpi) +{ + xprt_switch_put(xprt_iter_xchg_switch(xpi, NULL)); +} + +/** + * xprt_iter_xprt - Returns the rpc_xprt pointed to by the cursor + * @xpi: pointer to rpc_xprt_iter + * + * Returns a pointer to the struct rpc_xprt that is currently + * pointed to by the cursor. + * Caller must be holding rcu_read_lock(). + */ +struct rpc_xprt *xprt_iter_xprt(struct rpc_xprt_iter *xpi) +{ + WARN_ON_ONCE(!rcu_read_lock_held()); + return xprt_iter_ops(xpi)->xpi_xprt(xpi); +} + +static +struct rpc_xprt *xprt_iter_get_helper(struct rpc_xprt_iter *xpi, + struct rpc_xprt *(*fn)(struct rpc_xprt_iter *)) +{ + struct rpc_xprt *ret; + + do { + ret = fn(xpi); + if (ret == NULL) + break; + ret = xprt_get(ret); + } while (ret == NULL); + return ret; +} + +/** + * xprt_iter_get_xprt - Returns the rpc_xprt pointed to by the cursor + * @xpi: pointer to rpc_xprt_iter + * + * Returns a reference to the struct rpc_xprt that is currently + * pointed to by the cursor. + */ +struct rpc_xprt *xprt_iter_get_xprt(struct rpc_xprt_iter *xpi) +{ + struct rpc_xprt *xprt; + + rcu_read_lock(); + xprt = xprt_iter_get_helper(xpi, xprt_iter_ops(xpi)->xpi_xprt); + rcu_read_unlock(); + return xprt; +} + +/** + * xprt_iter_get_next - Returns the next rpc_xprt following the cursor + * @xpi: pointer to rpc_xprt_iter + * + * Returns a reference to the struct rpc_xprt that immediately follows the + * entry pointed to by the cursor. 
+ */ +struct rpc_xprt *xprt_iter_get_next(struct rpc_xprt_iter *xpi) +{ + struct rpc_xprt *xprt; + + rcu_read_lock(); + xprt = xprt_iter_get_helper(xpi, xprt_iter_ops(xpi)->xpi_next); + rcu_read_unlock(); + return xprt; +} + +/* Policy for always returning the first entry in the rpc_xprt_switch */ +static +const struct rpc_xprt_iter_ops rpc_xprt_iter_singular = { + .xpi_rewind = xprt_iter_no_rewind, + .xpi_xprt = xprt_iter_first_entry, + .xpi_next = xprt_iter_first_entry, +}; + +/* Policy for round-robin iteration of entries in the rpc_xprt_switch */ +static +const struct rpc_xprt_iter_ops rpc_xprt_iter_roundrobin = { + .xpi_rewind = xprt_iter_default_rewind, + .xpi_xprt = xprt_iter_current_entry, + .xpi_next = xprt_iter_next_entry_roundrobin, +}; + +/* Policy for once-through iteration of entries in the rpc_xprt_switch */ +static +const struct rpc_xprt_iter_ops rpc_xprt_iter_listall = { + .xpi_rewind = xprt_iter_default_rewind, + .xpi_xprt = xprt_iter_current_entry, + .xpi_next = xprt_iter_next_entry_all, +}; -- cgit v1.2.3 From ad01b2c68d0ac5f3d4a3f807bec77b720b1c62a0 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 30 Jan 2016 14:17:26 -0500 Subject: SUNRPC: Make rpc_clnt store the multipath iterators This is a pre-patch for the RPC multipath code. It sets up the storage in struct rpc_clnt for the multipath code. Signed-off-by: Trond Myklebust --- include/linux/sunrpc/clnt.h | 2 ++ net/sunrpc/auth_gss/auth_gss.c | 4 ++-- net/sunrpc/clnt.c | 32 +++++++++++++++++++++++++++++--- net/sunrpc/rpcb_clnt.c | 4 ++-- 4 files changed, 35 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 0c5c2cbe96c9..1713e41d65ae 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -25,6 +25,7 @@ #include #include #include +#include struct rpc_inode; @@ -67,6 +68,7 @@ struct rpc_clnt { #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) struct dentry *cl_debugfs; /* debugfs directory */ #endif + struct rpc_xprt_iter cl_xpi; }; /* diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index 799e65b944b9..3ce391cb80c4 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -1181,12 +1181,12 @@ static struct rpc_auth * gss_create(struct rpc_auth_create_args *args, struct rpc_clnt *clnt) { struct gss_auth *gss_auth; - struct rpc_xprt *xprt = rcu_access_pointer(clnt->cl_xprt); + struct rpc_xprt_switch *xps = rcu_access_pointer(clnt->cl_xpi.xpi_xpswitch); while (clnt != clnt->cl_parent) { struct rpc_clnt *parent = clnt->cl_parent; /* Find the original parent for this transport */ - if (rcu_access_pointer(parent->cl_xprt) != xprt) + if (rcu_access_pointer(parent->cl_xpi.xpi_xpswitch) != xps) break; clnt = parent; } diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 2b4ad7aa40c6..625fb8a184c6 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -354,6 +354,7 @@ static void rpc_free_clid(struct rpc_clnt *clnt) } static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args, + struct rpc_xprt_switch *xps, struct rpc_xprt *xprt, struct rpc_clnt *parent) { @@ -411,6 +412,8 @@ static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args, } rpc_clnt_set_transport(clnt, xprt, timeout); + xprt_iter_init(&clnt->cl_xpi, xps); + xprt_switch_put(xps); clnt->cl_rtt = &clnt->cl_rtt_default; rpc_init_rtt(&clnt->cl_rtt_default, clnt->cl_timeout->to_initval); @@ -438,6 +441,7 @@ out_no_clid: out_err: rpciod_down(); out_no_rpciod: + 
xprt_switch_put(xps); xprt_put(xprt); return ERR_PTR(err); } @@ -446,8 +450,13 @@ struct rpc_clnt *rpc_create_xprt(struct rpc_create_args *args, struct rpc_xprt *xprt) { struct rpc_clnt *clnt = NULL; + struct rpc_xprt_switch *xps; - clnt = rpc_new_client(args, xprt, NULL); + xps = xprt_switch_alloc(xprt, GFP_KERNEL); + if (xps == NULL) + return ERR_PTR(-ENOMEM); + + clnt = rpc_new_client(args, xps, xprt, NULL); if (IS_ERR(clnt)) return clnt; @@ -564,6 +573,7 @@ EXPORT_SYMBOL_GPL(rpc_create); static struct rpc_clnt *__rpc_clone_client(struct rpc_create_args *args, struct rpc_clnt *clnt) { + struct rpc_xprt_switch *xps; struct rpc_xprt *xprt; struct rpc_clnt *new; int err; @@ -571,13 +581,17 @@ static struct rpc_clnt *__rpc_clone_client(struct rpc_create_args *args, err = -ENOMEM; rcu_read_lock(); xprt = xprt_get(rcu_dereference(clnt->cl_xprt)); + xps = xprt_switch_get(rcu_dereference(clnt->cl_xpi.xpi_xpswitch)); rcu_read_unlock(); - if (xprt == NULL) + if (xprt == NULL || xps == NULL) { + xprt_put(xprt); + xprt_switch_put(xps); goto out_err; + } args->servername = xprt->servername; args->nodename = clnt->cl_nodename; - new = rpc_new_client(args, xprt, clnt); + new = rpc_new_client(args, xps, xprt, clnt); if (IS_ERR(new)) { err = PTR_ERR(new); goto out_err; @@ -657,6 +671,7 @@ int rpc_switch_client_transport(struct rpc_clnt *clnt, { const struct rpc_timeout *old_timeo; rpc_authflavor_t pseudoflavor; + struct rpc_xprt_switch *xps, *oldxps; struct rpc_xprt *xprt, *old; struct rpc_clnt *parent; int err; @@ -668,10 +683,17 @@ int rpc_switch_client_transport(struct rpc_clnt *clnt, return PTR_ERR(xprt); } + xps = xprt_switch_alloc(xprt, GFP_KERNEL); + if (xps == NULL) { + xprt_put(xprt); + return -ENOMEM; + } + pseudoflavor = clnt->cl_auth->au_flavor; old_timeo = clnt->cl_timeout; old = rpc_clnt_set_transport(clnt, xprt, timeout); + oldxps = xprt_iter_xchg_switch(&clnt->cl_xpi, xps); rpc_unregister_client(clnt); __rpc_clnt_remove_pipedir(clnt); @@ -697,14 +719,17 @@ int rpc_switch_client_transport(struct rpc_clnt *clnt, synchronize_rcu(); if (parent != clnt) rpc_release_client(parent); + xprt_switch_put(oldxps); xprt_put(old); dprintk("RPC: replaced xprt for clnt %p\n", clnt); return 0; out_revert: + xps = xprt_iter_xchg_switch(&clnt->cl_xpi, oldxps); rpc_clnt_set_transport(clnt, old, old_timeo); clnt->cl_parent = parent; rpc_client_register(clnt, pseudoflavor, NULL); + xprt_switch_put(xps); xprt_put(xprt); dprintk("RPC: failed to switch xprt for clnt %p\n", clnt); return err; @@ -783,6 +808,7 @@ rpc_free_client(struct rpc_clnt *clnt) rpc_free_iostats(clnt->cl_metrics); clnt->cl_metrics = NULL; xprt_put(rcu_dereference_raw(clnt->cl_xprt)); + xprt_iter_destroy(&clnt->cl_xpi); rpciod_down(); rpc_free_clid(clnt); kfree(clnt); diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index cf5770d8f49a..44f025c150d8 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -648,10 +648,10 @@ static struct rpc_task *rpcb_call_async(struct rpc_clnt *rpcb_clnt, struct rpcbi static struct rpc_clnt *rpcb_find_transport_owner(struct rpc_clnt *clnt) { struct rpc_clnt *parent = clnt->cl_parent; - struct rpc_xprt *xprt = rcu_dereference(clnt->cl_xprt); + struct rpc_xprt_switch *xps = rcu_access_pointer(clnt->cl_xpi.xpi_xpswitch); while (parent != clnt) { - if (rcu_dereference(parent->cl_xprt) != xprt) + if (rcu_access_pointer(parent->cl_xpi.xpi_xpswitch) != xps) break; if (clnt->cl_autobind) break; -- cgit v1.2.3 From fb43d17210baa538e58fc83d2d0f8a32399db73b Mon Sep 17 00:00:00 2001 From: Trond 
Myklebust Date: Sat, 30 Jan 2016 16:39:26 -0500 Subject: SUNRPC: Use the multipath iterator to assign a transport to each task Signed-off-by: Trond Myklebust --- include/linux/sunrpc/sched.h | 2 ++ net/sunrpc/clnt.c | 38 ++++++++++++++++++++------------------ net/sunrpc/rpcb_clnt.c | 6 ++---- net/sunrpc/xprt.c | 14 +++----------- net/sunrpc/xprtsock.c | 4 +--- 5 files changed, 28 insertions(+), 36 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index ee0fbcf9b02e..0b248e98ee3b 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -69,6 +69,8 @@ struct rpc_task { const struct rpc_call_ops *tk_ops; /* Caller callbacks */ struct rpc_clnt * tk_client; /* RPC client */ + struct rpc_xprt * tk_xprt; /* Transport */ + struct rpc_rqst * tk_rqstp; /* RPC request */ struct workqueue_struct *tk_workqueue; /* Normally rpciod, but could diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 625fb8a184c6..8e46fa5a2ab1 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -894,6 +894,7 @@ EXPORT_SYMBOL_GPL(rpc_bind_new_program); void rpc_task_release_client(struct rpc_task *task) { struct rpc_clnt *clnt = task->tk_client; + struct rpc_xprt *xprt = task->tk_xprt; if (clnt != NULL) { /* Remove from client task list */ @@ -904,13 +905,22 @@ void rpc_task_release_client(struct rpc_task *task) rpc_release_client(clnt); } + + if (xprt != NULL) { + task->tk_xprt = NULL; + + xprt_put(xprt); + } } static void rpc_task_set_client(struct rpc_task *task, struct rpc_clnt *clnt) { + if (clnt != NULL) { rpc_task_release_client(task); + if (task->tk_xprt == NULL) + task->tk_xprt = xprt_iter_get_next(&clnt->cl_xpi); task->tk_client = clnt; atomic_inc(&clnt->cl_count); if (clnt->cl_softrtry) @@ -2122,11 +2132,9 @@ call_timeout(struct rpc_task *task) } if (RPC_IS_SOFT(task)) { if (clnt->cl_chatty) { - rcu_read_lock(); printk(KERN_NOTICE "%s: server %s not responding, timed out\n", clnt->cl_program->name, - rcu_dereference(clnt->cl_xprt)->servername); - rcu_read_unlock(); + task->tk_xprt->servername); } if (task->tk_flags & RPC_TASK_TIMEOUT) rpc_exit(task, -ETIMEDOUT); @@ -2138,11 +2146,9 @@ call_timeout(struct rpc_task *task) if (!(task->tk_flags & RPC_CALL_MAJORSEEN)) { task->tk_flags |= RPC_CALL_MAJORSEEN; if (clnt->cl_chatty) { - rcu_read_lock(); printk(KERN_NOTICE "%s: server %s not responding, still trying\n", clnt->cl_program->name, - rcu_dereference(clnt->cl_xprt)->servername); - rcu_read_unlock(); + task->tk_xprt->servername); } } rpc_force_rebind(clnt); @@ -2172,11 +2178,9 @@ call_decode(struct rpc_task *task) if (task->tk_flags & RPC_CALL_MAJORSEEN) { if (clnt->cl_chatty) { - rcu_read_lock(); printk(KERN_NOTICE "%s: server %s OK\n", clnt->cl_program->name, - rcu_dereference(clnt->cl_xprt)->servername); - rcu_read_unlock(); + task->tk_xprt->servername); } task->tk_flags &= ~RPC_CALL_MAJORSEEN; } @@ -2330,11 +2334,9 @@ rpc_verify_header(struct rpc_task *task) task->tk_action = call_bind; goto out_retry; case RPC_AUTH_TOOWEAK: - rcu_read_lock(); printk(KERN_NOTICE "RPC: server %s requires stronger " "authentication.\n", - rcu_dereference(clnt->cl_xprt)->servername); - rcu_read_unlock(); + task->tk_xprt->servername); break; default: dprintk("RPC: %5u %s: unknown auth error: %x\n", @@ -2359,27 +2361,27 @@ rpc_verify_header(struct rpc_task *task) case RPC_SUCCESS: return p; case RPC_PROG_UNAVAIL: - dprintk_rcu("RPC: %5u %s: program %u is unsupported " + dprintk("RPC: %5u %s: program %u is unsupported " "by server %s\n", 
task->tk_pid, __func__, (unsigned int)clnt->cl_prog, - rcu_dereference(clnt->cl_xprt)->servername); + task->tk_xprt->servername); error = -EPFNOSUPPORT; goto out_err; case RPC_PROG_MISMATCH: - dprintk_rcu("RPC: %5u %s: program %u, version %u unsupported " + dprintk("RPC: %5u %s: program %u, version %u unsupported " "by server %s\n", task->tk_pid, __func__, (unsigned int)clnt->cl_prog, (unsigned int)clnt->cl_vers, - rcu_dereference(clnt->cl_xprt)->servername); + task->tk_xprt->servername); error = -EPROTONOSUPPORT; goto out_err; case RPC_PROC_UNAVAIL: - dprintk_rcu("RPC: %5u %s: proc %s unsupported by program %u, " + dprintk("RPC: %5u %s: proc %s unsupported by program %u, " "version %u on server %s\n", task->tk_pid, __func__, rpc_proc_name(task), clnt->cl_prog, clnt->cl_vers, - rcu_dereference(clnt->cl_xprt)->servername); + task->tk_xprt->servername); error = -EOPNOTSUPP; goto out_err; case RPC_GARBAGE_ARGS: diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index 44f025c150d8..5b30603596d0 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -683,11 +683,9 @@ void rpcb_getport_async(struct rpc_task *task) int status; rcu_read_lock(); - do { - clnt = rpcb_find_transport_owner(task->tk_client); - xprt = xprt_get(rcu_dereference(clnt->cl_xprt)); - } while (xprt == NULL); + clnt = rpcb_find_transport_owner(task->tk_client); rcu_read_unlock(); + xprt = xprt_get(task->tk_xprt); dprintk("RPC: %5u %s(%s, %u, %u, %d)\n", task->tk_pid, __func__, diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 323b332f8f7c..216a1385718a 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -1181,7 +1181,7 @@ EXPORT_SYMBOL_GPL(xprt_free); */ void xprt_reserve(struct rpc_task *task) { - struct rpc_xprt *xprt; + struct rpc_xprt *xprt = task->tk_xprt; task->tk_status = 0; if (task->tk_rqstp != NULL) @@ -1189,11 +1189,8 @@ void xprt_reserve(struct rpc_task *task) task->tk_timeout = 0; task->tk_status = -EAGAIN; - rcu_read_lock(); - xprt = rcu_dereference(task->tk_client->cl_xprt); if (!xprt_throttle_congested(xprt, task)) xprt->ops->alloc_slot(xprt, task); - rcu_read_unlock(); } /** @@ -1207,7 +1204,7 @@ void xprt_reserve(struct rpc_task *task) */ void xprt_retry_reserve(struct rpc_task *task) { - struct rpc_xprt *xprt; + struct rpc_xprt *xprt = task->tk_xprt; task->tk_status = 0; if (task->tk_rqstp != NULL) @@ -1215,10 +1212,7 @@ void xprt_retry_reserve(struct rpc_task *task) task->tk_timeout = 0; task->tk_status = -EAGAIN; - rcu_read_lock(); - xprt = rcu_dereference(task->tk_client->cl_xprt); xprt->ops->alloc_slot(xprt, task); - rcu_read_unlock(); } static inline __be32 xprt_alloc_xid(struct rpc_xprt *xprt) @@ -1265,11 +1259,9 @@ void xprt_release(struct rpc_task *task) if (req == NULL) { if (task->tk_client) { - rcu_read_lock(); - xprt = rcu_dereference(task->tk_client->cl_xprt); + xprt = task->tk_xprt; if (xprt->snd_task == task) xprt_release_write(xprt, task); - rcu_read_unlock(); } return; } diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index fde2138b81e7..65e759569e48 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -1844,9 +1844,7 @@ static int xs_bind(struct sock_xprt *transport, struct socket *sock) */ static void xs_local_rpcbind(struct rpc_task *task) { - rcu_read_lock(); - xprt_set_bound(rcu_dereference(task->tk_client->cl_xprt)); - rcu_read_unlock(); + xprt_set_bound(task->tk_xprt); } static void xs_local_set_port(struct rpc_xprt *xprt, unsigned short port) -- cgit v1.2.3 From 9d61498d5f6cde68a708781bf2cd33cae21121dc Mon Sep 17 00:00:00 
2001 From: Trond Myklebust Date: Sat, 30 Jan 2016 18:13:05 -0500 Subject: SUNRPC: Allow caller to specify the transport to use This is needed in order to allow the NFSv4.1 backchannel and BIND_CONN_TO_SESSION function to work. Signed-off-by: Trond Myklebust --- include/linux/sunrpc/sched.h | 1 + net/sunrpc/sched.c | 2 ++ 2 files changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index 0b248e98ee3b..05a1809c44d9 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -103,6 +103,7 @@ struct rpc_call_ops { struct rpc_task_setup { struct rpc_task *task; struct rpc_clnt *rpc_client; + struct rpc_xprt *rpc_xprt; const struct rpc_message *rpc_message; const struct rpc_call_ops *callback_ops; void *callback_data; diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index 73ad57a59989..fcfd48d263f6 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -909,6 +909,8 @@ static void rpc_init_task(struct rpc_task *task, const struct rpc_task_setup *ta /* Initialize workqueue for async tasks */ task->tk_workqueue = task_setup_data->workqueue; + task->tk_xprt = xprt_get(task_setup_data->rpc_xprt); + if (task->tk_ops->rpc_call_prepare != NULL) task->tk_action = rpc_prepare_task; -- cgit v1.2.3 From 3227886c6500e76c17f3b864ff9b7dd84f636a99 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 30 Jan 2016 20:39:19 -0500 Subject: SUNRPC: Add a helper to apply a function to all the rpc_clnt's transports Add a helper for tasks that require us to apply a function to all the transports in an rpc_clnt. An example of a usecase would be BIND_CONN_TO_SESSION, where we want to send one RPC call down each transport. Signed-off-by: Trond Myklebust --- include/linux/sunrpc/clnt.h | 4 ++++ net/sunrpc/clnt.c | 51 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 55 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 1713e41d65ae..d6510f64a361 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -182,6 +182,10 @@ size_t rpc_peeraddr(struct rpc_clnt *, struct sockaddr *, size_t); const char *rpc_peeraddr2str(struct rpc_clnt *, enum rpc_display_format_t); int rpc_localaddr(struct rpc_clnt *, struct sockaddr *, size_t); +int rpc_clnt_iterate_for_each_xprt(struct rpc_clnt *clnt, + int (*fn)(struct rpc_clnt *, struct rpc_xprt *, void *), + void *data); + const char *rpc_proc_name(const struct rpc_task *task); #endif /* __KERNEL__ */ #endif /* _LINUX_SUNRPC_CLNT_H */ diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 8e46fa5a2ab1..19feb4d5d796 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -736,6 +736,57 @@ out_revert: } EXPORT_SYMBOL_GPL(rpc_switch_client_transport); +static +int rpc_clnt_xprt_iter_init(struct rpc_clnt *clnt, struct rpc_xprt_iter *xpi) +{ + struct rpc_xprt_switch *xps; + + rcu_read_lock(); + xps = xprt_switch_get(rcu_dereference(clnt->cl_xpi.xpi_xpswitch)); + rcu_read_unlock(); + if (xps == NULL) + return -EAGAIN; + xprt_iter_init_listall(xpi, xps); + xprt_switch_put(xps); + return 0; +} + +/** + * rpc_clnt_iterate_for_each_xprt - Apply a function to all transports + * @clnt: pointer to client + * @fn: function to apply + * @data: void pointer to function data + * + * Iterates through the list of RPC transports currently attached to the + * client and applies the function fn(clnt, xprt, data). + * + * On error, the iteration stops, and the function returns the error value. 
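To make the calling convention concrete, here is a minimal callback sketch (the helper name and the counting logic are illustrative, not part of this patch); returning a negative errno from the callback stops the iteration early, and that value is what rpc_clnt_iterate_for_each_xprt() then returns.

static int example_count_xprt(struct rpc_clnt *clnt, struct rpc_xprt *xprt,
			      void *data)
{
	unsigned int *count = data;

	(*count)++;
	return 0;
}

A caller would then do: unsigned int n = 0; rpc_clnt_iterate_for_each_xprt(clnt, example_count_xprt, &n);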
+ */ +int rpc_clnt_iterate_for_each_xprt(struct rpc_clnt *clnt, + int (*fn)(struct rpc_clnt *, struct rpc_xprt *, void *), + void *data) +{ + struct rpc_xprt_iter xpi; + int ret; + + ret = rpc_clnt_xprt_iter_init(clnt, &xpi); + if (ret) + return ret; + for (;;) { + struct rpc_xprt *xprt = xprt_iter_get_next(&xpi); + + if (!xprt) + break; + ret = fn(clnt, xprt, data); + xprt_put(xprt); + if (ret < 0) + break; + } + xprt_iter_destroy(&xpi); + return ret; +} +EXPORT_SYMBOL_GPL(rpc_clnt_iterate_for_each_xprt); + /* * Kill all tasks for the given client. * XXX: kill their descendants as well? -- cgit v1.2.3 From 7f554890587c63ca4c087d6bcc4a9fe368e57484 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 30 Jan 2016 23:43:35 -0500 Subject: SUNRPC: Allow addition of new transports to a struct rpc_clnt Add a function to allow creation and addition of a new transport to an existing rpc_clnt Signed-off-by: Trond Myklebust --- include/linux/sunrpc/clnt.h | 11 ++++ net/sunrpc/clnt.c | 133 +++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 142 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index d6510f64a361..9a7ddbaf116e 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -186,6 +186,17 @@ int rpc_clnt_iterate_for_each_xprt(struct rpc_clnt *clnt, int (*fn)(struct rpc_clnt *, struct rpc_xprt *, void *), void *data); +int rpc_clnt_test_and_add_xprt(struct rpc_clnt *clnt, + struct rpc_xprt_switch *xps, + struct rpc_xprt *xprt, + void *dummy); +int rpc_clnt_add_xprt(struct rpc_clnt *, struct xprt_create *, + int (*setup)(struct rpc_clnt *, + struct rpc_xprt_switch *, + struct rpc_xprt *, + void *), + void *data); + const char *rpc_proc_name(const struct rpc_task *task); #endif /* __KERNEL__ */ #endif /* _LINUX_SUNRPC_CLNT_H */ diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 4b6ceb7c5e1d..7e0c9bf22df8 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -2492,7 +2492,10 @@ static int rpc_ping(struct rpc_clnt *clnt) return err; } -struct rpc_task *rpc_call_null(struct rpc_clnt *clnt, struct rpc_cred *cred, int flags) +static +struct rpc_task *rpc_call_null_helper(struct rpc_clnt *clnt, + struct rpc_xprt *xprt, struct rpc_cred *cred, int flags, + const struct rpc_call_ops *ops, void *data) { struct rpc_message msg = { .rpc_proc = &rpcproc_null, @@ -2500,14 +2503,140 @@ struct rpc_task *rpc_call_null(struct rpc_clnt *clnt, struct rpc_cred *cred, int }; struct rpc_task_setup task_setup_data = { .rpc_client = clnt, + .rpc_xprt = xprt, .rpc_message = &msg, - .callback_ops = &rpc_default_ops, + .callback_ops = (ops != NULL) ? 
ops : &rpc_default_ops, + .callback_data = data, .flags = flags, }; + return rpc_run_task(&task_setup_data); } + +struct rpc_task *rpc_call_null(struct rpc_clnt *clnt, struct rpc_cred *cred, int flags) +{ + return rpc_call_null_helper(clnt, NULL, cred, flags, NULL, NULL); +} EXPORT_SYMBOL_GPL(rpc_call_null); +struct rpc_cb_add_xprt_calldata { + struct rpc_xprt_switch *xps; + struct rpc_xprt *xprt; +}; + +static void rpc_cb_add_xprt_done(struct rpc_task *task, void *calldata) +{ + struct rpc_cb_add_xprt_calldata *data = calldata; + + if (task->tk_status == 0) + rpc_xprt_switch_add_xprt(data->xps, data->xprt); +} + +static void rpc_cb_add_xprt_release(void *calldata) +{ + struct rpc_cb_add_xprt_calldata *data = calldata; + + xprt_put(data->xprt); + xprt_switch_put(data->xps); + kfree(data); +} + +static const struct rpc_call_ops rpc_cb_add_xprt_call_ops = { + .rpc_call_done = rpc_cb_add_xprt_done, + .rpc_release = rpc_cb_add_xprt_release, +}; + +/** + * rpc_clnt_test_and_add_xprt - Test and add a new transport to a rpc_clnt + * @clnt: pointer to struct rpc_clnt + * @xps: pointer to struct rpc_xprt_switch + * @xprt: pointer to struct rpc_xprt + * @dummy: unused + */ +int rpc_clnt_test_and_add_xprt(struct rpc_clnt *clnt, + struct rpc_xprt_switch *xps, struct rpc_xprt *xprt, + void *dummy) +{ + struct rpc_cb_add_xprt_calldata *data; + struct rpc_cred *cred; + struct rpc_task *task; + + data = kmalloc(sizeof(*data), GFP_NOFS); + if (!data) + return -ENOMEM; + data->xps = xprt_switch_get(xps); + data->xprt = xprt_get(xprt); + + cred = authnull_ops.lookup_cred(NULL, NULL, 0); + task = rpc_call_null_helper(clnt, xprt, cred, + RPC_TASK_SOFT|RPC_TASK_SOFTCONN|RPC_TASK_ASYNC, + &rpc_cb_add_xprt_call_ops, data); + put_rpccred(cred); + if (IS_ERR(task)) + return PTR_ERR(task); + rpc_put_task(task); + return 1; +} +EXPORT_SYMBOL_GPL(rpc_clnt_test_and_add_xprt); + +/** + * rpc_clnt_add_xprt - Add a new transport to a rpc_clnt + * @clnt: pointer to struct rpc_clnt + * @xprtargs: pointer to struct xprt_create + * @setup: callback to test and/or set up the connection + * @data: pointer to setup function data + * + * Creates a new transport using the parameters set in @xprtargs and + * adds it to clnt. + * If ping is set, then test that connectivity succeeds before + * adding the new transport.
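The intended pairing of the two helpers added here might look like the following sketch (the caller is hypothetical, not part of this patch): rpc_clnt_test_and_add_xprt() is passed as the @setup callback, so the new transport is only linked into the switch from rpc_cb_add_xprt_done() once the asynchronous NULL ping succeeds. Because the setup callback returns 1, rpc_clnt_add_xprt() skips its own synchronous rpc_xprt_switch_add_xprt() call.

static int example_add_tested_xprt(struct rpc_clnt *clnt,
				   struct xprt_create *xprtargs)
{
	return rpc_clnt_add_xprt(clnt, xprtargs,
				 rpc_clnt_test_and_add_xprt, NULL);
}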
+ * + */ +int rpc_clnt_add_xprt(struct rpc_clnt *clnt, + struct xprt_create *xprtargs, + int (*setup)(struct rpc_clnt *, + struct rpc_xprt_switch *, + struct rpc_xprt *, + void *), + void *data) +{ + struct rpc_xprt_switch *xps; + struct rpc_xprt *xprt; + unsigned char resvport; + int ret = 0; + + rcu_read_lock(); + xps = xprt_switch_get(rcu_dereference(clnt->cl_xpi.xpi_xpswitch)); + xprt = xprt_iter_xprt(&clnt->cl_xpi); + if (xps == NULL || xprt == NULL) { + rcu_read_unlock(); + return -EAGAIN; + } + resvport = xprt->resvport; + rcu_read_unlock(); + + xprt = xprt_create_transport(xprtargs); + if (IS_ERR(xprt)) { + ret = PTR_ERR(xprt); + goto out_put_switch; + } + xprt->resvport = resvport; + + rpc_xprt_switch_set_roundrobin(xps); + if (setup) { + ret = setup(clnt, xps, xprt, data); + if (ret != 0) + goto out_put_xprt; + } + rpc_xprt_switch_add_xprt(xps, xprt); +out_put_xprt: + xprt_put(xprt); +out_put_switch: + xprt_switch_put(xps); + return ret; +} +EXPORT_SYMBOL_GPL(rpc_clnt_add_xprt); + #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) static void rpc_show_header(void) { -- cgit v1.2.3 From 896545098777564212b9e91af4c973f094649aa7 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 1 Feb 2016 21:36:54 +0800 Subject: crypto: hash - Remove crypto_hash interface This patch removes all traces of the crypto_hash interface, now that everyone has switched over to shash or ahash. Signed-off-by: Herbert Xu --- Documentation/DocBook/crypto-API.tmpl | 13 -- crypto/ahash.c | 18 --- crypto/shash.c | 147 -------------------- include/crypto/algapi.h | 18 --- include/crypto/internal/hash.h | 3 - include/linux/crypto.h | 251 ---------------------------------- 6 files changed, 450 deletions(-) (limited to 'include/linux') diff --git a/Documentation/DocBook/crypto-API.tmpl b/Documentation/DocBook/crypto-API.tmpl index 07df23ea06e4..866ff082272b 100644 --- a/Documentation/DocBook/crypto-API.tmpl +++ b/Documentation/DocBook/crypto-API.tmpl @@ -1761,19 +1761,6 @@ read(opfd, out, outlen); !Finclude/linux/crypto.h crypto_cipher_setkey !Finclude/linux/crypto.h crypto_cipher_encrypt_one !Finclude/linux/crypto.h crypto_cipher_decrypt_one - - Synchronous Message Digest API -!Pinclude/linux/crypto.h Synchronous Message Digest API -!Finclude/linux/crypto.h crypto_alloc_hash -!Finclude/linux/crypto.h crypto_free_hash -!Finclude/linux/crypto.h crypto_has_hash -!Finclude/linux/crypto.h crypto_hash_blocksize -!Finclude/linux/crypto.h crypto_hash_digestsize -!Finclude/linux/crypto.h crypto_hash_init -!Finclude/linux/crypto.h crypto_hash_update -!Finclude/linux/crypto.h crypto_hash_final -!Finclude/linux/crypto.h crypto_hash_digest -!Finclude/linux/crypto.h crypto_hash_setkey Message Digest Algorithm Definitions !Pinclude/crypto/hash.h Message Digest Algorithm Definitions diff --git a/crypto/ahash.c b/crypto/ahash.c index 8b08a59221a6..5fc1f172963d 100644 --- a/crypto/ahash.c +++ b/crypto/ahash.c @@ -166,24 +166,6 @@ int crypto_ahash_walk_first(struct ahash_request *req, } EXPORT_SYMBOL_GPL(crypto_ahash_walk_first); -int crypto_hash_walk_first_compat(struct hash_desc *hdesc, - struct crypto_hash_walk *walk, - struct scatterlist *sg, unsigned int len) -{ - walk->total = len; - - if (!walk->total) { - walk->entrylen = 0; - return 0; - } - - walk->alignmask = crypto_hash_alignmask(hdesc->tfm); - walk->sg = sg; - walk->flags = hdesc->flags & CRYPTO_TFM_REQ_MASK; - - return hash_walk_new_entry(walk); -} - static int ahash_setkey_unaligned(struct crypto_ahash *tfm, const u8 *key, unsigned int keylen) { diff --git a/crypto/shash.c 
b/crypto/shash.c index 88a27de79848..472bc141f50d 100644 --- a/crypto/shash.c +++ b/crypto/shash.c @@ -369,151 +369,6 @@ int crypto_init_shash_ops_async(struct crypto_tfm *tfm) return 0; } -static int shash_compat_setkey(struct crypto_hash *tfm, const u8 *key, - unsigned int keylen) -{ - struct shash_desc **descp = crypto_hash_ctx(tfm); - struct shash_desc *desc = *descp; - - return crypto_shash_setkey(desc->tfm, key, keylen); -} - -static int shash_compat_init(struct hash_desc *hdesc) -{ - struct shash_desc **descp = crypto_hash_ctx(hdesc->tfm); - struct shash_desc *desc = *descp; - - desc->flags = hdesc->flags; - - return crypto_shash_init(desc); -} - -static int shash_compat_update(struct hash_desc *hdesc, struct scatterlist *sg, - unsigned int len) -{ - struct shash_desc **descp = crypto_hash_ctx(hdesc->tfm); - struct shash_desc *desc = *descp; - struct crypto_hash_walk walk; - int nbytes; - - for (nbytes = crypto_hash_walk_first_compat(hdesc, &walk, sg, len); - nbytes > 0; nbytes = crypto_hash_walk_done(&walk, nbytes)) - nbytes = crypto_shash_update(desc, walk.data, nbytes); - - return nbytes; -} - -static int shash_compat_final(struct hash_desc *hdesc, u8 *out) -{ - struct shash_desc **descp = crypto_hash_ctx(hdesc->tfm); - - return crypto_shash_final(*descp, out); -} - -static int shash_compat_digest(struct hash_desc *hdesc, struct scatterlist *sg, - unsigned int nbytes, u8 *out) -{ - unsigned int offset = sg->offset; - int err; - - if (nbytes < min(sg->length, ((unsigned int)(PAGE_SIZE)) - offset)) { - struct shash_desc **descp = crypto_hash_ctx(hdesc->tfm); - struct shash_desc *desc = *descp; - void *data; - - desc->flags = hdesc->flags; - - data = kmap_atomic(sg_page(sg)); - err = crypto_shash_digest(desc, data + offset, nbytes, out); - kunmap_atomic(data); - crypto_yield(desc->flags); - goto out; - } - - err = shash_compat_init(hdesc); - if (err) - goto out; - - err = shash_compat_update(hdesc, sg, nbytes); - if (err) - goto out; - - err = shash_compat_final(hdesc, out); - -out: - return err; -} - -static void crypto_exit_shash_ops_compat(struct crypto_tfm *tfm) -{ - struct shash_desc **descp = crypto_tfm_ctx(tfm); - struct shash_desc *desc = *descp; - - crypto_free_shash(desc->tfm); - kzfree(desc); -} - -static int crypto_init_shash_ops_compat(struct crypto_tfm *tfm) -{ - struct hash_tfm *crt = &tfm->crt_hash; - struct crypto_alg *calg = tfm->__crt_alg; - struct shash_alg *alg = __crypto_shash_alg(calg); - struct shash_desc **descp = crypto_tfm_ctx(tfm); - struct crypto_shash *shash; - struct shash_desc *desc; - - if (!crypto_mod_get(calg)) - return -EAGAIN; - - shash = crypto_create_tfm(calg, &crypto_shash_type); - if (IS_ERR(shash)) { - crypto_mod_put(calg); - return PTR_ERR(shash); - } - - desc = kmalloc(sizeof(*desc) + crypto_shash_descsize(shash), - GFP_KERNEL); - if (!desc) { - crypto_free_shash(shash); - return -ENOMEM; - } - - *descp = desc; - desc->tfm = shash; - tfm->exit = crypto_exit_shash_ops_compat; - - crt->init = shash_compat_init; - crt->update = shash_compat_update; - crt->final = shash_compat_final; - crt->digest = shash_compat_digest; - crt->setkey = shash_compat_setkey; - - crt->digestsize = alg->digestsize; - - return 0; -} - -static int crypto_init_shash_ops(struct crypto_tfm *tfm, u32 type, u32 mask) -{ - switch (mask & CRYPTO_ALG_TYPE_MASK) { - case CRYPTO_ALG_TYPE_HASH_MASK: - return crypto_init_shash_ops_compat(tfm); - } - - return -EINVAL; -} - -static unsigned int crypto_shash_ctxsize(struct crypto_alg *alg, u32 type, - u32 mask) -{ - switch (mask & 
CRYPTO_ALG_TYPE_MASK) { - case CRYPTO_ALG_TYPE_HASH_MASK: - return sizeof(struct shash_desc *); - } - - return 0; -} - static int crypto_shash_init_tfm(struct crypto_tfm *tfm) { struct crypto_shash *hash = __crypto_shash_cast(tfm); @@ -560,9 +415,7 @@ static void crypto_shash_show(struct seq_file *m, struct crypto_alg *alg) } static const struct crypto_type crypto_shash_type = { - .ctxsize = crypto_shash_ctxsize, .extsize = crypto_alg_extsize, - .init = crypto_init_shash_ops, .init_tfm = crypto_shash_init_tfm, #ifdef CONFIG_PROC_FS .show = crypto_shash_show, diff --git a/include/crypto/algapi.h b/include/crypto/algapi.h index b09d43f612e1..eeafd21afb44 100644 --- a/include/crypto/algapi.h +++ b/include/crypto/algapi.h @@ -349,24 +349,6 @@ static inline struct cipher_alg *crypto_cipher_alg(struct crypto_cipher *tfm) return &crypto_cipher_tfm(tfm)->__crt_alg->cra_cipher; } -static inline struct crypto_hash *crypto_spawn_hash(struct crypto_spawn *spawn) -{ - u32 type = CRYPTO_ALG_TYPE_HASH; - u32 mask = CRYPTO_ALG_TYPE_HASH_MASK; - - return __crypto_hash_cast(crypto_spawn_tfm(spawn, type, mask)); -} - -static inline void *crypto_hash_ctx(struct crypto_hash *tfm) -{ - return crypto_tfm_ctx(&tfm->base); -} - -static inline void *crypto_hash_ctx_aligned(struct crypto_hash *tfm) -{ - return crypto_tfm_ctx_aligned(&tfm->base); -} - static inline void blkcipher_walk_init(struct blkcipher_walk *walk, struct scatterlist *dst, struct scatterlist *src, diff --git a/include/crypto/internal/hash.h b/include/crypto/internal/hash.h index 3b4af1d7c7e9..49dae16f8929 100644 --- a/include/crypto/internal/hash.h +++ b/include/crypto/internal/hash.h @@ -57,9 +57,6 @@ int crypto_hash_walk_first(struct ahash_request *req, struct crypto_hash_walk *walk); int crypto_ahash_walk_first(struct ahash_request *req, struct crypto_hash_walk *walk); -int crypto_hash_walk_first_compat(struct hash_desc *hdesc, - struct crypto_hash_walk *walk, - struct scatterlist *sg, unsigned int len); static inline int crypto_ahash_walk_done(struct crypto_hash_walk *walk, int err) diff --git a/include/linux/crypto.h b/include/linux/crypto.h index ab2a745d85f3..99c94899ad0f 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -136,7 +136,6 @@ struct scatterlist; struct crypto_ablkcipher; struct crypto_async_request; struct crypto_blkcipher; -struct crypto_hash; struct crypto_tfm; struct crypto_type; struct skcipher_givcrypt_request; @@ -186,11 +185,6 @@ struct cipher_desc { void *info; }; -struct hash_desc { - struct crypto_hash *tfm; - u32 flags; -}; - /** * DOC: Block Cipher Algorithm Definitions * @@ -518,18 +512,6 @@ struct cipher_tfm { void (*cit_decrypt_one)(struct crypto_tfm *tfm, u8 *dst, const u8 *src); }; -struct hash_tfm { - int (*init)(struct hash_desc *desc); - int (*update)(struct hash_desc *desc, - struct scatterlist *sg, unsigned int nsg); - int (*final)(struct hash_desc *desc, u8 *out); - int (*digest)(struct hash_desc *desc, struct scatterlist *sg, - unsigned int nsg, u8 *out); - int (*setkey)(struct crypto_hash *tfm, const u8 *key, - unsigned int keylen); - unsigned int digestsize; -}; - struct compress_tfm { int (*cot_compress)(struct crypto_tfm *tfm, const u8 *src, unsigned int slen, @@ -542,7 +524,6 @@ struct compress_tfm { #define crt_ablkcipher crt_u.ablkcipher #define crt_blkcipher crt_u.blkcipher #define crt_cipher crt_u.cipher -#define crt_hash crt_u.hash #define crt_compress crt_u.compress struct crypto_tfm { @@ -553,7 +534,6 @@ struct crypto_tfm { struct ablkcipher_tfm ablkcipher; struct 
blkcipher_tfm blkcipher; struct cipher_tfm cipher; - struct hash_tfm hash; struct compress_tfm compress; } crt_u; @@ -580,10 +560,6 @@ struct crypto_comp { struct crypto_tfm base; }; -struct crypto_hash { - struct crypto_tfm base; -}; - enum { CRYPTOA_UNSPEC, CRYPTOA_ALG, @@ -1576,233 +1552,6 @@ static inline void crypto_cipher_decrypt_one(struct crypto_cipher *tfm, dst, src); } -/** - * DOC: Synchronous Message Digest API - * - * The synchronous message digest API is used with the ciphers of type - * CRYPTO_ALG_TYPE_HASH (listed as type "hash" in /proc/crypto) - */ - -static inline struct crypto_hash *__crypto_hash_cast(struct crypto_tfm *tfm) -{ - return (struct crypto_hash *)tfm; -} - -static inline struct crypto_hash *crypto_hash_cast(struct crypto_tfm *tfm) -{ - BUG_ON((crypto_tfm_alg_type(tfm) ^ CRYPTO_ALG_TYPE_HASH) & - CRYPTO_ALG_TYPE_HASH_MASK); - return __crypto_hash_cast(tfm); -} - -/** - * crypto_alloc_hash() - allocate synchronous message digest handle - * @alg_name: is the cra_name / name or cra_driver_name / driver name of the - * message digest cipher - * @type: specifies the type of the cipher - * @mask: specifies the mask for the cipher - * - * Allocate a cipher handle for a message digest. The returned struct - * crypto_hash is the cipher handle that is required for any subsequent - * API invocation for that message digest. - * - * Return: allocated cipher handle in case of success; IS_ERR() is true in case - * of an error, PTR_ERR() returns the error code. - */ -static inline struct crypto_hash *crypto_alloc_hash(const char *alg_name, - u32 type, u32 mask) -{ - type &= ~CRYPTO_ALG_TYPE_MASK; - mask &= ~CRYPTO_ALG_TYPE_MASK; - type |= CRYPTO_ALG_TYPE_HASH; - mask |= CRYPTO_ALG_TYPE_HASH_MASK; - - return __crypto_hash_cast(crypto_alloc_base(alg_name, type, mask)); -} - -static inline struct crypto_tfm *crypto_hash_tfm(struct crypto_hash *tfm) -{ - return &tfm->base; -} - -/** - * crypto_free_hash() - zeroize and free message digest handle - * @tfm: cipher handle to be freed - */ -static inline void crypto_free_hash(struct crypto_hash *tfm) -{ - crypto_free_tfm(crypto_hash_tfm(tfm)); -} - -/** - * crypto_has_hash() - Search for the availability of a message digest - * @alg_name: is the cra_name / name or cra_driver_name / driver name of the - * message digest cipher - * @type: specifies the type of the cipher - * @mask: specifies the mask for the cipher - * - * Return: true when the message digest cipher is known to the kernel crypto - * API; false otherwise - */ -static inline int crypto_has_hash(const char *alg_name, u32 type, u32 mask) -{ - type &= ~CRYPTO_ALG_TYPE_MASK; - mask &= ~CRYPTO_ALG_TYPE_MASK; - type |= CRYPTO_ALG_TYPE_HASH; - mask |= CRYPTO_ALG_TYPE_HASH_MASK; - - return crypto_has_alg(alg_name, type, mask); -} - -static inline struct hash_tfm *crypto_hash_crt(struct crypto_hash *tfm) -{ - return &crypto_hash_tfm(tfm)->crt_hash; -} - -/** - * crypto_hash_blocksize() - obtain block size for message digest - * @tfm: cipher handle - * - * The block size for the message digest cipher referenced with the cipher - * handle is returned. 
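With the legacy crypto_hash helpers removed, a kernel caller that still needs a one-shot synchronous digest over a linear buffer would use the shash API instead. A minimal sketch, assuming the shash interface of this kernel era (the function name is hypothetical and "sha256" is only an example algorithm):

#include <crypto/hash.h>
#include <linux/err.h>

static int example_shash_digest(const u8 *data, unsigned int len, u8 *out)
{
	struct crypto_shash *tfm;
	int err;

	tfm = crypto_alloc_shash("sha256", 0, 0);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	{
		/* stack-allocated descriptor, sized for this tfm */
		SHASH_DESC_ON_STACK(desc, tfm);

		desc->tfm = tfm;
		desc->flags = 0;	/* CRYPTO_TFM_REQ_MAY_SLEEP unset */
		err = crypto_shash_digest(desc, data, len, out);
	}

	crypto_free_shash(tfm);
	return err;
}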
- * - * Return: block size of cipher - */ -static inline unsigned int crypto_hash_blocksize(struct crypto_hash *tfm) -{ - return crypto_tfm_alg_blocksize(crypto_hash_tfm(tfm)); -} - -static inline unsigned int crypto_hash_alignmask(struct crypto_hash *tfm) -{ - return crypto_tfm_alg_alignmask(crypto_hash_tfm(tfm)); -} - -/** - * crypto_hash_digestsize() - obtain message digest size - * @tfm: cipher handle - * - * The size for the message digest created by the message digest cipher - * referenced with the cipher handle is returned. - * - * Return: message digest size - */ -static inline unsigned int crypto_hash_digestsize(struct crypto_hash *tfm) -{ - return crypto_hash_crt(tfm)->digestsize; -} - -static inline u32 crypto_hash_get_flags(struct crypto_hash *tfm) -{ - return crypto_tfm_get_flags(crypto_hash_tfm(tfm)); -} - -static inline void crypto_hash_set_flags(struct crypto_hash *tfm, u32 flags) -{ - crypto_tfm_set_flags(crypto_hash_tfm(tfm), flags); -} - -static inline void crypto_hash_clear_flags(struct crypto_hash *tfm, u32 flags) -{ - crypto_tfm_clear_flags(crypto_hash_tfm(tfm), flags); -} - -/** - * crypto_hash_init() - (re)initialize message digest handle - * @desc: cipher request handle that to be filled by caller -- - * desc.tfm is filled with the hash cipher handle; - * desc.flags is filled with either CRYPTO_TFM_REQ_MAY_SLEEP or 0. - * - * The call (re-)initializes the message digest referenced by the hash cipher - * request handle. Any potentially existing state created by previous - * operations is discarded. - * - * Return: 0 if the message digest initialization was successful; < 0 if an - * error occurred - */ -static inline int crypto_hash_init(struct hash_desc *desc) -{ - return crypto_hash_crt(desc->tfm)->init(desc); -} - -/** - * crypto_hash_update() - add data to message digest for processing - * @desc: cipher request handle - * @sg: scatter / gather list pointing to the data to be added to the message - * digest - * @nbytes: number of bytes to be processed from @sg - * - * Updates the message digest state of the cipher handle pointed to by the - * hash cipher request handle with the input data pointed to by the - * scatter/gather list. - * - * Return: 0 if the message digest update was successful; < 0 if an error - * occurred - */ -static inline int crypto_hash_update(struct hash_desc *desc, - struct scatterlist *sg, - unsigned int nbytes) -{ - return crypto_hash_crt(desc->tfm)->update(desc, sg, nbytes); -} - -/** - * crypto_hash_final() - calculate message digest - * @desc: cipher request handle - * @out: message digest output buffer -- The caller must ensure that the out - * buffer has a sufficient size (e.g. by using the crypto_hash_digestsize - * function). - * - * Finalize the message digest operation and create the message digest - * based on all data added to the cipher handle. The message digest is placed - * into the output buffer. - * - * Return: 0 if the message digest creation was successful; < 0 if an error - * occurred - */ -static inline int crypto_hash_final(struct hash_desc *desc, u8 *out) -{ - return crypto_hash_crt(desc->tfm)->final(desc, out); -} - -/** - * crypto_hash_digest() - calculate message digest for a buffer - * @desc: see crypto_hash_final() - * @sg: see crypto_hash_update() - * @nbytes: see crypto_hash_update() - * @out: see crypto_hash_final() - * - * This function is a "short-hand" for the function calls of crypto_hash_init, - * crypto_hash_update and crypto_hash_final. 
The parameters have the same - * meaning as discussed for those separate three functions. - * - * Return: 0 if the message digest creation was successful; < 0 if an error - * occurred - */ -static inline int crypto_hash_digest(struct hash_desc *desc, - struct scatterlist *sg, - unsigned int nbytes, u8 *out) -{ - return crypto_hash_crt(desc->tfm)->digest(desc, sg, nbytes, out); -} - -/** - * crypto_hash_setkey() - set key for message digest - * @hash: cipher handle - * @key: buffer holding the key - * @keylen: length of the key in bytes - * - * The caller provided key is set for the message digest cipher. The cipher - * handle must point to a keyed hash in order for this function to succeed. - * - * Return: 0 if the setting of the key was successful; < 0 if an error occurred - */ -static inline int crypto_hash_setkey(struct crypto_hash *hash, - const u8 *key, unsigned int keylen) -{ - return crypto_hash_crt(hash)->setkey(hash, key, keylen); -} - static inline struct crypto_comp *__crypto_comp_cast(struct crypto_tfm *tfm) { return (struct crypto_comp *)tfm; -- cgit v1.2.3 From 6e7333d315a768170a59ac771297ee0551bdddbf Mon Sep 17 00:00:00 2001 From: Jarod Wilson Date: Mon, 1 Feb 2016 18:51:05 -0500 Subject: net: add rx_nohandler stat counter This adds an rx_nohandler stat counter, along with a sysfs statistics node, and copies the counter out via netlink as well. CC: "David S. Miller" CC: Eric Dumazet CC: Jiri Pirko CC: Daniel Borkmann CC: Tom Herbert CC: Jay Vosburgh CC: Veaceslav Falico CC: Andy Gospodarek CC: netdev@vger.kernel.org Signed-off-by: Jarod Wilson Signed-off-by: David S. Miller --- include/linux/netdevice.h | 3 +++ include/uapi/linux/if_link.h | 4 ++++ net/core/dev.c | 6 +++++- net/core/net-sysfs.c | 2 ++ net/core/rtnetlink.c | 2 ++ 5 files changed, 16 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 289c2314d766..78a20cec2a0a 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1397,6 +1397,8 @@ enum netdev_priv_flags { * do not use this in drivers * @tx_dropped: Dropped packets by core network, * do not use this in drivers + * @rx_nohandler: nohandler dropped packets by core network on + * inactive devices, do not use this in drivers * * @wireless_handlers: List of functions to handle Wireless Extensions, * instead of ioctl, @@ -1611,6 +1613,7 @@ struct net_device { atomic_long_t rx_dropped; atomic_long_t tx_dropped; + atomic_long_t rx_nohandler; #ifdef CONFIG_WIRELESS_EXT const struct iw_handler_def * wireless_handlers; diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index a30b78090594..d3e90b91e07e 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -35,6 +35,8 @@ struct rtnl_link_stats { /* for cslip etc */ __u32 rx_compressed; __u32 tx_compressed; + + __u32 rx_nohandler; /* dropped, no handler found */ }; /* The main device statistics structure */ @@ -68,6 +70,8 @@ struct rtnl_link_stats64 { /* for cslip etc */ __u64 rx_compressed; __u64 tx_compressed; + + __u64 rx_nohandler; /* dropped, no handler found */ }; /* The struct should be in sync with struct ifmap */ diff --git a/net/core/dev.c b/net/core/dev.c index 65863e512227..f1284835b8c9 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4154,7 +4154,10 @@ ncls: ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev); } else { drop: - atomic_long_inc(&skb->dev->rx_dropped); + if (!deliver_exact) + atomic_long_inc(&skb->dev->rx_dropped); + else + 
atomic_long_inc(&skb->dev->rx_nohandler); kfree_skb(skb); /* Jamal, now you will not able to escape explaining * me how you were going to use this. :-) @@ -7307,6 +7310,7 @@ struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev, } storage->rx_dropped += atomic_long_read(&dev->rx_dropped); storage->tx_dropped += atomic_long_read(&dev->tx_dropped); + storage->rx_nohandler += atomic_long_read(&dev->rx_nohandler); return storage; } EXPORT_SYMBOL(dev_get_stats); diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index b6c8a6629b39..da7dbc237a5f 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -574,6 +574,7 @@ NETSTAT_ENTRY(tx_heartbeat_errors); NETSTAT_ENTRY(tx_window_errors); NETSTAT_ENTRY(rx_compressed); NETSTAT_ENTRY(tx_compressed); +NETSTAT_ENTRY(rx_nohandler); static struct attribute *netstat_attrs[] = { &dev_attr_rx_packets.attr, @@ -599,6 +600,7 @@ static struct attribute *netstat_attrs[] = { &dev_attr_tx_window_errors.attr, &dev_attr_rx_compressed.attr, &dev_attr_tx_compressed.attr, + &dev_attr_rx_nohandler.attr, NULL }; diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index d735e854f916..20d71358c143 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -804,6 +804,8 @@ static void copy_rtnl_link_stats(struct rtnl_link_stats *a, a->rx_compressed = b->rx_compressed; a->tx_compressed = b->tx_compressed; + + a->rx_nohandler = b->rx_nohandler; } static void copy_rtnl_link_stats64(void *v, const struct rtnl_link_stats64 *b) -- cgit v1.2.3 From bb63daf9efb4f2bcb657d7179a53bd808f978dc9 Mon Sep 17 00:00:00 2001 From: Jarod Wilson Date: Mon, 1 Feb 2016 18:51:06 -0500 Subject: team: track sum of rx_nohandler for all slaves CC: Jiri Pirko CC: netdev@vger.kernel.org Signed-off-by: Jarod Wilson Signed-off-by: David S. Miller --- drivers/net/team/team.c | 10 +++++++--- include/linux/if_team.h | 1 + 2 files changed, 8 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index 718ceeab4dbc..00558e139584 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -758,6 +758,8 @@ static rx_handler_result_t team_handle_frame(struct sk_buff **pskb) u64_stats_update_end(&pcpu_stats->syncp); skb->dev = team->dev; + } else if (res == RX_HANDLER_EXACT) { + this_cpu_inc(team->pcpu_stats->rx_nohandler); } else { this_cpu_inc(team->pcpu_stats->rx_dropped); } @@ -1807,7 +1809,7 @@ team_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats) struct team *team = netdev_priv(dev); struct team_pcpu_stats *p; u64 rx_packets, rx_bytes, rx_multicast, tx_packets, tx_bytes; - u32 rx_dropped = 0, tx_dropped = 0; + u32 rx_dropped = 0, tx_dropped = 0, rx_nohandler = 0; unsigned int start; int i; @@ -1828,14 +1830,16 @@ team_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats) stats->tx_packets += tx_packets; stats->tx_bytes += tx_bytes; /* - * rx_dropped & tx_dropped are u32, updated - * without syncp protection. + * rx_dropped, tx_dropped & rx_nohandler are u32, + * updated without syncp protection. 
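For completeness, the counter added by the previous patch is also visible from userspace through the sysfs node registered via NETSTAT_ENTRY(rx_nohandler). A hedged userspace sketch (the helper name is hypothetical):

#include <stdio.h>

static long long read_rx_nohandler(const char *dev)
{
	char path[128];
	long long val;
	FILE *f;

	snprintf(path, sizeof(path),
		 "/sys/class/net/%s/statistics/rx_nohandler", dev);
	f = fopen(path, "r");
	if (f == NULL)
		return -1;
	if (fscanf(f, "%lld", &val) != 1)
		val = -1;
	fclose(f);
	return val;
}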
*/ rx_dropped += p->rx_dropped; tx_dropped += p->tx_dropped; + rx_nohandler += p->rx_nohandler; } stats->rx_dropped = rx_dropped; stats->tx_dropped = tx_dropped; + stats->rx_nohandler = rx_nohandler; return stats; } diff --git a/include/linux/if_team.h b/include/linux/if_team.h index b84e49c3a738..174f43f43aff 100644 --- a/include/linux/if_team.h +++ b/include/linux/if_team.h @@ -24,6 +24,7 @@ struct team_pcpu_stats { struct u64_stats_sync syncp; u32 rx_dropped; u32 tx_dropped; + u32 rx_nohandler; }; struct team; -- cgit v1.2.3 From ba905f5e2f63d86ed4cfbd3d9096fb28d156f1ee Mon Sep 17 00:00:00 2001 From: Kim Jones Date: Tue, 2 Feb 2016 03:51:16 +0000 Subject: ethtool: Declare netdev_rss_key as __read_mostly. netdev_rss_key is written to once and thereafter is read by drivers when they are initialising. The fact that it is mostly read and not written to makes it a candidate for a __read_mostly declaration. Signed-off-by: Kim Jones Signed-off-by: Alan Carey Acked-by: Rami Rosen Signed-off-by: David S. Miller --- include/linux/netdevice.h | 2 +- net/core/ethtool.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 78a20cec2a0a..219f53c30cb3 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3744,7 +3744,7 @@ void netdev_lower_state_changed(struct net_device *lower_dev, /* RSS keys are 40 or 52 bytes long */ #define NETDEV_RSS_KEY_LEN 52 -extern u8 netdev_rss_key[NETDEV_RSS_KEY_LEN]; +extern u8 netdev_rss_key[NETDEV_RSS_KEY_LEN] __read_mostly; void netdev_rss_key_fill(void *buffer, size_t len); int dev_get_nest_level(struct net_device *dev, diff --git a/net/core/ethtool.c b/net/core/ethtool.c index daf04709dd3c..453c803f1c87 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -632,7 +632,7 @@ static int ethtool_copy_validate_indir(u32 *indir, void __user *useraddr, return 0; } -u8 netdev_rss_key[NETDEV_RSS_KEY_LEN]; +u8 netdev_rss_key[NETDEV_RSS_KEY_LEN] __read_mostly; void netdev_rss_key_fill(void *buffer, size_t len) { -- cgit v1.2.3 From a10423b87a7eae75da79ce80a8d9475047a674ee Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Mon, 1 Feb 2016 22:39:54 -0800 Subject: bpf: introduce BPF_MAP_TYPE_PERCPU_ARRAY map Primary use case is a histogram array of latency where bpf program computes the latency of block requests or other events and stores histogram of latency into array of 64 elements. All cpus are constantly running, so normal increment is not accurate, bpf_xadd causes cache ping-pong and this per-cpu approach allows fastest collision-free counters. Signed-off-by: Alexei Starovoitov Signed-off-by: David S. 
Miller --- include/linux/bpf.h | 1 + include/uapi/linux/bpf.h | 1 + kernel/bpf/arraymap.c | 102 ++++++++++++++++++++++++++++++++++++++++++----- 3 files changed, 93 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 83d1926c61e4..141fb0d45731 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -151,6 +151,7 @@ struct bpf_array { union { char value[0] __aligned(8); void *ptrs[0] __aligned(8); + void __percpu *pptrs[0] __aligned(8); }; }; #define MAX_TAIL_CALL_CNT 32 diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 43ae40c8763e..2ee0fde1bf96 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -82,6 +82,7 @@ enum bpf_map_type { BPF_MAP_TYPE_PROG_ARRAY, BPF_MAP_TYPE_PERF_EVENT_ARRAY, BPF_MAP_TYPE_PERCPU_HASH, + BPF_MAP_TYPE_PERCPU_ARRAY, }; enum bpf_prog_type { diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index 89ebbc4d1164..b9bf1d7949ca 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c @@ -17,11 +17,39 @@ #include #include +static void bpf_array_free_percpu(struct bpf_array *array) +{ + int i; + + for (i = 0; i < array->map.max_entries; i++) + free_percpu(array->pptrs[i]); +} + +static int bpf_array_alloc_percpu(struct bpf_array *array) +{ + void __percpu *ptr; + int i; + + for (i = 0; i < array->map.max_entries; i++) { + ptr = __alloc_percpu_gfp(array->elem_size, 8, + GFP_USER | __GFP_NOWARN); + if (!ptr) { + bpf_array_free_percpu(array); + return -ENOMEM; + } + array->pptrs[i] = ptr; + } + + return 0; +} + /* Called from syscall */ static struct bpf_map *array_map_alloc(union bpf_attr *attr) { + bool percpu = attr->map_type == BPF_MAP_TYPE_PERCPU_ARRAY; struct bpf_array *array; - u32 elem_size, array_size; + u64 array_size; + u32 elem_size; /* check sanity of attributes */ if (attr->max_entries == 0 || attr->key_size != 4 || @@ -36,12 +64,16 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr) elem_size = round_up(attr->value_size, 8); - /* check round_up into zero and u32 overflow */ - if (elem_size == 0 || - attr->max_entries > (U32_MAX - PAGE_SIZE - sizeof(*array)) / elem_size) + array_size = sizeof(*array); + if (percpu) + array_size += (u64) attr->max_entries * sizeof(void *); + else + array_size += (u64) attr->max_entries * elem_size; + + /* make sure there is no u32 overflow later in round_up() */ + if (array_size >= U32_MAX - PAGE_SIZE) return ERR_PTR(-ENOMEM); - array_size = sizeof(*array) + attr->max_entries * elem_size; /* allocate all map elements and zero-initialize them */ array = kzalloc(array_size, GFP_USER | __GFP_NOWARN); @@ -52,12 +84,25 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr) } /* copy mandatory map attributes */ + array->map.map_type = attr->map_type; array->map.key_size = attr->key_size; array->map.value_size = attr->value_size; array->map.max_entries = attr->max_entries; - array->map.pages = round_up(array_size, PAGE_SIZE) >> PAGE_SHIFT; array->elem_size = elem_size; + if (!percpu) + goto out; + + array_size += (u64) attr->max_entries * elem_size * num_possible_cpus(); + + if (array_size >= U32_MAX - PAGE_SIZE || + elem_size > PCPU_MIN_UNIT_SIZE || bpf_array_alloc_percpu(array)) { + kvfree(array); + return ERR_PTR(-ENOMEM); + } +out: + array->map.pages = round_up(array_size, PAGE_SIZE) >> PAGE_SHIFT; + return &array->map; } @@ -67,12 +112,24 @@ static void *array_map_lookup_elem(struct bpf_map *map, void *key) struct bpf_array *array = container_of(map, struct bpf_array, map); u32 index = 
*(u32 *)key; - if (index >= array->map.max_entries) + if (unlikely(index >= array->map.max_entries)) return NULL; return array->value + array->elem_size * index; } +/* Called from eBPF program */ +static void *percpu_array_map_lookup_elem(struct bpf_map *map, void *key) +{ + struct bpf_array *array = container_of(map, struct bpf_array, map); + u32 index = *(u32 *)key; + + if (unlikely(index >= array->map.max_entries)) + return NULL; + + return this_cpu_ptr(array->pptrs[index]); +} + /* Called from syscall */ static int array_map_get_next_key(struct bpf_map *map, void *key, void *next_key) { @@ -99,19 +156,24 @@ static int array_map_update_elem(struct bpf_map *map, void *key, void *value, struct bpf_array *array = container_of(map, struct bpf_array, map); u32 index = *(u32 *)key; - if (map_flags > BPF_EXIST) + if (unlikely(map_flags > BPF_EXIST)) /* unknown flags */ return -EINVAL; - if (index >= array->map.max_entries) + if (unlikely(index >= array->map.max_entries)) /* all elements were pre-allocated, cannot insert a new one */ return -E2BIG; - if (map_flags == BPF_NOEXIST) + if (unlikely(map_flags == BPF_NOEXIST)) /* all elements already exist */ return -EEXIST; - memcpy(array->value + array->elem_size * index, value, map->value_size); + if (array->map.map_type == BPF_MAP_TYPE_PERCPU_ARRAY) + memcpy(this_cpu_ptr(array->pptrs[index]), + value, map->value_size); + else + memcpy(array->value + array->elem_size * index, + value, map->value_size); return 0; } @@ -133,6 +195,9 @@ static void array_map_free(struct bpf_map *map) */ synchronize_rcu(); + if (array->map.map_type == BPF_MAP_TYPE_PERCPU_ARRAY) + bpf_array_free_percpu(array); + kvfree(array); } @@ -150,9 +215,24 @@ static struct bpf_map_type_list array_type __read_mostly = { .type = BPF_MAP_TYPE_ARRAY, }; +static const struct bpf_map_ops percpu_array_ops = { + .map_alloc = array_map_alloc, + .map_free = array_map_free, + .map_get_next_key = array_map_get_next_key, + .map_lookup_elem = percpu_array_map_lookup_elem, + .map_update_elem = array_map_update_elem, + .map_delete_elem = array_map_delete_elem, +}; + +static struct bpf_map_type_list percpu_array_type __read_mostly = { + .ops = &percpu_array_ops, + .type = BPF_MAP_TYPE_PERCPU_ARRAY, +}; + static int __init register_array_map(void) { bpf_register_map_type(&array_type); + bpf_register_map_type(&percpu_array_type); return 0; } late_initcall(register_array_map); -- cgit v1.2.3 From 15a07b33814d14ca817887dbea8530728dc0fbe4 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Mon, 1 Feb 2016 22:39:55 -0800 Subject: bpf: add lookup/update support for per-cpu hash and array maps The functions bpf_map_lookup_elem(map, key, value) and bpf_map_update_elem(map, key, value, flags) need to get/set values from all-cpus for per-cpu hash and array maps, so that user space can aggregate/update them as necessary. Example of single counter aggregation in user space: unsigned int nr_cpus = sysconf(_SC_NPROCESSORS_CONF); long values[nr_cpus]; long value = 0; bpf_lookup_elem(fd, key, values); for (i = 0; i < nr_cpus; i++) value += values[i]; The user space must provide round_up(value_size, 8) * nr_cpus array to get/set values, since kernel will use 'long' copy of per-cpu values to try to copy good counters atomically. It's a best-effort, since bpf programs and user space are racing to access the same memory. Signed-off-by: Alexei Starovoitov Signed-off-by: David S. 
Miller --- include/linux/bpf.h | 23 ++++++++++++++ kernel/bpf/arraymap.c | 64 +++++++++++++++++++++++++++++++++++++++ kernel/bpf/hashtab.c | 83 +++++++++++++++++++++++++++++++++++++++++++++------ kernel/bpf/syscall.c | 57 ++++++++++++++++++++++++----------- 4 files changed, 201 insertions(+), 26 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 141fb0d45731..90ee6ab24bc5 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -183,6 +183,29 @@ int bpf_prog_new_fd(struct bpf_prog *prog); int bpf_obj_pin_user(u32 ufd, const char __user *pathname); int bpf_obj_get_user(const char __user *pathname); +int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value); +int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value); +int bpf_percpu_hash_update(struct bpf_map *map, void *key, void *value, + u64 flags); +int bpf_percpu_array_update(struct bpf_map *map, void *key, void *value, + u64 flags); + +/* memcpy that is used with 8-byte aligned pointers, power-of-8 size and + * forced to use 'long' read/writes to try to atomically copy long counters. + * Best-effort only. No barriers here, since it _will_ race with concurrent + * updates from BPF programs. Called from bpf syscall and mostly used with + * size 8 or 16 bytes, so ask compiler to inline it. + */ +static inline void bpf_long_memcpy(void *dst, const void *src, u32 size) +{ + const long *lsrc = src; + long *ldst = dst; + + size /= sizeof(long); + while (size--) + *ldst++ = *lsrc++; +} + /* verify correctness of eBPF program */ int bpf_check(struct bpf_prog **fp, union bpf_attr *attr); #else diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index b9bf1d7949ca..bd3bdf2486a7 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c @@ -130,6 +130,32 @@ static void *percpu_array_map_lookup_elem(struct bpf_map *map, void *key) return this_cpu_ptr(array->pptrs[index]); } +int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value) +{ + struct bpf_array *array = container_of(map, struct bpf_array, map); + u32 index = *(u32 *)key; + void __percpu *pptr; + int cpu, off = 0; + u32 size; + + if (unlikely(index >= array->map.max_entries)) + return -ENOENT; + + /* per_cpu areas are zero-filled and bpf programs can only + * access 'value_size' of them, so copying rounded areas + * will not leak any kernel data + */ + size = round_up(map->value_size, 8); + rcu_read_lock(); + pptr = array->pptrs[index]; + for_each_possible_cpu(cpu) { + bpf_long_memcpy(value + off, per_cpu_ptr(pptr, cpu), size); + off += size; + } + rcu_read_unlock(); + return 0; +} + /* Called from syscall */ static int array_map_get_next_key(struct bpf_map *map, void *key, void *next_key) { @@ -177,6 +203,44 @@ static int array_map_update_elem(struct bpf_map *map, void *key, void *value, return 0; } +int bpf_percpu_array_update(struct bpf_map *map, void *key, void *value, + u64 map_flags) +{ + struct bpf_array *array = container_of(map, struct bpf_array, map); + u32 index = *(u32 *)key; + void __percpu *pptr; + int cpu, off = 0; + u32 size; + + if (unlikely(map_flags > BPF_EXIST)) + /* unknown flags */ + return -EINVAL; + + if (unlikely(index >= array->map.max_entries)) + /* all elements were pre-allocated, cannot insert a new one */ + return -E2BIG; + + if (unlikely(map_flags == BPF_NOEXIST)) + /* all elements already exist */ + return -EEXIST; + + /* the user space will provide round_up(value_size, 8) bytes that + * will be copied into per-cpu area. 
bpf programs can only access + * value_size of it. During lookup the same extra bytes will be + * returned or zeros which were zero-filled by percpu_alloc, + * so no kernel data leaks possible + */ + size = round_up(map->value_size, 8); + rcu_read_lock(); + pptr = array->pptrs[index]; + for_each_possible_cpu(cpu) { + bpf_long_memcpy(per_cpu_ptr(pptr, cpu), value + off, size); + off += size; + } + rcu_read_unlock(); + return 0; +} + /* Called from syscall or from eBPF program */ static int array_map_delete_elem(struct bpf_map *map, void *key) { diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c index 2be5f6e8bb04..fd5db8fe9360 100644 --- a/kernel/bpf/hashtab.c +++ b/kernel/bpf/hashtab.c @@ -290,7 +290,7 @@ static void free_htab_elem(struct htab_elem *l, bool percpu, u32 key_size) static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key, void *value, u32 key_size, u32 hash, - bool percpu) + bool percpu, bool onallcpus) { u32 size = htab->map.value_size; struct htab_elem *l_new; @@ -312,8 +312,18 @@ static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key, return NULL; } - /* copy true value_size bytes */ - memcpy(this_cpu_ptr(pptr), value, htab->map.value_size); + if (!onallcpus) { + /* copy true value_size bytes */ + memcpy(this_cpu_ptr(pptr), value, htab->map.value_size); + } else { + int off = 0, cpu; + + for_each_possible_cpu(cpu) { + bpf_long_memcpy(per_cpu_ptr(pptr, cpu), + value + off, size); + off += size; + } + } htab_elem_set_ptr(l_new, key_size, pptr); } else { memcpy(l_new->key + round_up(key_size, 8), value, size); @@ -368,7 +378,7 @@ static int htab_map_update_elem(struct bpf_map *map, void *key, void *value, /* allocate new element outside of the lock, since * we're most likley going to insert it */ - l_new = alloc_htab_elem(htab, key, value, key_size, hash, false); + l_new = alloc_htab_elem(htab, key, value, key_size, hash, false, false); if (!l_new) return -ENOMEM; @@ -402,8 +412,9 @@ err: return ret; } -static int htab_percpu_map_update_elem(struct bpf_map *map, void *key, - void *value, u64 map_flags) +static int __htab_percpu_map_update_elem(struct bpf_map *map, void *key, + void *value, u64 map_flags, + bool onallcpus) { struct bpf_htab *htab = container_of(map, struct bpf_htab, map); struct htab_elem *l_new = NULL, *l_old; @@ -436,12 +447,25 @@ static int htab_percpu_map_update_elem(struct bpf_map *map, void *key, goto err; if (l_old) { + void __percpu *pptr = htab_elem_get_ptr(l_old, key_size); + u32 size = htab->map.value_size; + /* per-cpu hash map can update value in-place */ - memcpy(this_cpu_ptr(htab_elem_get_ptr(l_old, key_size)), - value, htab->map.value_size); + if (!onallcpus) { + memcpy(this_cpu_ptr(pptr), value, size); + } else { + int off = 0, cpu; + + size = round_up(size, 8); + for_each_possible_cpu(cpu) { + bpf_long_memcpy(per_cpu_ptr(pptr, cpu), + value + off, size); + off += size; + } + } } else { l_new = alloc_htab_elem(htab, key, value, key_size, - hash, true); + hash, true, onallcpus); if (!l_new) { ret = -ENOMEM; goto err; @@ -455,6 +479,12 @@ err: return ret; } +static int htab_percpu_map_update_elem(struct bpf_map *map, void *key, + void *value, u64 map_flags) +{ + return __htab_percpu_map_update_elem(map, key, value, map_flags, false); +} + /* Called from syscall or from eBPF program */ static int htab_map_delete_elem(struct bpf_map *map, void *key) { @@ -557,6 +587,41 @@ static void *htab_percpu_map_lookup_elem(struct bpf_map *map, void *key) return NULL; } +int bpf_percpu_hash_copy(struct bpf_map *map, 
void *key, void *value) +{ + struct htab_elem *l; + void __percpu *pptr; + int ret = -ENOENT; + int cpu, off = 0; + u32 size; + + /* per_cpu areas are zero-filled and bpf programs can only + * access 'value_size' of them, so copying rounded areas + * will not leak any kernel data + */ + size = round_up(map->value_size, 8); + rcu_read_lock(); + l = __htab_map_lookup_elem(map, key); + if (!l) + goto out; + pptr = htab_elem_get_ptr(l, map->key_size); + for_each_possible_cpu(cpu) { + bpf_long_memcpy(value + off, + per_cpu_ptr(pptr, cpu), size); + off += size; + } + ret = 0; +out: + rcu_read_unlock(); + return ret; +} + +int bpf_percpu_hash_update(struct bpf_map *map, void *key, void *value, + u64 map_flags) +{ + return __htab_percpu_map_update_elem(map, key, value, map_flags, true); +} + static const struct bpf_map_ops htab_percpu_ops = { .map_alloc = htab_map_alloc, .map_free = htab_map_free, diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 637397059f76..c95a753c2007 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -239,6 +239,7 @@ static int map_lookup_elem(union bpf_attr *attr) int ufd = attr->map_fd; struct bpf_map *map; void *key, *value, *ptr; + u32 value_size; struct fd f; int err; @@ -259,23 +260,35 @@ static int map_lookup_elem(union bpf_attr *attr) if (copy_from_user(key, ukey, map->key_size) != 0) goto free_key; + if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH || + map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) + value_size = round_up(map->value_size, 8) * num_possible_cpus(); + else + value_size = map->value_size; + err = -ENOMEM; - value = kmalloc(map->value_size, GFP_USER | __GFP_NOWARN); + value = kmalloc(value_size, GFP_USER | __GFP_NOWARN); if (!value) goto free_key; - rcu_read_lock(); - ptr = map->ops->map_lookup_elem(map, key); - if (ptr) - memcpy(value, ptr, map->value_size); - rcu_read_unlock(); + if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH) { + err = bpf_percpu_hash_copy(map, key, value); + } else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) { + err = bpf_percpu_array_copy(map, key, value); + } else { + rcu_read_lock(); + ptr = map->ops->map_lookup_elem(map, key); + if (ptr) + memcpy(value, ptr, value_size); + rcu_read_unlock(); + err = ptr ? 
0 : -ENOENT; + } - err = -ENOENT; - if (!ptr) + if (err) goto free_value; err = -EFAULT; - if (copy_to_user(uvalue, value, map->value_size) != 0) + if (copy_to_user(uvalue, value, value_size) != 0) goto free_value; err = 0; @@ -298,6 +311,7 @@ static int map_update_elem(union bpf_attr *attr) int ufd = attr->map_fd; struct bpf_map *map; void *key, *value; + u32 value_size; struct fd f; int err; @@ -318,21 +332,30 @@ static int map_update_elem(union bpf_attr *attr) if (copy_from_user(key, ukey, map->key_size) != 0) goto free_key; + if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH || + map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) + value_size = round_up(map->value_size, 8) * num_possible_cpus(); + else + value_size = map->value_size; + err = -ENOMEM; - value = kmalloc(map->value_size, GFP_USER | __GFP_NOWARN); + value = kmalloc(value_size, GFP_USER | __GFP_NOWARN); if (!value) goto free_key; err = -EFAULT; - if (copy_from_user(value, uvalue, map->value_size) != 0) + if (copy_from_user(value, uvalue, value_size) != 0) goto free_value; - /* eBPF program that use maps are running under rcu_read_lock(), - * therefore all map accessors rely on this fact, so do the same here - */ - rcu_read_lock(); - err = map->ops->map_update_elem(map, key, value, attr->flags); - rcu_read_unlock(); + if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH) { + err = bpf_percpu_hash_update(map, key, value, attr->flags); + } else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) { + err = bpf_percpu_array_update(map, key, value, attr->flags); + } else { + rcu_read_lock(); + err = map->ops->map_update_elem(map, key, value, attr->flags); + rcu_read_unlock(); + } free_value: kfree(value); -- cgit v1.2.3 From 7267bcda332e2782e21a559f3b1b859a35b4062d Mon Sep 17 00:00:00 2001 From: Rafał Miłecki Date: Sat, 16 Jan 2016 00:48:52 +0100 Subject: bcma: identify bus cores (devices) found on BCM47189 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add missing defines and print proper names.
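For a concrete picture of how these names get used, here is a minimal, self-contained sketch of the ID-to-name lookup that scan code of this kind performs when printing a discovered core. The struct layout and helper are assumptions for illustration (the kernel's own table is struct bcma_device_id_name in drivers/bcma/scan.c, shown in the diff below); only the ID values 0x840/0x846/0x847 are taken from this patch:

#include <stdio.h>

/* Hypothetical stand-in for the kernel's bcma_device_id_name entries;
 * the struct layout and helper are assumptions for illustration. Only
 * the ID values come from this patch. */
struct core_id_name {
	unsigned short id;
	const char *name;
};

static const struct core_id_name bcm_device_names[] = {
	{ 0x840, "GCI" },                           /* BCMA_CORE_GCI */
	{ 0x846, "CNDS DDR2/3 memory controller" }, /* BCMA_CORE_CMEM */
	{ 0x847, "ARM CA7" },                       /* BCMA_CORE_ARM_CA7 */
	{ 0xFFF, "Default" },                       /* BCMA_CORE_DEFAULT, sentinel */
};

static const char *core_name(unsigned short id)
{
	const struct core_id_name *e = bcm_device_names;

	/* walk until the sentinel; unknown IDs fall back to "Default" */
	while (e->id != 0xFFF && e->id != id)
		e++;
	return e->name;
}

int main(void)
{
	printf("core 0x847 -> %s\n", core_name(0x847)); /* ARM CA7 */
	printf("core 0x123 -> %s\n", core_name(0x123)); /* Default */
	return 0;
}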
Signed-off-by: Rafał Miłecki Signed-off-by: Kalle Valo --- drivers/bcma/scan.c | 3 +++ include/linux/bcma/bcma.h | 2 ++ 2 files changed, 5 insertions(+) (limited to 'include/linux') diff --git a/drivers/bcma/scan.c b/drivers/bcma/scan.c index df806b9c5490..5ee731132365 100644 --- a/drivers/bcma/scan.c +++ b/drivers/bcma/scan.c @@ -98,6 +98,9 @@ static const struct bcma_device_id_name bcma_bcm_device_names[] = { { BCMA_CORE_SHIM, "SHIM" }, { BCMA_CORE_PCIE2, "PCIe Gen2" }, { BCMA_CORE_ARM_CR4, "ARM CR4" }, + { BCMA_CORE_GCI, "GCI" }, + { BCMA_CORE_CMEM, "CNDS DDR2/3 memory controller" }, + { BCMA_CORE_ARM_CA7, "ARM CA7" }, { BCMA_CORE_DEFAULT, "Default" }, }; diff --git a/include/linux/bcma/bcma.h b/include/linux/bcma/bcma.h index 3feb1b2d75d8..991ebb4c2015 100644 --- a/include/linux/bcma/bcma.h +++ b/include/linux/bcma/bcma.h @@ -151,6 +151,8 @@ struct bcma_host_ops { #define BCMA_CORE_PCIE2 0x83C /* PCI Express Gen2 */ #define BCMA_CORE_USB30_DEV 0x83D #define BCMA_CORE_ARM_CR4 0x83E +#define BCMA_CORE_GCI 0x840 +#define BCMA_CORE_CMEM 0x846 /* CNDS DDR2/3 memory controller */ +#define BCMA_CORE_ARM_CA7 0x847 #define BCMA_CORE_SYS_MEM 0x849 #define BCMA_CORE_DEFAULT 0xFFF -- cgit v1.2.3 From 67edf354faaf93156646e741483b2313bc756c0f Mon Sep 17 00:00:00 2001 From: Rafał Miłecki Date: Tue, 19 Jan 2016 08:45:25 +0100 Subject: bcma: use _PMU_ in all names of PMU registers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The PMU (Power Management Unit) seems to be a separate piece of hardware that is merely accessed through ChipCommon core registers. In recent Broadcom chipsets the PMU is not bound to the CC but is available as a separate core. To make the code cleaner and easier to review (for correct R/W access), use clearer names. Signed-off-by: Rafał Miłecki Signed-off-by: Kalle Valo --- drivers/bcma/driver_chipcommon_pmu.c | 46 ++++++++++++++--------------- drivers/net/wireless/broadcom/b43/main.c | 8 ++--- include/linux/bcma/bcma_driver_chipcommon.h | 12 ++++---- 3 files changed, 33 insertions(+), 33 deletions(-) (limited to 'include/linux') diff --git a/drivers/bcma/driver_chipcommon_pmu.c b/drivers/bcma/driver_chipcommon_pmu.c index fe0d48cb1778..472f39dc5a38 100644 --- a/drivers/bcma/driver_chipcommon_pmu.c +++ b/drivers/bcma/driver_chipcommon_pmu.c @@ -15,44 +15,44 @@ u32 bcma_chipco_pll_read(struct bcma_drv_cc *cc, u32 offset) { - bcma_cc_write32(cc, BCMA_CC_PLLCTL_ADDR, offset); - bcma_cc_read32(cc, BCMA_CC_PLLCTL_ADDR); - return bcma_cc_read32(cc, BCMA_CC_PLLCTL_DATA); + bcma_cc_write32(cc, BCMA_CC_PMU_PLLCTL_ADDR, offset); + bcma_cc_read32(cc, BCMA_CC_PMU_PLLCTL_ADDR); + return bcma_cc_read32(cc, BCMA_CC_PMU_PLLCTL_DATA); } EXPORT_SYMBOL_GPL(bcma_chipco_pll_read); void bcma_chipco_pll_write(struct bcma_drv_cc *cc, u32 offset, u32 value) { - bcma_cc_write32(cc, BCMA_CC_PLLCTL_ADDR, offset); - bcma_cc_read32(cc, BCMA_CC_PLLCTL_ADDR); - bcma_cc_write32(cc, BCMA_CC_PLLCTL_DATA, value); + bcma_cc_write32(cc, BCMA_CC_PMU_PLLCTL_ADDR, offset); + bcma_cc_read32(cc, BCMA_CC_PMU_PLLCTL_ADDR); + bcma_cc_write32(cc, BCMA_CC_PMU_PLLCTL_DATA, value); } EXPORT_SYMBOL_GPL(bcma_chipco_pll_write); void bcma_chipco_pll_maskset(struct bcma_drv_cc *cc, u32 offset, u32 mask, u32 set) { - bcma_cc_write32(cc, BCMA_CC_PLLCTL_ADDR, offset); - bcma_cc_read32(cc, BCMA_CC_PLLCTL_ADDR); - bcma_cc_maskset32(cc, BCMA_CC_PLLCTL_DATA, mask, set); + bcma_cc_write32(cc, BCMA_CC_PMU_PLLCTL_ADDR, offset); + bcma_cc_read32(cc, BCMA_CC_PMU_PLLCTL_ADDR); + bcma_cc_maskset32(cc, BCMA_CC_PMU_PLLCTL_DATA,
mask, set); } EXPORT_SYMBOL_GPL(bcma_chipco_pll_maskset); void bcma_chipco_chipctl_maskset(struct bcma_drv_cc *cc, u32 offset, u32 mask, u32 set) { - bcma_cc_write32(cc, BCMA_CC_CHIPCTL_ADDR, offset); - bcma_cc_read32(cc, BCMA_CC_CHIPCTL_ADDR); - bcma_cc_maskset32(cc, BCMA_CC_CHIPCTL_DATA, mask, set); + bcma_cc_write32(cc, BCMA_CC_PMU_CHIPCTL_ADDR, offset); + bcma_cc_read32(cc, BCMA_CC_PMU_CHIPCTL_ADDR); + bcma_cc_maskset32(cc, BCMA_CC_PMU_CHIPCTL_DATA, mask, set); } EXPORT_SYMBOL_GPL(bcma_chipco_chipctl_maskset); void bcma_chipco_regctl_maskset(struct bcma_drv_cc *cc, u32 offset, u32 mask, u32 set) { - bcma_cc_write32(cc, BCMA_CC_REGCTL_ADDR, offset); - bcma_cc_read32(cc, BCMA_CC_REGCTL_ADDR); - bcma_cc_maskset32(cc, BCMA_CC_REGCTL_DATA, mask, set); + bcma_cc_write32(cc, BCMA_CC_PMU_REGCTL_ADDR, offset); + bcma_cc_read32(cc, BCMA_CC_PMU_REGCTL_ADDR); + bcma_cc_maskset32(cc, BCMA_CC_PMU_REGCTL_DATA, mask, set); } EXPORT_SYMBOL_GPL(bcma_chipco_regctl_maskset); @@ -472,8 +472,8 @@ u32 bcma_pmu_get_cpu_clock(struct bcma_drv_cc *cc) static void bcma_pmu_spuravoid_pll_write(struct bcma_drv_cc *cc, u32 offset, u32 value) { - bcma_cc_write32(cc, BCMA_CC_PLLCTL_ADDR, offset); - bcma_cc_write32(cc, BCMA_CC_PLLCTL_DATA, value); + bcma_cc_write32(cc, BCMA_CC_PMU_PLLCTL_ADDR, offset); + bcma_cc_write32(cc, BCMA_CC_PMU_PLLCTL_DATA, value); } void bcma_pmu_spuravoid_pllupdate(struct bcma_drv_cc *cc, int spuravoid) @@ -497,20 +497,20 @@ void bcma_pmu_spuravoid_pllupdate(struct bcma_drv_cc *cc, int spuravoid) bus->chipinfo.id == BCMA_CHIP_ID_BCM53572) ? 6 : 0; /* RMW only the P1 divider */ - bcma_cc_write32(cc, BCMA_CC_PLLCTL_ADDR, + bcma_cc_write32(cc, BCMA_CC_PMU_PLLCTL_ADDR, BCMA_CC_PMU_PLL_CTL0 + phypll_offset); - tmp = bcma_cc_read32(cc, BCMA_CC_PLLCTL_DATA); + tmp = bcma_cc_read32(cc, BCMA_CC_PMU_PLLCTL_DATA); tmp &= (~(BCMA_CC_PMU1_PLL0_PC0_P1DIV_MASK)); tmp |= (bcm5357_bcm43236_p1div[spuravoid] << BCMA_CC_PMU1_PLL0_PC0_P1DIV_SHIFT); - bcma_cc_write32(cc, BCMA_CC_PLLCTL_DATA, tmp); + bcma_cc_write32(cc, BCMA_CC_PMU_PLLCTL_DATA, tmp); /* RMW only the int feedback divider */ - bcma_cc_write32(cc, BCMA_CC_PLLCTL_ADDR, + bcma_cc_write32(cc, BCMA_CC_PMU_PLLCTL_ADDR, BCMA_CC_PMU_PLL_CTL2 + phypll_offset); - tmp = bcma_cc_read32(cc, BCMA_CC_PLLCTL_DATA); + tmp = bcma_cc_read32(cc, BCMA_CC_PMU_PLLCTL_DATA); tmp &= ~(BCMA_CC_PMU1_PLL0_PC2_NDIV_INT_MASK); tmp |= (bcm5357_bcm43236_ndiv[spuravoid]) << BCMA_CC_PMU1_PLL0_PC2_NDIV_INT_SHIFT; - bcma_cc_write32(cc, BCMA_CC_PLLCTL_DATA, tmp); + bcma_cc_write32(cc, BCMA_CC_PMU_PLLCTL_DATA, tmp); tmp = BCMA_CC_PMU_CTL_PLL_UPD; break; diff --git a/drivers/net/wireless/broadcom/b43/main.c b/drivers/net/wireless/broadcom/b43/main.c index ec013fbd6a81..c279211e49f9 100644 --- a/drivers/net/wireless/broadcom/b43/main.c +++ b/drivers/net/wireless/broadcom/b43/main.c @@ -1215,10 +1215,10 @@ void b43_wireless_core_phy_pll_reset(struct b43_wldev *dev) case B43_BUS_BCMA: bcma_cc = &dev->dev->bdev->bus->drv_cc; - bcma_cc_write32(bcma_cc, BCMA_CC_CHIPCTL_ADDR, 0); - bcma_cc_mask32(bcma_cc, BCMA_CC_CHIPCTL_DATA, ~0x4); - bcma_cc_set32(bcma_cc, BCMA_CC_CHIPCTL_DATA, 0x4); - bcma_cc_mask32(bcma_cc, BCMA_CC_CHIPCTL_DATA, ~0x4); + bcma_cc_write32(bcma_cc, BCMA_CC_PMU_CHIPCTL_ADDR, 0); + bcma_cc_mask32(bcma_cc, BCMA_CC_PMU_CHIPCTL_DATA, ~0x4); + bcma_cc_set32(bcma_cc, BCMA_CC_PMU_CHIPCTL_DATA, 0x4); + bcma_cc_mask32(bcma_cc, BCMA_CC_PMU_CHIPCTL_DATA, ~0x4); break; #endif #ifdef CONFIG_B43_SSB diff --git a/include/linux/bcma/bcma_driver_chipcommon.h 
b/include/linux/bcma/bcma_driver_chipcommon.h index db51a6ffb7d6..96d8d56f240f 100644 --- a/include/linux/bcma/bcma_driver_chipcommon.h +++ b/include/linux/bcma/bcma_driver_chipcommon.h @@ -351,12 +351,12 @@ #define BCMA_CC_PMU_RES_REQTS 0x0640 /* PMU res req timer sel */ #define BCMA_CC_PMU_RES_REQT 0x0644 /* PMU res req timer */ #define BCMA_CC_PMU_RES_REQM 0x0648 /* PMU res req mask */ -#define BCMA_CC_CHIPCTL_ADDR 0x0650 -#define BCMA_CC_CHIPCTL_DATA 0x0654 -#define BCMA_CC_REGCTL_ADDR 0x0658 -#define BCMA_CC_REGCTL_DATA 0x065C -#define BCMA_CC_PLLCTL_ADDR 0x0660 -#define BCMA_CC_PLLCTL_DATA 0x0664 +#define BCMA_CC_PMU_CHIPCTL_ADDR 0x0650 +#define BCMA_CC_PMU_CHIPCTL_DATA 0x0654 +#define BCMA_CC_PMU_REGCTL_ADDR 0x0658 +#define BCMA_CC_PMU_REGCTL_DATA 0x065C +#define BCMA_CC_PMU_PLLCTL_ADDR 0x0660 +#define BCMA_CC_PMU_PLLCTL_DATA 0x0664 #define BCMA_CC_PMU_STRAPOPT 0x0668 /* (corerev >= 28) */ #define BCMA_CC_PMU_XTAL_FREQ 0x066C /* (pmurev >= 10) */ #define BCMA_CC_PMU_XTAL_FREQ_ILPCTL_MASK 0x00001FFF -- cgit v1.2.3 From b3c47afbf54d86daa0473895e8ca9e8b663f5c1a Mon Sep 17 00:00:00 2001 From: Rafał Miłecki Date: Tue, 19 Jan 2016 08:45:26 +0100 Subject: bcma: support PMU present as separated bus core MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On recent Broadcom chipsets the PMU is present as a separate core and can no longer be accessed through ChipCommon, as that fails with e.g.: [ 0.000577] Unhandled fault: external abort on non-linefetch (0x1008) at 0xf1000604 Solve this by using a new (PMU) core pointer that is set to either ChipCommon or the PMU core, depending on the hardware capabilities. Signed-off-by: Rafał Miłecki Signed-off-by: Kalle Valo --- drivers/bcma/driver_chipcommon.c | 2 +- drivers/bcma/driver_chipcommon_pmu.c | 94 ++++++++++++++++------------- include/linux/bcma/bcma_driver_chipcommon.h | 19 ++++++ 3 files changed, 72 insertions(+), 43 deletions(-) (limited to 'include/linux') diff --git a/drivers/bcma/driver_chipcommon.c b/drivers/bcma/driver_chipcommon.c index b7c8a8d4e6d1..36ee221e298f 100644 --- a/drivers/bcma/driver_chipcommon.c +++ b/drivers/bcma/driver_chipcommon.c @@ -185,7 +185,7 @@ u32 bcma_chipco_watchdog_timer_set(struct bcma_drv_cc *cc, u32 ticks) ticks = 2; else if (ticks > maxt) ticks = maxt; - bcma_cc_write32(cc, BCMA_CC_PMU_WATCHDOG, ticks); + bcma_pmu_write32(cc, BCMA_CC_PMU_WATCHDOG, ticks); } else { struct bcma_bus *bus = cc->core->bus; diff --git a/drivers/bcma/driver_chipcommon_pmu.c b/drivers/bcma/driver_chipcommon_pmu.c index 472f39dc5a38..f1eb4d3e1d57 100644 --- a/drivers/bcma/driver_chipcommon_pmu.c +++ b/drivers/bcma/driver_chipcommon_pmu.c @@ -15,44 +15,44 @@ u32 bcma_chipco_pll_read(struct bcma_drv_cc *cc, u32 offset) { - bcma_cc_write32(cc, BCMA_CC_PMU_PLLCTL_ADDR, offset); - bcma_cc_read32(cc, BCMA_CC_PMU_PLLCTL_ADDR); - return bcma_cc_read32(cc, BCMA_CC_PMU_PLLCTL_DATA); + bcma_pmu_write32(cc, BCMA_CC_PMU_PLLCTL_ADDR, offset); + bcma_pmu_read32(cc, BCMA_CC_PMU_PLLCTL_ADDR); + return bcma_pmu_read32(cc, BCMA_CC_PMU_PLLCTL_DATA); } EXPORT_SYMBOL_GPL(bcma_chipco_pll_read); void bcma_chipco_pll_write(struct bcma_drv_cc *cc, u32 offset, u32 value) { - bcma_cc_write32(cc, BCMA_CC_PMU_PLLCTL_ADDR, offset); - bcma_cc_read32(cc, BCMA_CC_PMU_PLLCTL_ADDR); - bcma_cc_write32(cc, BCMA_CC_PMU_PLLCTL_DATA, value); + bcma_pmu_write32(cc, BCMA_CC_PMU_PLLCTL_ADDR, offset); + bcma_pmu_read32(cc, BCMA_CC_PMU_PLLCTL_ADDR); + bcma_pmu_write32(cc, BCMA_CC_PMU_PLLCTL_DATA, value); } EXPORT_SYMBOL_GPL(bcma_chipco_pll_write); void
bcma_chipco_pll_maskset(struct bcma_drv_cc *cc, u32 offset, u32 mask, u32 set) { - bcma_cc_write32(cc, BCMA_CC_PMU_PLLCTL_ADDR, offset); - bcma_cc_read32(cc, BCMA_CC_PMU_PLLCTL_ADDR); - bcma_cc_maskset32(cc, BCMA_CC_PMU_PLLCTL_DATA, mask, set); + bcma_pmu_write32(cc, BCMA_CC_PMU_PLLCTL_ADDR, offset); + bcma_pmu_read32(cc, BCMA_CC_PMU_PLLCTL_ADDR); + bcma_pmu_maskset32(cc, BCMA_CC_PMU_PLLCTL_DATA, mask, set); } EXPORT_SYMBOL_GPL(bcma_chipco_pll_maskset); void bcma_chipco_chipctl_maskset(struct bcma_drv_cc *cc, u32 offset, u32 mask, u32 set) { - bcma_cc_write32(cc, BCMA_CC_PMU_CHIPCTL_ADDR, offset); - bcma_cc_read32(cc, BCMA_CC_PMU_CHIPCTL_ADDR); - bcma_cc_maskset32(cc, BCMA_CC_PMU_CHIPCTL_DATA, mask, set); + bcma_pmu_write32(cc, BCMA_CC_PMU_CHIPCTL_ADDR, offset); + bcma_pmu_read32(cc, BCMA_CC_PMU_CHIPCTL_ADDR); + bcma_pmu_maskset32(cc, BCMA_CC_PMU_CHIPCTL_DATA, mask, set); } EXPORT_SYMBOL_GPL(bcma_chipco_chipctl_maskset); void bcma_chipco_regctl_maskset(struct bcma_drv_cc *cc, u32 offset, u32 mask, u32 set) { - bcma_cc_write32(cc, BCMA_CC_PMU_REGCTL_ADDR, offset); - bcma_cc_read32(cc, BCMA_CC_PMU_REGCTL_ADDR); - bcma_cc_maskset32(cc, BCMA_CC_PMU_REGCTL_DATA, mask, set); + bcma_pmu_write32(cc, BCMA_CC_PMU_REGCTL_ADDR, offset); + bcma_pmu_read32(cc, BCMA_CC_PMU_REGCTL_ADDR); + bcma_pmu_maskset32(cc, BCMA_CC_PMU_REGCTL_DATA, mask, set); } EXPORT_SYMBOL_GPL(bcma_chipco_regctl_maskset); @@ -60,18 +60,18 @@ static u32 bcma_pmu_xtalfreq(struct bcma_drv_cc *cc) { u32 ilp_ctl, alp_hz; - if (!(bcma_cc_read32(cc, BCMA_CC_PMU_STAT) & + if (!(bcma_pmu_read32(cc, BCMA_CC_PMU_STAT) & BCMA_CC_PMU_STAT_EXT_LPO_AVAIL)) return 0; - bcma_cc_write32(cc, BCMA_CC_PMU_XTAL_FREQ, - BIT(BCMA_CC_PMU_XTAL_FREQ_MEASURE_SHIFT)); + bcma_pmu_write32(cc, BCMA_CC_PMU_XTAL_FREQ, + BIT(BCMA_CC_PMU_XTAL_FREQ_MEASURE_SHIFT)); usleep_range(1000, 2000); - ilp_ctl = bcma_cc_read32(cc, BCMA_CC_PMU_XTAL_FREQ); + ilp_ctl = bcma_pmu_read32(cc, BCMA_CC_PMU_XTAL_FREQ); ilp_ctl &= BCMA_CC_PMU_XTAL_FREQ_ILPCTL_MASK; - bcma_cc_write32(cc, BCMA_CC_PMU_XTAL_FREQ, 0); + bcma_pmu_write32(cc, BCMA_CC_PMU_XTAL_FREQ, 0); alp_hz = ilp_ctl * 32768 / 4; return (alp_hz + 50000) / 100000 * 100; @@ -127,8 +127,8 @@ static void bcma_pmu2_pll_init0(struct bcma_drv_cc *cc, u32 xtalfreq) mask = (u32)~(BCMA_RES_4314_HT_AVAIL | BCMA_RES_4314_MACPHY_CLK_AVAIL); - bcma_cc_mask32(cc, BCMA_CC_PMU_MINRES_MSK, mask); - bcma_cc_mask32(cc, BCMA_CC_PMU_MAXRES_MSK, mask); + bcma_pmu_mask32(cc, BCMA_CC_PMU_MINRES_MSK, mask); + bcma_pmu_mask32(cc, BCMA_CC_PMU_MAXRES_MSK, mask); bcma_wait_value(cc->core, BCMA_CLKCTLST, BCMA_CLKCTLST_HAVEHT, 0, 20000); break; @@ -140,7 +140,7 @@ static void bcma_pmu2_pll_init0(struct bcma_drv_cc *cc, u32 xtalfreq) /* Flush */ if (cc->pmu.rev >= 2) - bcma_cc_set32(cc, BCMA_CC_PMU_CTL, BCMA_CC_PMU_CTL_PLL_UPD); + bcma_pmu_set32(cc, BCMA_CC_PMU_CTL, BCMA_CC_PMU_CTL_PLL_UPD); /* TODO: Do we need to update OTP? */ } @@ -195,9 +195,9 @@ static void bcma_pmu_resources_init(struct bcma_drv_cc *cc) /* Set the resource masks. */ if (min_msk) - bcma_cc_write32(cc, BCMA_CC_PMU_MINRES_MSK, min_msk); + bcma_pmu_write32(cc, BCMA_CC_PMU_MINRES_MSK, min_msk); if (max_msk) - bcma_cc_write32(cc, BCMA_CC_PMU_MAXRES_MSK, max_msk); + bcma_pmu_write32(cc, BCMA_CC_PMU_MAXRES_MSK, max_msk); /* * Add some delay; allow resources to come up and settle. 
@@ -269,23 +269,33 @@ static void bcma_pmu_workarounds(struct bcma_drv_cc *cc) void bcma_pmu_early_init(struct bcma_drv_cc *cc) { + struct bcma_bus *bus = cc->core->bus; u32 pmucap; - pmucap = bcma_cc_read32(cc, BCMA_CC_PMU_CAP); + if (cc->core->id.rev >= 35 && + cc->capabilities_ext & BCMA_CC_CAP_EXT_AOB_PRESENT) { + cc->pmu.core = bcma_find_core(bus, BCMA_CORE_PMU); + if (!cc->pmu.core) + bcma_warn(bus, "Couldn't find expected PMU core"); + } + if (!cc->pmu.core) + cc->pmu.core = cc->core; + + pmucap = bcma_pmu_read32(cc, BCMA_CC_PMU_CAP); cc->pmu.rev = (pmucap & BCMA_CC_PMU_CAP_REVISION); - bcma_debug(cc->core->bus, "Found rev %u PMU (capabilities 0x%08X)\n", - cc->pmu.rev, pmucap); + bcma_debug(bus, "Found rev %u PMU (capabilities 0x%08X)\n", cc->pmu.rev, + pmucap); } void bcma_pmu_init(struct bcma_drv_cc *cc) { if (cc->pmu.rev == 1) - bcma_cc_mask32(cc, BCMA_CC_PMU_CTL, - ~BCMA_CC_PMU_CTL_NOILPONW); + bcma_pmu_mask32(cc, BCMA_CC_PMU_CTL, + ~BCMA_CC_PMU_CTL_NOILPONW); else - bcma_cc_set32(cc, BCMA_CC_PMU_CTL, - BCMA_CC_PMU_CTL_NOILPONW); + bcma_pmu_set32(cc, BCMA_CC_PMU_CTL, + BCMA_CC_PMU_CTL_NOILPONW); bcma_pmu_pll_init(cc); bcma_pmu_resources_init(cc); @@ -472,8 +482,8 @@ u32 bcma_pmu_get_cpu_clock(struct bcma_drv_cc *cc) static void bcma_pmu_spuravoid_pll_write(struct bcma_drv_cc *cc, u32 offset, u32 value) { - bcma_cc_write32(cc, BCMA_CC_PMU_PLLCTL_ADDR, offset); - bcma_cc_write32(cc, BCMA_CC_PMU_PLLCTL_DATA, value); + bcma_pmu_write32(cc, BCMA_CC_PMU_PLLCTL_ADDR, offset); + bcma_pmu_write32(cc, BCMA_CC_PMU_PLLCTL_DATA, value); } void bcma_pmu_spuravoid_pllupdate(struct bcma_drv_cc *cc, int spuravoid) @@ -497,20 +507,20 @@ void bcma_pmu_spuravoid_pllupdate(struct bcma_drv_cc *cc, int spuravoid) bus->chipinfo.id == BCMA_CHIP_ID_BCM53572) ? 6 : 0; /* RMW only the P1 divider */ - bcma_cc_write32(cc, BCMA_CC_PMU_PLLCTL_ADDR, + bcma_pmu_write32(cc, BCMA_CC_PMU_PLLCTL_ADDR, BCMA_CC_PMU_PLL_CTL0 + phypll_offset); - tmp = bcma_cc_read32(cc, BCMA_CC_PMU_PLLCTL_DATA); + tmp = bcma_pmu_read32(cc, BCMA_CC_PMU_PLLCTL_DATA); tmp &= (~(BCMA_CC_PMU1_PLL0_PC0_P1DIV_MASK)); tmp |= (bcm5357_bcm43236_p1div[spuravoid] << BCMA_CC_PMU1_PLL0_PC0_P1DIV_SHIFT); - bcma_cc_write32(cc, BCMA_CC_PMU_PLLCTL_DATA, tmp); + bcma_pmu_write32(cc, BCMA_CC_PMU_PLLCTL_DATA, tmp); /* RMW only the int feedback divider */ - bcma_cc_write32(cc, BCMA_CC_PMU_PLLCTL_ADDR, + bcma_pmu_write32(cc, BCMA_CC_PMU_PLLCTL_ADDR, BCMA_CC_PMU_PLL_CTL2 + phypll_offset); - tmp = bcma_cc_read32(cc, BCMA_CC_PMU_PLLCTL_DATA); + tmp = bcma_pmu_read32(cc, BCMA_CC_PMU_PLLCTL_DATA); tmp &= ~(BCMA_CC_PMU1_PLL0_PC2_NDIV_INT_MASK); tmp |= (bcm5357_bcm43236_ndiv[spuravoid]) << BCMA_CC_PMU1_PLL0_PC2_NDIV_INT_SHIFT; - bcma_cc_write32(cc, BCMA_CC_PMU_PLLCTL_DATA, tmp); + bcma_pmu_write32(cc, BCMA_CC_PMU_PLLCTL_DATA, tmp); tmp = BCMA_CC_PMU_CTL_PLL_UPD; break; @@ -646,7 +656,7 @@ void bcma_pmu_spuravoid_pllupdate(struct bcma_drv_cc *cc, int spuravoid) break; } - tmp |= bcma_cc_read32(cc, BCMA_CC_PMU_CTL); - bcma_cc_write32(cc, BCMA_CC_PMU_CTL, tmp); + tmp |= bcma_pmu_read32(cc, BCMA_CC_PMU_CTL); + bcma_pmu_write32(cc, BCMA_CC_PMU_CTL, tmp); } EXPORT_SYMBOL_GPL(bcma_pmu_spuravoid_pllupdate); diff --git a/include/linux/bcma/bcma_driver_chipcommon.h b/include/linux/bcma/bcma_driver_chipcommon.h index 96d8d56f240f..700d0c6f7480 100644 --- a/include/linux/bcma/bcma_driver_chipcommon.h +++ b/include/linux/bcma/bcma_driver_chipcommon.h @@ -217,6 +217,11 @@ #define BCMA_CC_CLKDIV_JTAG_SHIFT 8 #define BCMA_CC_CLKDIV_UART 0x000000FF #define BCMA_CC_CAP_EXT 0x00AC /* 
Capabilities */ +#define BCMA_CC_CAP_EXT_SECI_PRESENT 0x00000001 +#define BCMA_CC_CAP_EXT_GSIO_PRESENT 0x00000002 +#define BCMA_CC_CAP_EXT_GCI_PRESENT 0x00000004 +#define BCMA_CC_CAP_EXT_SECI_PUART_PRESENT 0x00000008 /* UART present */ +#define BCMA_CC_CAP_EXT_AOB_PRESENT 0x00000040 #define BCMA_CC_PLLONDELAY 0x00B0 /* Rev >= 4 only */ #define BCMA_CC_FREFSELDELAY 0x00B4 /* Rev >= 4 only */ #define BCMA_CC_SLOWCLKCTL 0x00B8 /* 6 <= Rev <= 9 only */ @@ -566,6 +571,7 @@ * Check availability with ((struct bcma_chipcommon)->capabilities & BCMA_CC_CAP_PMU) */ struct bcma_chipcommon_pmu { + struct bcma_device *core; /* Can be separated core or just ChipCommon one */ u8 rev; /* PMU revision */ u32 crystalfreq; /* The active crystal frequency (in kHz) */ }; @@ -660,6 +666,19 @@ struct bcma_drv_cc_b { #define bcma_cc_maskset32(cc, offset, mask, set) \ bcma_cc_write32(cc, offset, (bcma_cc_read32(cc, offset) & (mask)) | (set)) +/* PMU registers access */ +#define bcma_pmu_read32(cc, offset) \ + bcma_read32((cc)->pmu.core, offset) +#define bcma_pmu_write32(cc, offset, val) \ + bcma_write32((cc)->pmu.core, offset, val) + +#define bcma_pmu_mask32(cc, offset, mask) \ + bcma_pmu_write32(cc, offset, bcma_pmu_read32(cc, offset) & (mask)) +#define bcma_pmu_set32(cc, offset, set) \ + bcma_pmu_write32(cc, offset, bcma_pmu_read32(cc, offset) | (set)) +#define bcma_pmu_maskset32(cc, offset, mask, set) \ + bcma_pmu_write32(cc, offset, (bcma_pmu_read32(cc, offset) & (mask)) | (set)) + extern u32 bcma_chipco_watchdog_timer_set(struct bcma_drv_cc *cc, u32 ticks); extern u32 bcma_chipco_get_alp_clock(struct bcma_drv_cc *cc); -- cgit v1.2.3 From 61dba73cdbba8ec5c01b31beaf9e2debc2d2f273 Mon Sep 17 00:00:00 2001 From: Rafał Miłecki Date: Sun, 24 Jan 2016 16:37:33 +0100 Subject: bcma: add support for BCM47094 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It's another SoC with 32 GPIOs and simplified watchdog handling. It was tested on D-Link DIR-885L. Signed-off-by: Rafał Miłecki Signed-off-by: Kalle Valo --- drivers/bcma/driver_chipcommon.c | 1 + drivers/bcma/driver_gpio.c | 1 + include/linux/bcma/bcma.h | 1 + 3 files changed, 3 insertions(+) (limited to 'include/linux') diff --git a/drivers/bcma/driver_chipcommon.c b/drivers/bcma/driver_chipcommon.c index bdb73d97da63..b0f44a2937b9 100644 --- a/drivers/bcma/driver_chipcommon.c +++ b/drivers/bcma/driver_chipcommon.c @@ -197,6 +197,7 @@ u32 bcma_chipco_watchdog_timer_set(struct bcma_drv_cc *cc, u32 ticks) struct bcma_bus *bus = cc->core->bus; if (bus->chipinfo.id != BCMA_CHIP_ID_BCM4707 && + bus->chipinfo.id != BCMA_CHIP_ID_BCM47094 && bus->chipinfo.id != BCMA_CHIP_ID_BCM53018) bcma_core_set_clockmode(cc->core, ticks ?
BCMA_CLKMODE_FAST : BCMA_CLKMODE_DYNAMIC); diff --git a/drivers/bcma/driver_gpio.c b/drivers/bcma/driver_gpio.c index 504899a72966..77b0738fbe1b 100644 --- a/drivers/bcma/driver_gpio.c +++ b/drivers/bcma/driver_gpio.c @@ -197,6 +197,7 @@ int bcma_gpio_init(struct bcma_drv_cc *cc) case BCMA_CHIP_ID_BCM4707: case BCMA_CHIP_ID_BCM5357: case BCMA_CHIP_ID_BCM53572: + case BCMA_CHIP_ID_BCM47094: chip->ngpio = 32; break; default: diff --git a/include/linux/bcma/bcma.h b/include/linux/bcma/bcma.h index 991ebb4c2015..0367c63f5960 100644 --- a/include/linux/bcma/bcma.h +++ b/include/linux/bcma/bcma.h @@ -201,6 +201,7 @@ struct bcma_host_ops { #define BCMA_PKG_ID_BCM4707 1 #define BCMA_PKG_ID_BCM4708 2 #define BCMA_PKG_ID_BCM4709 0 +#define BCMA_CHIP_ID_BCM47094 53030 #define BCMA_CHIP_ID_BCM53018 53018 /* Board types (on PCI usually equals to the subsystem dev id) */ -- cgit v1.2.3 From a4b5d606b957c6a58e991de63fe999492de1ab92 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Wed, 3 Feb 2016 23:35:23 +0100 Subject: usb: core: rename mutex usb_bus_list_lock to usb_bus_idr_lock Now that usb_bus_list has been removed and switched to idr rename the related mutex accordingly. Signed-off-by: Heiner Kallweit Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/devices.c | 6 +++--- drivers/usb/core/hcd.c | 30 +++++++++++++++--------------- drivers/usb/core/hub.c | 4 ++-- drivers/usb/mon/mon_main.c | 4 ++-- include/linux/usb/hcd.h | 2 +- 5 files changed, 23 insertions(+), 23 deletions(-) (limited to 'include/linux') diff --git a/drivers/usb/core/devices.c b/drivers/usb/core/devices.c index 6118a04f0b84..ef04b50e6bbb 100644 --- a/drivers/usb/core/devices.c +++ b/drivers/usb/core/devices.c @@ -620,7 +620,7 @@ static ssize_t usb_device_read(struct file *file, char __user *buf, if (!access_ok(VERIFY_WRITE, buf, nbytes)) return -EFAULT; - mutex_lock(&usb_bus_list_lock); + mutex_lock(&usb_bus_idr_lock); /* print devices for all busses */ idr_for_each_entry(&usb_bus_idr, bus, id) { /* recurse through all children of the root hub */ @@ -631,12 +631,12 @@ static ssize_t usb_device_read(struct file *file, char __user *buf, bus->root_hub, bus, 0, 0, 0); usb_unlock_device(bus->root_hub); if (ret < 0) { - mutex_unlock(&usb_bus_list_lock); + mutex_unlock(&usb_bus_idr_lock); return ret; } total_written += ret; } - mutex_unlock(&usb_bus_list_lock); + mutex_unlock(&usb_bus_idr_lock); return total_written; } diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c index cf3eb22dbeb4..0b8a91004737 100644 --- a/drivers/usb/core/hcd.c +++ b/drivers/usb/core/hcd.c @@ -97,8 +97,8 @@ EXPORT_SYMBOL_GPL (usb_bus_idr); #define USB_MAXBUS 64 /* used when updating list of hcds */ -DEFINE_MUTEX(usb_bus_list_lock); /* exported only for usbfs */ -EXPORT_SYMBOL_GPL (usb_bus_list_lock); +DEFINE_MUTEX(usb_bus_idr_lock); /* exported only for usbfs */ +EXPORT_SYMBOL_GPL (usb_bus_idr_lock); /* used for controlling access to virtual root hubs */ static DEFINE_SPINLOCK(hcd_root_hub_lock); @@ -1014,14 +1014,14 @@ static int usb_register_bus(struct usb_bus *bus) int result = -E2BIG; int busnum; - mutex_lock(&usb_bus_list_lock); + mutex_lock(&usb_bus_idr_lock); busnum = idr_alloc(&usb_bus_idr, bus, 1, USB_MAXBUS, GFP_KERNEL); if (busnum < 0) { pr_err("%s: failed to get bus number\n", usbcore_name); goto error_find_busnum; } bus->busnum = busnum; - mutex_unlock(&usb_bus_list_lock); + mutex_unlock(&usb_bus_idr_lock); usb_notify_add_bus(bus); @@ -1030,7 +1030,7 @@ static int usb_register_bus(struct usb_bus *bus) return 0; error_find_busnum: - 
mutex_unlock(&usb_bus_list_lock); + mutex_unlock(&usb_bus_idr_lock); return result; } @@ -1051,9 +1051,9 @@ static void usb_deregister_bus (struct usb_bus *bus) * controller code, as well as having it call this when cleaning * itself up */ - mutex_lock(&usb_bus_list_lock); + mutex_lock(&usb_bus_idr_lock); idr_remove(&usb_bus_idr, bus->busnum); - mutex_unlock(&usb_bus_list_lock); + mutex_unlock(&usb_bus_idr_lock); usb_notify_remove_bus(bus); } @@ -1083,12 +1083,12 @@ static int register_root_hub(struct usb_hcd *hcd) set_bit (devnum, usb_dev->bus->devmap.devicemap); usb_set_device_state(usb_dev, USB_STATE_ADDRESS); - mutex_lock(&usb_bus_list_lock); + mutex_lock(&usb_bus_idr_lock); usb_dev->ep0.desc.wMaxPacketSize = cpu_to_le16(64); retval = usb_get_device_descriptor(usb_dev, USB_DT_DEVICE_SIZE); if (retval != sizeof usb_dev->descriptor) { - mutex_unlock(&usb_bus_list_lock); + mutex_unlock(&usb_bus_idr_lock); dev_dbg (parent_dev, "can't read %s device descriptor %d\n", dev_name(&usb_dev->dev), retval); return (retval < 0) ? retval : -EMSGSIZE; @@ -1099,7 +1099,7 @@ static int register_root_hub(struct usb_hcd *hcd) if (!retval) { usb_dev->lpm_capable = usb_device_supports_lpm(usb_dev); } else if (usb_dev->speed >= USB_SPEED_SUPER) { - mutex_unlock(&usb_bus_list_lock); + mutex_unlock(&usb_bus_idr_lock); dev_dbg(parent_dev, "can't read %s bos descriptor %d\n", dev_name(&usb_dev->dev), retval); return retval; @@ -1119,7 +1119,7 @@ static int register_root_hub(struct usb_hcd *hcd) if (HCD_DEAD(hcd)) usb_hc_died (hcd); /* This time clean up */ } - mutex_unlock(&usb_bus_list_lock); + mutex_unlock(&usb_bus_idr_lock); return retval; } @@ -2885,9 +2885,9 @@ error_create_attr_group: #ifdef CONFIG_PM cancel_work_sync(&hcd->wakeup_work); #endif - mutex_lock(&usb_bus_list_lock); + mutex_lock(&usb_bus_idr_lock); usb_disconnect(&rhdev); /* Sets rhdev to NULL */ - mutex_unlock(&usb_bus_list_lock); + mutex_unlock(&usb_bus_idr_lock); err_register_root_hub: hcd->rh_pollable = 0; clear_bit(HCD_FLAG_POLL_RH, &hcd->flags); @@ -2954,9 +2954,9 @@ void usb_remove_hcd(struct usb_hcd *hcd) cancel_work_sync(&hcd->wakeup_work); #endif - mutex_lock(&usb_bus_list_lock); + mutex_lock(&usb_bus_idr_lock); usb_disconnect(&rhdev); /* Sets rhdev to NULL */ - mutex_unlock(&usb_bus_list_lock); + mutex_unlock(&usb_bus_idr_lock); /* * tasklet_kill() isn't needed here because: diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index f912fe6bbc0b..3d46d03a785b 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -2145,7 +2145,7 @@ static void hub_disconnect_children(struct usb_device *udev) * Something got disconnected. Get rid of it and all of its children. * * If *pdev is a normal device then the parent hub must already be locked. - * If *pdev is a root hub then the caller must hold the usb_bus_list_lock, + * If *pdev is a root hub then the caller must hold the usb_bus_idr_lock, * which protects the set of root hubs as well as the list of buses. * * Only hub drivers (including virtual root hub drivers for host @@ -2443,7 +2443,7 @@ static void set_usb_port_removable(struct usb_device *udev) * enumerated. The device descriptor is available, but not descriptors * for any device configuration. The caller must have locked either * the parent hub (if udev is a normal device) or else the - * usb_bus_list_lock (if udev is a root hub). The parent's pointer to + * usb_bus_idr_lock (if udev is a root hub). 
The parent's pointer to * udev has already been installed, but udev is not yet visible through * sysfs or other filesystem code. * diff --git a/drivers/usb/mon/mon_main.c b/drivers/usb/mon/mon_main.c index 9b87efb0e50d..33ff49c4cea4 100644 --- a/drivers/usb/mon/mon_main.c +++ b/drivers/usb/mon/mon_main.c @@ -365,11 +365,11 @@ static int __init mon_init(void) } // MOD_INC_USE_COUNT(which_module?); - mutex_lock(&usb_bus_list_lock); + mutex_lock(&usb_bus_idr_lock); idr_for_each_entry(&usb_bus_idr, ubus, id) mon_bus_init(ubus); usb_register_notify(&mon_nb); - mutex_unlock(&usb_bus_list_lock); + mutex_unlock(&usb_bus_idr_lock); return 0; err_reg: diff --git a/include/linux/usb/hcd.h b/include/linux/usb/hcd.h index c293dd044599..b98f831dcda3 100644 --- a/include/linux/usb/hcd.h +++ b/include/linux/usb/hcd.h @@ -632,7 +632,7 @@ extern void usb_set_device_state(struct usb_device *udev, /* exported only within usbcore */ extern struct idr usb_bus_idr; -extern struct mutex usb_bus_list_lock; +extern struct mutex usb_bus_idr_lock; extern wait_queue_head_t usb_kill_urb_queue; -- cgit v1.2.3 From 2eaa790989e03900298ad24f77f1086dbbc1aebd Mon Sep 17 00:00:00 2001 From: Peter Hurley Date: Sat, 16 Jan 2016 15:23:39 -0800 Subject: earlycon: Use common framework for earlycon declarations Use a single common table of struct earlycon_id for both command line and devicetree. Re-define OF_EARLYCON_DECLARE() macro to instance a unique earlycon declaration (the declaration is only guaranteed to be unique within a compilation unit; separate compilation units must still use unique earlycon names). The semantics of OF_EARLYCON_DECLARE() is different; it declares an earlycon which can matched either on the command line or by devicetree. EARLYCON_DECLARE() is semantically unchanged; it declares an earlycon which is matched by command line only. Remove redundant instances of EARLYCON_DECLARE(). This enables all earlycons to properly initialize struct console with the appropriate name and index, which improves diagnostics and enables direct earlycon-to-console handoff. 
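As a concrete illustration of the new declaration style, here is a minimal sketch of a driver-side earlycon under the reworked macro. The device name "foo", the compatible string "acme,foo-uart", and the FOO_TX register offset are invented for this example; the overall shape follows the pl011/arc/msm conversions in the diff below:

/* A sketch of a driver-side earlycon under the reworked macros. The
 * name "foo", the compatible "acme,foo-uart" and the FOO_TX offset
 * are hypothetical; the structure mirrors the conversions below. */
#include <linux/console.h>
#include <linux/init.h>
#include <linux/io.h>
#include <linux/serial_core.h>

#define FOO_TX 0x00 /* hypothetical TX holding register */

static void foo_early_write(struct console *con, const char *s, unsigned int n)
{
	struct earlycon_device *dev = con->data;

	while (n--)
		writeb(*s++, dev->port.membase + FOO_TX);
}

static int __init foo_early_console_setup(struct earlycon_device *device,
					  const char *opt)
{
	if (!device->port.membase)
		return -ENODEV;

	device->con->write = foo_early_write;
	return 0;
}
/* One declaration now covers both match paths: the devicetree
 * (via "acme,foo-uart" in stdout-path) and the command line
 * (via earlycon=foo,<mmio-address>). */
OF_EARLYCON_DECLARE(foo, "acme,foo-uart", foo_early_console_setup);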
Acked-by: Rob Herring Signed-off-by: Peter Hurley Signed-off-by: Greg Kroah-Hartman --- drivers/of/fdt.c | 14 +++++++------- drivers/tty/serial/amba-pl011.c | 1 - drivers/tty/serial/arc_uart.c | 1 - drivers/tty/serial/earlycon.c | 10 +--------- drivers/tty/serial/msm_serial.c | 2 -- drivers/tty/serial/samsung.c | 6 ------ drivers/tty/serial/sprd_serial.c | 2 -- include/asm-generic/vmlinux.lds.h | 6 ++---- include/linux/serial_core.h | 22 +++++++++++++--------- 9 files changed, 23 insertions(+), 41 deletions(-) (limited to 'include/linux') diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c index 655f79db7899..168611867611 100644 --- a/drivers/of/fdt.c +++ b/drivers/of/fdt.c @@ -796,14 +796,13 @@ static inline void early_init_dt_check_for_initrd(unsigned long node) #endif /* CONFIG_BLK_DEV_INITRD */ #ifdef CONFIG_SERIAL_EARLYCON -extern struct of_device_id __earlycon_of_table[]; static int __init early_init_dt_scan_chosen_serial(void) { int offset; const char *p; int l; - const struct of_device_id *match = __earlycon_of_table; + const struct earlycon_id *match; const void *fdt = initial_boot_params; offset = fdt_path_offset(fdt, "/chosen"); @@ -826,19 +825,20 @@ static int __init early_init_dt_scan_chosen_serial(void) if (offset < 0) return -ENODEV; - while (match->compatible[0]) { + for (match = __earlycon_table; match < __earlycon_table_end; match++) { u64 addr; - if (fdt_node_check_compatible(fdt, offset, match->compatible)) { - match++; + if (!match->compatible[0]) + continue; + + if (fdt_node_check_compatible(fdt, offset, match->compatible)) continue; - } addr = fdt_translate_address(fdt, offset); if (addr == OF_BAD_ADDR) return -ENXIO; - of_setup_earlycon(addr, match->data); + of_setup_earlycon(addr, match->setup); return 0; } return -ENODEV; diff --git a/drivers/tty/serial/amba-pl011.c b/drivers/tty/serial/amba-pl011.c index c0da0ccbbcf5..de846027ba47 100644 --- a/drivers/tty/serial/amba-pl011.c +++ b/drivers/tty/serial/amba-pl011.c @@ -2327,7 +2327,6 @@ static int __init pl011_early_console_setup(struct earlycon_device *device, device->con->write = pl011_early_write; return 0; } -EARLYCON_DECLARE(pl011, pl011_early_console_setup); OF_EARLYCON_DECLARE(pl011, "arm,pl011", pl011_early_console_setup); #else diff --git a/drivers/tty/serial/arc_uart.c b/drivers/tty/serial/arc_uart.c index 03ebe401fff7..3a1de5c87cb4 100644 --- a/drivers/tty/serial/arc_uart.c +++ b/drivers/tty/serial/arc_uart.c @@ -576,7 +576,6 @@ static int __init arc_early_console_setup(struct earlycon_device *dev, dev->con->write = arc_early_serial_write; return 0; } -EARLYCON_DECLARE(arc_uart, arc_early_console_setup); OF_EARLYCON_DECLARE(arc_uart, "snps,arc-uart", arc_early_console_setup); #endif /* CONFIG_SERIAL_ARC_CONSOLE */ diff --git a/drivers/tty/serial/earlycon.c b/drivers/tty/serial/earlycon.c index 54419a210dc3..d50b70053418 100644 --- a/drivers/tty/serial/earlycon.c +++ b/drivers/tty/serial/earlycon.c @@ -19,7 +19,6 @@ #include #include #include -#include #ifdef CONFIG_FIX_EARLYCON_MEM #include @@ -37,13 +36,6 @@ static struct earlycon_device early_console_dev = { .con = &early_con, }; -extern struct earlycon_id __earlycon_table[]; -static const struct earlycon_id __earlycon_table_sentinel - __used __section(__earlycon_table_end); - -static const struct of_device_id __earlycon_of_table_sentinel - __used __section(__earlycon_of_table_end); - static void __iomem * __init earlycon_map(unsigned long paddr, size_t size) { void __iomem *base; @@ -166,7 +158,7 @@ int __init setup_earlycon(char *buf) if (early_con.flags 
& CON_ENABLED) return -EALREADY; - for (match = __earlycon_table; match->name[0]; match++) { + for (match = __earlycon_table; match < __earlycon_table_end; match++) { size_t len = strlen(match->name); if (strncmp(buf, match->name, len)) diff --git a/drivers/tty/serial/msm_serial.c b/drivers/tty/serial/msm_serial.c index dcde955475dc..96d3ce8dc2dc 100644 --- a/drivers/tty/serial/msm_serial.c +++ b/drivers/tty/serial/msm_serial.c @@ -1478,7 +1478,6 @@ msm_serial_early_console_setup(struct earlycon_device *device, const char *opt) device->con->write = msm_serial_early_write; return 0; } -EARLYCON_DECLARE(msm_serial, msm_serial_early_console_setup); OF_EARLYCON_DECLARE(msm_serial, "qcom,msm-uart", msm_serial_early_console_setup); @@ -1500,7 +1499,6 @@ msm_serial_early_console_setup_dm(struct earlycon_device *device, device->con->write = msm_serial_early_write_dm; return 0; } -EARLYCON_DECLARE(msm_serial_dm, msm_serial_early_console_setup_dm); OF_EARLYCON_DECLARE(msm_serial_dm, "qcom,msm-uartdm", msm_serial_early_console_setup_dm); diff --git a/drivers/tty/serial/samsung.c b/drivers/tty/serial/samsung.c index d72cd736bdc6..fd9c47f2f29f 100644 --- a/drivers/tty/serial/samsung.c +++ b/drivers/tty/serial/samsung.c @@ -2451,7 +2451,6 @@ static int __init s3c2410_early_console_setup(struct earlycon_device *device, } OF_EARLYCON_DECLARE(s3c2410, "samsung,s3c2410-uart", s3c2410_early_console_setup); -EARLYCON_DECLARE(s3c2410, s3c2410_early_console_setup); /* S3C2412, S3C2440, S3C64xx */ static struct samsung_early_console_data s3c2440_early_console_data = { @@ -2470,9 +2469,6 @@ OF_EARLYCON_DECLARE(s3c2440, "samsung,s3c2440-uart", s3c2440_early_console_setup); OF_EARLYCON_DECLARE(s3c6400, "samsung,s3c6400-uart", s3c2440_early_console_setup); -EARLYCON_DECLARE(s3c2412, s3c2440_early_console_setup); -EARLYCON_DECLARE(s3c2440, s3c2440_early_console_setup); -EARLYCON_DECLARE(s3c6400, s3c2440_early_console_setup); /* S5PV210, EXYNOS */ static struct samsung_early_console_data s5pv210_early_console_data = { @@ -2489,8 +2485,6 @@ OF_EARLYCON_DECLARE(s5pv210, "samsung,s5pv210-uart", s5pv210_early_console_setup); OF_EARLYCON_DECLARE(exynos4210, "samsung,exynos4210-uart", s5pv210_early_console_setup); -EARLYCON_DECLARE(s5pv210, s5pv210_early_console_setup); -EARLYCON_DECLARE(exynos4210, s5pv210_early_console_setup); #endif MODULE_ALIAS("platform:samsung-uart"); diff --git a/drivers/tty/serial/sprd_serial.c b/drivers/tty/serial/sprd_serial.c index ef26c4a60be4..18971063f95f 100644 --- a/drivers/tty/serial/sprd_serial.c +++ b/drivers/tty/serial/sprd_serial.c @@ -624,8 +624,6 @@ static int __init sprd_early_console_setup( device->con->write = sprd_early_write; return 0; } - -EARLYCON_DECLARE(sprd_serial, sprd_early_console_setup); OF_EARLYCON_DECLARE(sprd_serial, "sprd,sc9836-uart", sprd_early_console_setup); diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index c4bd0e2c173c..e9a81a6a109f 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -157,7 +157,7 @@ #define EARLYCON_TABLE() STRUCT_ALIGN(); \ VMLINUX_SYMBOL(__earlycon_table) = .; \ *(__earlycon_table) \ - *(__earlycon_table_end) + VMLINUX_SYMBOL(__earlycon_table_end) = .; #else #define EARLYCON_TABLE() #endif @@ -179,7 +179,6 @@ #define RESERVEDMEM_OF_TABLES() OF_TABLE(CONFIG_OF_RESERVED_MEM, reservedmem) #define CPU_METHOD_OF_TABLES() OF_TABLE(CONFIG_SMP, cpu_method) #define CPUIDLE_METHOD_OF_TABLES() OF_TABLE(CONFIG_CPU_IDLE, cpuidle_method) -#define EARLYCON_OF_TABLES() 
OF_TABLE(CONFIG_SERIAL_EARLYCON, earlycon) #ifdef CONFIG_ACPI #define ACPI_PROBE_TABLE(name) \ @@ -526,8 +525,7 @@ IRQCHIP_OF_MATCH_TABLE() \ ACPI_PROBE_TABLE(irqchip) \ ACPI_PROBE_TABLE(clksrc) \ - EARLYCON_TABLE() \ - EARLYCON_OF_TABLES() + EARLYCON_TABLE() #define INIT_TEXT \ *(.init.text) \ diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index e03d6ba5e5b4..25e05147f379 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -342,22 +342,26 @@ struct earlycon_device { struct earlycon_id { char name[16]; + char compatible[128]; int (*setup)(struct earlycon_device *, const char *options); } __aligned(32); +extern const struct earlycon_id __earlycon_table[]; +extern const struct earlycon_id __earlycon_table_end[]; + +#define OF_EARLYCON_DECLARE(_name, compat, fn) \ + static const struct earlycon_id __UNIQUE_ID(__earlycon_##_name) \ + __used __section(__earlycon_table) \ + = { .name = __stringify(_name), \ + .compatible = compat, \ + .setup = fn } + +#define EARLYCON_DECLARE(_name, fn) OF_EARLYCON_DECLARE(_name, "", fn) + extern int setup_earlycon(char *buf); extern int of_setup_earlycon(unsigned long addr, int (*setup)(struct earlycon_device *, const char *)); -#define EARLYCON_DECLARE(_name, func) \ - static const struct earlycon_id __earlycon_##_name \ - __used __section(__earlycon_table) \ - = { .name = __stringify(_name), \ - .setup = func } - -#define OF_EARLYCON_DECLARE(name, compat, fn) \ - _OF_DECLARE(earlycon, name, compat, fn, void *) - struct uart_port *uart_get_console(struct uart_port *ports, int nr, struct console *c); int uart_parse_earlycon(char *p, unsigned char *iotype, unsigned long *addr, -- cgit v1.2.3 From 05d961320ba624c98b16d72b32f947307674b341 Mon Sep 17 00:00:00 2001 From: Peter Hurley Date: Sat, 16 Jan 2016 15:23:41 -0800 Subject: of: earlycon: Fixup earlycon console name and index Use the console name embedded in the OF earlycon table by the OF_EARLYCON_DECLARE() macro to initialize the struct console 'name' and 'index' fields. 
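As a rough illustration of the name/index split described above (a standalone C sketch, not the kernel's earlycon code; split_console_name() is a made-up helper), a table entry registered as "msm_serial0" yields the console name "msm_serial" and index 0:

    #include <ctype.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    /* Peel trailing digits off the registered name: they form the console
     * index, and whatever precedes them is the console name. */
    static void split_console_name(const char *reg, char *name, size_t size,
                                   int *index)
    {
            size_t len = strlen(reg);

            while (len && isdigit((unsigned char)reg[len - 1]))
                    len--;
            snprintf(name, size, "%.*s", (int)len, reg);
            *index = reg[len] ? atoi(reg + len) : -1;
    }

    int main(void)
    {
            char name[16];
            int index;

            split_console_name("msm_serial0", name, sizeof(name), &index);
            printf("%s %d\n", name, index); /* prints: msm_serial 0 */
            return 0;
    }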
Acked-by: Rob Herring Signed-off-by: Peter Hurley Signed-off-by: Greg Kroah-Hartman --- drivers/of/fdt.c | 2 +- drivers/tty/serial/earlycon.c | 6 +++--- include/linux/serial_core.h | 3 +-- 3 files changed, 5 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c index 168611867611..ec1459517de6 100644 --- a/drivers/of/fdt.c +++ b/drivers/of/fdt.c @@ -838,7 +838,7 @@ static int __init early_init_dt_scan_chosen_serial(void) if (addr == OF_BAD_ADDR) return -ENXIO; - of_setup_earlycon(addr, match->setup); + of_setup_earlycon(addr, match); return 0; } return -ENODEV; diff --git a/drivers/tty/serial/earlycon.c b/drivers/tty/serial/earlycon.c index 90b064faa1c4..47e54deb944b 100644 --- a/drivers/tty/serial/earlycon.c +++ b/drivers/tty/serial/earlycon.c @@ -218,7 +218,7 @@ early_param("earlycon", param_setup_earlycon); #ifdef CONFIG_OF_EARLY_FLATTREE int __init of_setup_earlycon(unsigned long addr, - int (*setup)(struct earlycon_device *, const char *)) + const struct earlycon_id *match) { int err; struct uart_port *port = &early_console_dev.port; @@ -229,8 +229,8 @@ int __init of_setup_earlycon(unsigned long addr, port->uartclk = BASE_BAUD * 16; port->membase = earlycon_map(addr, SZ_4K); - early_console_dev.con->data = &early_console_dev; - err = setup(&early_console_dev, NULL); + earlycon_init(&early_console_dev, match->name); + err = match->setup(&early_console_dev, NULL); if (err < 0) return err; if (!early_console_dev.con->write) diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index 25e05147f379..63fdb55e4c9d 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -359,8 +359,7 @@ extern const struct earlycon_id __earlycon_table_end[]; #define EARLYCON_DECLARE(_name, fn) OF_EARLYCON_DECLARE(_name, "", fn) extern int setup_earlycon(char *buf); -extern int of_setup_earlycon(unsigned long addr, - int (*setup)(struct earlycon_device *, const char *)); +extern int of_setup_earlycon(unsigned long addr, const struct earlycon_id *match); struct uart_port *uart_get_console(struct uart_port *ports, int nr, struct console *c); -- cgit v1.2.3 From 4d118c9a866590850dad8689262a95345d2c6e09 Mon Sep 17 00:00:00 2001 From: Peter Hurley Date: Sat, 16 Jan 2016 15:23:42 -0800 Subject: of: earlycon: Add options string handling Pass through any options string in the 'stdout-path' property to the earlycon "driver" setup. 
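To make the split concrete (a standalone C sketch, not the kernel code; the node path is invented), a stdout-path value such as "/soc/uart@f991e000:115200n8" is divided at the first ':' into the node path and the options string that is handed through to the matching setup() method:

    #include <stdio.h>
    #include <string.h>

    int main(void)
    {
            const char *p = "/soc/uart@f991e000:115200n8";
            const char *q = strchr(p, ':');  /* the kernel hunk uses strchrnul() */
            const char *options = (q && q[1]) ? q + 1 : NULL;
            int node_len = q ? (int)(q - p) : (int)strlen(p);

            printf("node: %.*s\n", node_len, p);                   /* /soc/uart@f991e000 */
            printf("options: %s\n", options ? options : "(none)"); /* 115200n8 */
            return 0;
    }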
Acked-by: Rob Herring Signed-off-by: Peter Hurley Signed-off-by: Greg Kroah-Hartman --- drivers/of/fdt.c | 11 ++++++----- drivers/tty/serial/earlycon.c | 9 +++++++-- include/linux/serial_core.h | 3 ++- 3 files changed, 15 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c index ec1459517de6..cfd3b35e8d81 100644 --- a/drivers/of/fdt.c +++ b/drivers/of/fdt.c @@ -800,7 +800,7 @@ static inline void early_init_dt_check_for_initrd(unsigned long node) static int __init early_init_dt_scan_chosen_serial(void) { int offset; - const char *p; + const char *p, *q, *options = NULL; int l; const struct earlycon_id *match; const void *fdt = initial_boot_params; @@ -817,11 +817,12 @@ static int __init early_init_dt_scan_chosen_serial(void) if (!p || !l) return -ENOENT; - /* Remove console options if present */ - l = strchrnul(p, ':') - p; + q = strchrnul(p, ':'); + if (*q != '\0') + options = q + 1; /* Get the node specified by stdout-path */ - offset = fdt_path_offset_namelen(fdt, p, l); + offset = fdt_path_offset_namelen(fdt, p, q - p); if (offset < 0) return -ENODEV; @@ -838,7 +839,7 @@ static int __init early_init_dt_scan_chosen_serial(void) if (addr == OF_BAD_ADDR) return -ENXIO; - of_setup_earlycon(addr, match); + of_setup_earlycon(addr, match, options); return 0; } return -ENODEV; diff --git a/drivers/tty/serial/earlycon.c b/drivers/tty/serial/earlycon.c index 47e54deb944b..7509ee34de28 100644 --- a/drivers/tty/serial/earlycon.c +++ b/drivers/tty/serial/earlycon.c @@ -218,7 +218,8 @@ early_param("earlycon", param_setup_earlycon); #ifdef CONFIG_OF_EARLY_FLATTREE int __init of_setup_earlycon(unsigned long addr, - const struct earlycon_id *match) + const struct earlycon_id *match, + const char *options) { int err; struct uart_port *port = &early_console_dev.port; @@ -229,8 +230,12 @@ int __init of_setup_earlycon(unsigned long addr, port->uartclk = BASE_BAUD * 16; port->membase = earlycon_map(addr, SZ_4K); + if (options) { + strlcpy(early_console_dev.options, options, + sizeof(early_console_dev.options)); + } earlycon_init(&early_console_dev, match->name); - err = match->setup(&early_console_dev, NULL); + err = match->setup(&early_console_dev, options); if (err < 0) return err; if (!early_console_dev.con->write) diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index 63fdb55e4c9d..62a4df05eaca 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -359,7 +359,8 @@ extern const struct earlycon_id __earlycon_table_end[]; #define EARLYCON_DECLARE(_name, fn) OF_EARLYCON_DECLARE(_name, "", fn) extern int setup_earlycon(char *buf); -extern int of_setup_earlycon(unsigned long addr, const struct earlycon_id *match); +extern int of_setup_earlycon(unsigned long addr, const struct earlycon_id *match, + const char *options); struct uart_port *uart_get_console(struct uart_port *ports, int nr, struct console *c); -- cgit v1.2.3 From 088da2a17619cf0113b62a76ad38c6a14470ffa6 Mon Sep 17 00:00:00 2001 From: Peter Hurley Date: Sat, 16 Jan 2016 15:23:43 -0800 Subject: of: earlycon: Initialize port fields from DT properties Read the optional "reg-offset", "reg-shift", "reg-io-width" and endianness properties and initialize the respective struct uart_port field if found. NB: These bindings are common to several drivers and the values merely indicate the default value; the registering earlycon setup() method can simply override the values if required. 
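One way to picture the effect (a hypothetical consumer, not code from this patch; sketch_serial_out() is an invented name modeled on the 8250 accessors): a driver's register accessor can honor "reg-shift" and "reg-io-width" purely through the pre-initialized uart_port fields, with no binding-specific parsing of its own:

    #include <linux/io.h>
    #include <linux/serial_core.h>

    static void sketch_serial_out(struct uart_port *port, int offset, int value)
    {
            offset <<= port->regshift;      /* from "reg-shift" */

            switch (port->iotype) {         /* from "reg-io-width"/endianness */
            case UPIO_MEM32BE:
                    iowrite32be(value, port->membase + offset);
                    break;
            case UPIO_MEM32:
                    writel(value, port->membase + offset);
                    break;
            case UPIO_MEM16:
                    writew(value, port->membase + offset);
                    break;
            default:
                    writeb(value, port->membase + offset);
                    break;
            }
    }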
Acked-by: Rob Herring Signed-off-by: Peter Hurley Signed-off-by: Greg Kroah-Hartman --- drivers/of/fdt.c | 2 +- drivers/tty/serial/earlycon.c | 31 +++++++++++++++++++++++++++++++ include/linux/serial_core.h | 1 + 3 files changed, 33 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c index cfd3b35e8d81..e8fd54a30802 100644 --- a/drivers/of/fdt.c +++ b/drivers/of/fdt.c @@ -839,7 +839,7 @@ static int __init early_init_dt_scan_chosen_serial(void) if (addr == OF_BAD_ADDR) return -ENXIO; - of_setup_earlycon(addr, match, options); + of_setup_earlycon(addr, match, offset, options); return 0; } return -ENODEV; diff --git a/drivers/tty/serial/earlycon.c b/drivers/tty/serial/earlycon.c index 7509ee34de28..7089667bde93 100644 --- a/drivers/tty/serial/earlycon.c +++ b/drivers/tty/serial/earlycon.c @@ -19,6 +19,7 @@ #include #include #include +#include #ifdef CONFIG_FIX_EARLYCON_MEM #include @@ -219,10 +220,13 @@ early_param("earlycon", param_setup_earlycon); int __init of_setup_earlycon(unsigned long addr, const struct earlycon_id *match, + unsigned long node, const char *options) { int err; struct uart_port *port = &early_console_dev.port; + const __be32 *val; + bool big_endian; spin_lock_init(&port->lock); port->iotype = UPIO_MEM; @@ -230,6 +234,33 @@ int __init of_setup_earlycon(unsigned long addr, port->uartclk = BASE_BAUD * 16; port->membase = earlycon_map(addr, SZ_4K); + val = of_get_flat_dt_prop(node, "reg-offset", NULL); + if (val) + port->mapbase += be32_to_cpu(*val); + val = of_get_flat_dt_prop(node, "reg-shift", NULL); + if (val) + port->regshift = be32_to_cpu(*val); + big_endian = of_get_flat_dt_prop(node, "big-endian", NULL) != NULL || + (IS_ENABLED(CONFIG_CPU_BIG_ENDIAN) && + of_get_flat_dt_prop(node, "native-endian", NULL) != NULL); + val = of_get_flat_dt_prop(node, "reg-io-width", NULL); + if (val) { + switch (be32_to_cpu(*val)) { + case 1: + port->iotype = UPIO_MEM; + break; + case 2: + port->iotype = UPIO_MEM16; + break; + case 4: + port->iotype = (big_endian) ? UPIO_MEM32BE : UPIO_MEM32; + break; + default: + pr_warn("[%s] unsupported reg-io-width\n", match->name); + return -EINVAL; + } + } + if (options) { strlcpy(early_console_dev.options, options, sizeof(early_console_dev.options)); diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index 62a4df05eaca..6d1bed821181 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -360,6 +360,7 @@ extern const struct earlycon_id __earlycon_table_end[]; extern int setup_earlycon(char *buf); extern int of_setup_earlycon(unsigned long addr, const struct earlycon_id *match, + unsigned long node, const char *options); struct uart_port *uart_get_console(struct uart_port *ports, int nr, -- cgit v1.2.3 From c90fe9c0394b068ceca16f66e9f35bcd8d948295 Mon Sep 17 00:00:00 2001 From: Peter Hurley Date: Sat, 16 Jan 2016 15:23:44 -0800 Subject: of: earlycon: Move address translation to of_setup_earlycon() Clean up the early DT/earlycon separation; remove the 'addr' parameter from of_setup_earlycon() and get the uart phys addr directly with a new wrapper function, of_flat_dt_translate_address(). Limit fdt_translate_address() to file scope. 
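Condensed, the resulting flow inside of_setup_earlycon() looks like this (a sketch of the hunks that follow, assuming the same identifiers):

    u64 addr = of_flat_dt_translate_address(node);  /* new wrapper */

    if (addr == OF_BAD_ADDR)
            return -ENXIO;                          /* translation failed */
    port->mapbase = addr;
    port->membase = earlycon_map(port->mapbase, SZ_4K);

so early_init_dt_scan_chosen_serial() only matches the node and passes its offset along, and fdt_translate_address() no longer needs to be visible outside fdt_address.c.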
Acked-by: Rob Herring Signed-off-by: Peter Hurley Signed-off-by: Greg Kroah-Hartman --- drivers/of/fdt.c | 8 +------- drivers/of/fdt_address.c | 11 ++++++++++- drivers/tty/serial/earlycon.c | 12 +++++++++--- include/linux/of_fdt.h | 2 +- include/linux/serial_core.h | 2 +- 5 files changed, 22 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c index e8fd54a30802..918809e6f913 100644 --- a/drivers/of/fdt.c +++ b/drivers/of/fdt.c @@ -827,19 +827,13 @@ static int __init early_init_dt_scan_chosen_serial(void) return -ENODEV; for (match = __earlycon_table; match < __earlycon_table_end; match++) { - u64 addr; - if (!match->compatible[0]) continue; if (fdt_node_check_compatible(fdt, offset, match->compatible)) continue; - addr = fdt_translate_address(fdt, offset); - if (addr == OF_BAD_ADDR) - return -ENXIO; - - of_setup_earlycon(addr, match, offset, options); + of_setup_earlycon(match, offset, options); return 0; } return -ENODEV; diff --git a/drivers/of/fdt_address.c b/drivers/of/fdt_address.c index 8d3dc6fbdb7a..dca8f9b93745 100644 --- a/drivers/of/fdt_address.c +++ b/drivers/of/fdt_address.c @@ -161,7 +161,7 @@ static int __init fdt_translate_one(const void *blob, int parent, * that can be mapped to a cpu physical address). This is not really specified * that way, but this is traditionally the way IBM at least do things */ -u64 __init fdt_translate_address(const void *blob, int node_offset) +static u64 __init fdt_translate_address(const void *blob, int node_offset) { int parent, len; const struct of_bus *bus, *pbus; @@ -239,3 +239,12 @@ u64 __init fdt_translate_address(const void *blob, int node_offset) bail: return result; } + +/** + * of_flat_dt_translate_address - translate DT addr into CPU phys addr + * @node: node in the flat blob + */ +u64 __init of_flat_dt_translate_address(unsigned long node) +{ + return fdt_translate_address(initial_boot_params, node); +} diff --git a/drivers/tty/serial/earlycon.c b/drivers/tty/serial/earlycon.c index 7089667bde93..baee9ad59af7 100644 --- a/drivers/tty/serial/earlycon.c +++ b/drivers/tty/serial/earlycon.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #ifdef CONFIG_FIX_EARLYCON_MEM @@ -218,8 +219,7 @@ early_param("earlycon", param_setup_earlycon); #ifdef CONFIG_OF_EARLY_FLATTREE -int __init of_setup_earlycon(unsigned long addr, - const struct earlycon_id *match, +int __init of_setup_earlycon(const struct earlycon_id *match, unsigned long node, const char *options) { @@ -227,12 +227,18 @@ int __init of_setup_earlycon(unsigned long addr, struct uart_port *port = &early_console_dev.port; const __be32 *val; bool big_endian; + u64 addr; spin_lock_init(&port->lock); port->iotype = UPIO_MEM; + addr = of_flat_dt_translate_address(node); + if (addr == OF_BAD_ADDR) { + pr_warn("[%s] bad address\n", match->name); + return -ENXIO; + } port->mapbase = addr; port->uartclk = BASE_BAUD * 16; - port->membase = earlycon_map(addr, SZ_4K); + port->membase = earlycon_map(port->mapbase, SZ_4K); val = of_get_flat_dt_prop(node, "reg-offset", NULL); if (val) diff --git a/include/linux/of_fdt.h b/include/linux/of_fdt.h index df9ef3801812..2fbe8682a66f 100644 --- a/include/linux/of_fdt.h +++ b/include/linux/of_fdt.h @@ -88,7 +88,7 @@ extern void unflatten_device_tree(void); extern void unflatten_and_copy_device_tree(void); extern void early_init_devtree(void *); extern void early_get_first_memblock_info(void *, phys_addr_t *); -extern u64 fdt_translate_address(const void *blob, int node_offset); +extern 
u64 of_flat_dt_translate_address(unsigned long node); extern void of_fdt_limit_memory(int limit); #else /* CONFIG_OF_FLATTREE */ static inline void early_init_fdt_scan_reserved_mem(void) {} diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index 6d1bed821181..cbfcf38e220d 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -359,7 +359,7 @@ extern const struct earlycon_id __earlycon_table_end[]; #define EARLYCON_DECLARE(_name, fn) OF_EARLYCON_DECLARE(_name, "", fn) extern int setup_earlycon(char *buf); -extern int of_setup_earlycon(unsigned long addr, const struct earlycon_id *match, +extern int of_setup_earlycon(const struct earlycon_id *match, unsigned long node, const char *options); -- cgit v1.2.3 From e490c9144cfaa8e2242c1e5d5187230928f27417 Mon Sep 17 00:00:00 2001 From: "Matwey V. Kornilov" Date: Mon, 1 Feb 2016 21:09:21 +0300 Subject: tty: Add software emulated RS485 support for 8250 The implementation of software emulation of RS485 direction handling is based on the omap_serial driver. Before and after a transmission, RTS is set to the appropriate value. Note that before calling serial8250_em485_init() the caller has to ensure that the UART will interrupt when the shift register is empty. Otherwise, emulation cannot be used. Both serial8250_em485_init() and serial8250_em485_destroy() are idempotent functions. Signed-off-by: Matwey V. Kornilov Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250.h | 2 + drivers/tty/serial/8250/8250_port.c | 223 +++++++++++++++++++++++++++++++++++- include/linux/serial_8250.h | 8 ++ 3 files changed, 229 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/drivers/tty/serial/8250/8250.h b/drivers/tty/serial/8250/8250.h index d54dcd87c67e..ce587c03efc3 100644 --- a/drivers/tty/serial/8250/8250.h +++ b/drivers/tty/serial/8250/8250.h @@ -117,6 +117,8 @@ static inline void serial_dl_write(struct uart_8250_port *up, int value) struct uart_8250_port *serial8250_get_port(int line); void serial8250_rpm_get(struct uart_8250_port *p); void serial8250_rpm_put(struct uart_8250_port *p); +int serial8250_em485_init(struct uart_8250_port *p); +void serial8250_em485_destroy(struct uart_8250_port *p); #if defined(__alpha__) && !defined(CONFIG_PCI) /* diff --git a/drivers/tty/serial/8250/8250_port.c b/drivers/tty/serial/8250/8250_port.c index 04681e5745de..01a85a7e7427 100644 --- a/drivers/tty/serial/8250/8250_port.c +++ b/drivers/tty/serial/8250/8250_port.c @@ -37,6 +37,7 @@ #include #include #include +#include #include #include @@ -522,6 +523,20 @@ static void serial8250_clear_fifos(struct uart_8250_port *p) } } +static inline void serial8250_em485_rts_after_send(struct uart_8250_port *p) +{ + unsigned char mcr = serial_in(p, UART_MCR); + + if (p->port.rs485.flags & SER_RS485_RTS_AFTER_SEND) + mcr |= UART_MCR_RTS; + else + mcr &= ~UART_MCR_RTS; + serial_out(p, UART_MCR, mcr); +} + +static void serial8250_em485_handle_start_tx(unsigned long arg); +static void serial8250_em485_handle_stop_tx(unsigned long arg); + void serial8250_clear_and_reinit_fifos(struct uart_8250_port *p) { serial8250_clear_fifos(p); @@ -546,6 +561,73 @@ void serial8250_rpm_put(struct uart_8250_port *p) } EXPORT_SYMBOL_GPL(serial8250_rpm_put); +/** + * serial8250_em485_init() - put uart_8250_port into rs485 emulating + * @p: uart_8250_port port instance + * + * The function is used to start rs485 software emulating on the + * &struct uart_8250_port* @p. Namely, RTS is switched before/after + * transmission. 
The function is idempotent, so it is safe to call it + * multiple times. + * + * The caller MUST enable interrupt on empty shift register before + * calling serial8250_em485_init(). This interrupt is not a part of + * 8250 standard, but implementation defined. + * + * The function is supposed to be called from .rs485_config callback + * or from any other callback protected with p->port.lock spinlock. + * + * See also serial8250_em485_destroy() + * + * Return 0 - success, -errno - otherwise + */ +int serial8250_em485_init(struct uart_8250_port *p) +{ + if (p->em485 != NULL) + return 0; + + p->em485 = kmalloc(sizeof(struct uart_8250_em485), GFP_KERNEL); + if (p->em485 == NULL) + return -ENOMEM; + + setup_timer(&p->em485->stop_tx_timer, + serial8250_em485_handle_stop_tx, (unsigned long)p); + setup_timer(&p->em485->start_tx_timer, + serial8250_em485_handle_start_tx, (unsigned long)p); + p->em485->active_timer = NULL; + + serial8250_em485_rts_after_send(p); + + return 0; +} +EXPORT_SYMBOL_GPL(serial8250_em485_init); + +/** + * serial8250_em485_destroy() - put uart_8250_port into normal state + * @p: uart_8250_port port instance + * + * The function is used to stop rs485 software emulating on the + * &struct uart_8250_port* @p. The function is idempotent, so it is safe to + * call it multiple times. + * + * The function is supposed to be called from .rs485_config callback + * or from any other callback protected with p->port.lock spinlock. + * + * See also serial8250_em485_init() + */ +void serial8250_em485_destroy(struct uart_8250_port *p) +{ + if (p->em485 == NULL) + return; + + del_timer(&p->em485->start_tx_timer); + del_timer(&p->em485->stop_tx_timer); + + kfree(p->em485); + p->em485 = NULL; +} +EXPORT_SYMBOL_GPL(serial8250_em485_destroy); + /* * These two wrappers ensure that enable_runtime_pm_tx() can be called more than * once and disable_runtime_pm_tx() will still disable RPM because the fifo is @@ -1317,7 +1399,56 @@ static void serial8250_stop_rx(struct uart_port *port) serial8250_rpm_put(up); } -static inline void __stop_tx(struct uart_8250_port *p) +static void __do_stop_tx_rs485(struct uart_8250_port *p) +{ + if (!p->em485) + return; + + serial8250_em485_rts_after_send(p); + /* + * Empty the RX FIFO, we are not interested in anything + * received during the half-duplex transmission. + */ + if (!(p->port.rs485.flags & SER_RS485_RX_DURING_TX)) + serial8250_clear_fifos(p); +} + +static void serial8250_em485_handle_stop_tx(unsigned long arg) +{ + struct uart_8250_port *p = (struct uart_8250_port *)arg; + struct uart_8250_em485 *em485 = p->em485; + unsigned long flags; + + spin_lock_irqsave(&p->port.lock, flags); + if (em485 && + em485->active_timer == &em485->stop_tx_timer) { + __do_stop_tx_rs485(p); + em485->active_timer = NULL; + } + spin_unlock_irqrestore(&p->port.lock, flags); +} + +static void __stop_tx_rs485(struct uart_8250_port *p) +{ + struct uart_8250_em485 *em485 = p->em485; + + if (!em485) + return; + + /* + * __do_stop_tx_rs485 is going to set RTS according to config + * AND flush RX FIFO if required. 
+ */ + if (p->port.rs485.delay_rts_after_send > 0) { + em485->active_timer = &em485->stop_tx_timer; + mod_timer(&em485->stop_tx_timer, jiffies + + p->port.rs485.delay_rts_after_send * HZ / 1000); + } else { + __do_stop_tx_rs485(p); + } +} + +static inline void __do_stop_tx(struct uart_8250_port *p) { if (p->ier & UART_IER_THRI) { p->ier &= ~UART_IER_THRI; @@ -1326,6 +1457,28 @@ static inline void __stop_tx(struct uart_8250_port *p) } } +static inline void __stop_tx(struct uart_8250_port *p) +{ + struct uart_8250_em485 *em485 = p->em485; + + if (em485) { + unsigned char lsr = serial_in(p, UART_LSR); + /* + * To provide required timeing and allow FIFO transfer, + * __stop_tx_rs485 must be called only when both FIFO and + * shift register are empty. It is for device driver to enable + * interrupt on TEMT. + */ + if ((lsr & BOTH_EMPTY) != BOTH_EMPTY) + return; + + del_timer(&em485->start_tx_timer); + em485->active_timer = NULL; + } + __do_stop_tx(p); + __stop_tx_rs485(p); +} + static void serial8250_stop_tx(struct uart_port *port) { struct uart_8250_port *up = up_to_u8250p(port); @@ -1343,12 +1496,10 @@ static void serial8250_stop_tx(struct uart_port *port) serial8250_rpm_put(up); } -static void serial8250_start_tx(struct uart_port *port) +static inline void __start_tx(struct uart_port *port) { struct uart_8250_port *up = up_to_u8250p(port); - serial8250_rpm_get_tx(up); - if (up->dma && !up->dma->tx_dma(up)) return; @@ -1374,6 +1525,70 @@ static void serial8250_start_tx(struct uart_port *port) } } +static inline void start_tx_rs485(struct uart_port *port) +{ + struct uart_8250_port *up = up_to_u8250p(port); + struct uart_8250_em485 *em485 = up->em485; + unsigned char mcr; + + if (!(up->port.rs485.flags & SER_RS485_RX_DURING_TX)) + serial8250_stop_rx(&up->port); + + del_timer(&em485->stop_tx_timer); + em485->active_timer = NULL; + + mcr = serial_in(up, UART_MCR); + if (!!(up->port.rs485.flags & SER_RS485_RTS_ON_SEND) != + !!(mcr & UART_MCR_RTS)) { + if (up->port.rs485.flags & SER_RS485_RTS_ON_SEND) + mcr |= UART_MCR_RTS; + else + mcr &= ~UART_MCR_RTS; + serial_out(up, UART_MCR, mcr); + + if (up->port.rs485.delay_rts_before_send > 0) { + em485->active_timer = &em485->start_tx_timer; + mod_timer(&em485->start_tx_timer, jiffies + + up->port.rs485.delay_rts_before_send * HZ / 1000); + return; + } + } + + __start_tx(port); +} + +static void serial8250_em485_handle_start_tx(unsigned long arg) +{ + struct uart_8250_port *p = (struct uart_8250_port *)arg; + struct uart_8250_em485 *em485 = p->em485; + unsigned long flags; + + spin_lock_irqsave(&p->port.lock, flags); + if (em485 && + em485->active_timer == &em485->start_tx_timer) { + __start_tx(&p->port); + em485->active_timer = NULL; + } + spin_unlock_irqrestore(&p->port.lock, flags); +} + +static void serial8250_start_tx(struct uart_port *port) +{ + struct uart_8250_port *up = up_to_u8250p(port); + struct uart_8250_em485 *em485 = up->em485; + + serial8250_rpm_get_tx(up); + + if (em485 && + em485->active_timer == &em485->start_tx_timer) + return; + + if (em485) + start_tx_rs485(port); + else + __start_tx(port); +} + static void serial8250_throttle(struct uart_port *port) { port->throttle(port); diff --git a/include/linux/serial_8250.h b/include/linux/serial_8250.h index faa0e0370ce7..434879759725 100644 --- a/include/linux/serial_8250.h +++ b/include/linux/serial_8250.h @@ -76,6 +76,12 @@ struct uart_8250_ops { void (*release_irq)(struct uart_8250_port *); }; +struct uart_8250_em485 { + struct timer_list start_tx_timer; /* "rs485 start tx" timer */ + 
struct timer_list stop_tx_timer; /* "rs485 stop tx" timer */ + struct timer_list *active_timer; /* pointer to active timer */ +}; + /* * This should be used by drivers which want to register * their own 8250 ports without registering their own @@ -122,6 +128,8 @@ struct uart_8250_port { /* 8250 specific callbacks */ int (*dl_read)(struct uart_8250_port *); void (*dl_write)(struct uart_8250_port *, int); + + struct uart_8250_em485 *em485; }; static inline struct uart_8250_port *up_to_u8250p(struct uart_port *up) -- cgit v1.2.3 From 1349ba02bf72caafa36a2d6878cdba340d65b2ac Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Thu, 21 Jan 2016 09:46:12 +0100 Subject: ARM: OMAP: serial: Rename DRIVER_NAME DRIVER_NAME is too generic to be used in a driver-specific platform data file. Use a name specific to the driver instead, to avoid collisions. Signed-off-by: Jean Delvare Acked-by: Tony Lindgren Cc: Jiri Slaby Signed-off-by: Greg Kroah-Hartman --- arch/arm/mach-omap2/serial.c | 2 +- drivers/tty/serial/omap-serial.c | 2 +- include/linux/platform_data/serial-omap.h | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/arch/arm/mach-omap2/serial.c b/arch/arm/mach-omap2/serial.c index f164c6b32ce2..8e072de89fed 100644 --- a/arch/arm/mach-omap2/serial.c +++ b/arch/arm/mach-omap2/serial.c @@ -252,7 +252,7 @@ void __init omap_serial_init_port(struct omap_board_data *bdata, info = omap_serial_default_info; oh = uart->oh; - name = DRIVER_NAME; + name = OMAP_SERIAL_DRIVER_NAME; omap_up.dma_enabled = info->dma_enabled; omap_up.uartclk = OMAP24XX_BASE_BAUD * 16; diff --git a/drivers/tty/serial/omap-serial.c b/drivers/tty/serial/omap-serial.c index b645f9228ed7..1401dc9cde35 100644 --- a/drivers/tty/serial/omap-serial.c +++ b/drivers/tty/serial/omap-serial.c @@ -1866,7 +1866,7 @@ static struct platform_driver serial_omap_driver = { .probe = serial_omap_probe, .remove = serial_omap_remove, .driver = { - .name = DRIVER_NAME, + .name = OMAP_SERIAL_DRIVER_NAME, .pm = &serial_omap_dev_pm_ops, .of_match_table = of_match_ptr(omap_serial_of_match), }, diff --git a/include/linux/platform_data/serial-omap.h b/include/linux/platform_data/serial-omap.h index d09275f3cde3..2ba2c34ca3d3 100644 --- a/include/linux/platform_data/serial-omap.h +++ b/include/linux/platform_data/serial-omap.h @@ -21,7 +21,7 @@ #include #include -#define DRIVER_NAME "omap_uart" +#define OMAP_SERIAL_DRIVER_NAME "omap_uart" /* * Use tty device name as ttyO, [O -> OMAP] -- cgit v1.2.3 From 47b0eeb3dc8a01848ad62908000b1051d1833eaf Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Tue, 2 Feb 2016 17:24:56 -0800 Subject: clk: Deprecate CLK_IS_ROOT We only use CLK_IS_ROOT in a few places in the common clk framework core. Let's replace those checks with a check for the number of parents a clk has instead of the flag, freeing up one flag for something else. We don't remove the flag yet so that things keep building, but we'll remove it once all drivers have removed their flag usage. 
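The substitution, in short (a sketch only; struct clk_core is private to drivers/clk/clk.c and clk_core_is_root() is an invented helper name): a clock with no parents is by definition a root clock, so the explicit flag carries no extra information.

    static bool clk_core_is_root(const struct clk_core *core)
    {
            return !core->num_parents;  /* was: core->flags & CLK_IS_ROOT */
    }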
Signed-off-by: Stephen Boyd --- drivers/clk/clk.c | 6 +++--- include/linux/clk-provider.h | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c index bb01ed6cc63e..cd8382c83f48 100644 --- a/drivers/clk/clk.c +++ b/drivers/clk/clk.c @@ -385,7 +385,7 @@ static unsigned long clk_core_get_rate_nolock(struct clk_core *core) ret = core->rate; - if (core->flags & CLK_IS_ROOT) + if (!core->num_parents) goto out; if (!core->parent) @@ -2351,7 +2351,7 @@ static int __clk_core_init(struct clk_core *core) /* * Populate core->parent if parent has already been __clk_init'd. If * parent has not yet been __clk_init'd then place clk in the orphan - * list. If clk has set the CLK_IS_ROOT flag then place it in the root + * list. If clk doesn't have any parents then place it in the root * clk list. * * Every time a new clk is clk_init'd then we walk the list of orphan @@ -2362,7 +2362,7 @@ static int __clk_core_init(struct clk_core *core) hlist_add_head(&core->child_node, &core->parent->children); core->orphan = core->parent->orphan; - } else if (core->flags & CLK_IS_ROOT) { + } else if (!core->num_parents) { hlist_add_head(&core->child_node, &clk_root_list); core->orphan = false; } else { diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h index fabe5bedbba6..9c3a18c1a984 100644 --- a/include/linux/clk-provider.h +++ b/include/linux/clk-provider.h @@ -25,7 +25,7 @@ #define CLK_SET_PARENT_GATE BIT(1) /* must be gated across re-parent */ #define CLK_SET_RATE_PARENT BIT(2) /* propagate rate change up one level */ #define CLK_IGNORE_UNUSED BIT(3) /* do not gate even if unused */ -#define CLK_IS_ROOT BIT(4) /* root clk, has no parent */ +#define CLK_IS_ROOT BIT(4) /* Deprecated: Don't use */ #define CLK_IS_BASIC BIT(5) /* Basic clk, can't do a to_clk_foo() */ #define CLK_GET_RATE_NOCACHE BIT(6) /* do not use the cached clk rate */ #define CLK_SET_RATE_NO_REPARENT BIT(7) /* don't re-parent on rate change */ -- cgit v1.2.3 From 1e42754eacdb1c8632c3af47263c56967e04cbd1 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Sat, 6 Feb 2016 23:34:55 -0800 Subject: clk: provider: Remove of_gpio_{gate,mux}_clk_setup() prototypes These functions either never existed or were only used in OF_CLK_DECLARE() macros. Remove the dead prototypes. 
Cc: Jyri Sarha Signed-off-by: Stephen Boyd --- include/linux/clk-provider.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h index 9c3a18c1a984..fce7f027f8a7 100644 --- a/include/linux/clk-provider.h +++ b/include/linux/clk-provider.h @@ -626,8 +626,6 @@ struct clk *clk_register_gpio_gate(struct device *dev, const char *name, const char *parent_name, unsigned gpio, bool active_low, unsigned long flags); -void of_gpio_clk_gate_setup(struct device_node *node); - /** * struct clk_gpio_mux - gpio controlled clock multiplexer * @@ -643,8 +641,6 @@ struct clk *clk_register_gpio_mux(struct device *dev, const char *name, const char * const *parent_names, u8 num_parents, unsigned gpio, bool active_low, unsigned long flags); -void of_gpio_mux_clk_setup(struct device_node *node); - /** * clk_register - allocate a new clock, register it and return an opaque cookie * @dev: device that is registering this clock -- cgit v1.2.3 From ddf1af6fa00e772fdb67a7d22cb83fac2b8968a8 Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Tue, 2 Feb 2016 10:33:06 -0800 Subject: tcp: new delivery accounting This patch changes the accounting of how many packets are newly acked or sacked when the sender receives an ACK. The current approach basically computes newly_acked_sacked = (prior_packets - prior_sacked) - (tp->packets_out - tp->sacked_out) where prior_packets and prior_sacked are snapshots taken at the beginning of the ACK processing. The new approach tracks the delivery information via a new TCP state variable "delivered" which monotonically increases as new packets are delivered in order or out of order. The reason for this change is that the current approach is brittle and produces negative or inaccurate estimates. 1) For non-SACK connections, an ACK that advances the SND.UNA could reset the DUPACK counters (tp->sacked_out) in tcp_process_loss() or tcp_fastretrans_alert(). This suddenly inflates the inflight and causes an under-estimate or even a negative estimate. Here is a real example: before processing the ACK, packets_out = 75, sacked_out = 23 and the ca state is Loss; afterwards, packets_out = 73, sacked_out = 0 and the ca state is Open. The old approach computes (75-23) - (73 - 0) = -21 delivered while the new approach computes 1 delivered, since it considers the 2nd-24th packets to have been delivered out of order. 2) An MSS change would re-count packets_out and sacked_out, so the estimate is inaccurate and can even become negative. E.g., the inflight is doubled when the MSS is halved. 3) Spurious retransmissions signaled by DSACK are not accounted for. The new approach is simpler and more robust. For SACK connections, tp->delivered increments as packets are being acked or sacked in SACK and ACK processing. For non-SACK connections, it's done in tcp_remove_reno_sacks() and tcp_add_reno_sack(). When an ACK advances the SND.UNA, tp->delivered is incremented by the number of packets ACKed (less the current number of DUPACKs received plus one packet hole). Upon receiving a DUPACK, tp->delivered is incremented assuming one out-of-order packet is delivered. Upon receiving a DSACK, tp->delivered is incremented assuming one retransmission is delivered in tcp_sacktag_write_queue(). Signed-off-by: Yuchung Cheng Signed-off-by: Neal Cardwell Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- include/linux/tcp.h | 1 + net/ipv4/tcp_input.c | 21 +++++++++++++++------ 2 files changed, 16 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index b386361ba3e8..d909feeeaea2 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -256,6 +256,7 @@ struct tcp_sock { u32 prr_delivered; /* Number of newly delivered packets to * receiver in Recovery. */ u32 prr_out; /* Total number of pkts sent during Recovery. */ + u32 delivered; /* Total data packets delivered incl. rexmits */ u32 rcv_wnd; /* Current receiver window */ u32 write_seq; /* Tail(+1) of data held in tcp send buffer */ diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index dc810df53e90..2d690b3f0a7b 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1214,6 +1214,7 @@ static u8 tcp_sacktag_one(struct sock *sk, sacked |= TCPCB_SACKED_ACKED; state->flag |= FLAG_DATA_SACKED; tp->sacked_out += pcount; + tp->delivered += pcount; /* Out-of-order packets delivered */ fack_count += pcount; @@ -1825,8 +1826,12 @@ static void tcp_check_reno_reordering(struct sock *sk, const int addend) static void tcp_add_reno_sack(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); + u32 prior_sacked = tp->sacked_out; + tp->sacked_out++; tcp_check_reno_reordering(sk, 0); + if (tp->sacked_out > prior_sacked) + tp->delivered++; /* Some out-of-order packet is delivered */ tcp_verify_left_out(tp); } @@ -1838,6 +1843,7 @@ static void tcp_remove_reno_sacks(struct sock *sk, int acked) if (acked > 0) { /* One ACK acked hole. The rest eat duplicate ACKs. */ + tp->delivered += max_t(int, acked - tp->sacked_out, 1); if (acked - 1 >= tp->sacked_out) tp->sacked_out = 0; else @@ -3156,10 +3162,13 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, flag |= FLAG_ORIG_SACK_ACKED; } - if (sacked & TCPCB_SACKED_ACKED) + if (sacked & TCPCB_SACKED_ACKED) { tp->sacked_out -= acked_pcount; - else if (tcp_is_sack(tp) && !tcp_skb_spurious_retrans(tp, skb)) - tcp_rack_advance(tp, &skb->skb_mstamp, sacked); + } else if (tcp_is_sack(tp)) { + tp->delivered += acked_pcount; + if (!tcp_skb_spurious_retrans(tp, skb)) + tcp_rack_advance(tp, &skb->skb_mstamp, sacked); + } if (sacked & TCPCB_LOST) tp->lost_out -= acked_pcount; @@ -3541,9 +3550,9 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) bool is_dupack = false; u32 prior_fackets; int prior_packets = tp->packets_out; - const int prior_unsacked = tp->packets_out - tp->sacked_out; + u32 prior_delivered = tp->delivered; int acked = 0; /* Number of packets newly acked */ - int acked_sacked; /* Number of packets newly acked or sacked */ + u32 acked_sacked; /* Number of packets newly acked or sacked */ int rexmit = REXMIT_NONE; /* Flag to (re)transmit to recover losses */ sack_state.first_sackt.v64 = 0; @@ -3645,7 +3654,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) if (tp->tlp_high_seq) tcp_process_tlp_ack(sk, ack, flag); - acked_sacked = prior_unsacked - (tp->packets_out - tp->sacked_out); + acked_sacked = tp->delivered - prior_delivered; /* Advance cwnd if state allows */ if (tcp_in_cwnd_reduction(sk)) { /* Reduce cwnd if state mandates */ -- cgit v1.2.3 From 8b37524962b9c54423374717786198f5c0820a28 Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Mon, 8 Feb 2016 11:21:50 +1100 Subject: quota: add new quotactl Q_XGETNEXTQUOTA Q_XGETNEXTQUOTA is exactly like Q_XGETQUOTA, except that it will return quota information for the id equal to or greater than the id 
requested. In other words, if the requested id has no quota, the command will return quota information for the next higher id which does have a quota set. If no higher id has an active quota, -ESRCH is returned. This allows filesystems to do efficient iteration in kernelspace, much like extN filesystems do in userspace when asked to report all active quotas. The patch adds a d_id field to struct qc_dqblk so that we can pass back the id of the quota which was found, and return it to userspace. Today, filesystems such as XFS require getpwent-style iterations, and for systems which have e.g. LDAP backends, this can be very slow, or even impossible if iteration is not allowed in the configuration. Signed-off-by: Eric Sandeen Reviewed-by: Jan Kara Signed-off-by: Dave Chinner --- fs/quota/quota.c | 31 +++++++++++++++++++++++++++++++ include/linux/quota.h | 2 ++ include/uapi/linux/dqblk_xfs.h | 1 + 3 files changed, 34 insertions(+) (limited to 'include/linux') diff --git a/fs/quota/quota.c b/fs/quota/quota.c index ea6667083b3e..0a6dd71ea309 100644 --- a/fs/quota/quota.c +++ b/fs/quota/quota.c @@ -625,6 +625,34 @@ static int quota_getxquota(struct super_block *sb, int type, qid_t id, return ret; } +/* + * Return quota for next active quota >= this id, if any exists, + * otherwise return -ESRCH via ->get_nextdqblk. + */ +static int quota_getnextxquota(struct super_block *sb, int type, qid_t id, + void __user *addr) +{ + struct fs_disk_quota fdq; + struct qc_dqblk qdq; + struct kqid qid; + qid_t id_out; + int ret; + + if (!sb->s_qcop->get_nextdqblk) + return -ENOSYS; + qid = make_kqid(current_user_ns(), type, id); + if (!qid_valid(qid)) + return -EINVAL; + ret = sb->s_qcop->get_nextdqblk(sb, &qid, &qdq); + if (ret) + return ret; + id_out = from_kqid(current_user_ns(), qid); + copy_to_xfs_dqblk(&fdq, &qdq, type, id_out); + if (copy_to_user(addr, &fdq, sizeof(fdq))) + return -EFAULT; + return ret; +} + static int quota_rmxquota(struct super_block *sb, void __user *addr) { __u32 flags; @@ -690,6 +718,8 @@ static int do_quotactl(struct super_block *sb, int type, int cmd, qid_t id, return quota_setxquota(sb, type, id, addr); case Q_XGETQUOTA: return quota_getxquota(sb, type, id, addr); + case Q_XGETNEXTQUOTA: + return quota_getnextxquota(sb, type, id, addr); case Q_XQUOTASYNC: if (sb->s_flags & MS_RDONLY) return -EROFS; @@ -712,6 +742,7 @@ static int quotactl_cmd_write(int cmd) case Q_XGETQSTAT: case Q_XGETQSTATV: case Q_XGETQUOTA: + case Q_XGETNEXTQUOTA: case Q_XQUOTASYNC: return 0; } diff --git a/include/linux/quota.h b/include/linux/quota.h index b2505acfd3c0..fba92f5c1a63 100644 --- a/include/linux/quota.h +++ b/include/linux/quota.h @@ -425,6 +425,8 @@ struct quotactl_ops { int (*quota_sync)(struct super_block *, int); int (*set_info)(struct super_block *, int, struct qc_info *); int (*get_dqblk)(struct super_block *, struct kqid, struct qc_dqblk *); + int (*get_nextdqblk)(struct super_block *, struct kqid *, + struct qc_dqblk *); int (*set_dqblk)(struct super_block *, struct kqid, struct qc_dqblk *); int (*get_state)(struct super_block *, struct qc_state *); int (*rm_xquota)(struct super_block *, unsigned int); diff --git a/include/uapi/linux/dqblk_xfs.h b/include/uapi/linux/dqblk_xfs.h index dcd75cc26196..11b3b31faf14 100644 --- a/include/uapi/linux/dqblk_xfs.h +++ b/include/uapi/linux/dqblk_xfs.h @@ -39,6 +39,7 @@ #define Q_XQUOTARM XQM_CMD(6) /* free disk space used by dquots */ #define Q_XQUOTASYNC XQM_CMD(7) /* delalloc flush, updates dquots */ #define Q_XGETQSTATV XQM_CMD(8) /* newer version 
of get quota */ +#define Q_XGETNEXTQUOTA XQM_CMD(9) /* get disk limits and usage >= ID */ /* * fs_disk_quota structure: -- cgit v1.2.3 From b55b54b5db330e36cd465adb3fbe274ffb1061d3 Mon Sep 17 00:00:00 2001 From: Gustavo Padovan Date: Thu, 21 Jan 2016 10:49:21 -0200 Subject: staging/android: remove struct sync_pt struct sync_pt was just wrapping around struct fence and creating an extra abstraction layer. The only two members of struct sync_pt, child_list and active_list, were moved to struct fence in an earlier commit. After removing those two members struct sync_pt is nothing more than struct fence, so remove it all and use struct fence directly. Signed-off-by: Gustavo Padovan Reviewed-by: Maarten Lankhorst Signed-off-by: Greg Kroah-Hartman --- drivers/staging/android/sw_sync.c | 23 ++++---- drivers/staging/android/sw_sync.h | 8 +-- drivers/staging/android/sync.c | 101 +++++++++++++++-------------------- drivers/staging/android/sync.h | 63 +++++++--------------- drivers/staging/android/sync_debug.c | 49 ++++++++--------- drivers/staging/android/trace/sync.h | 14 ++--- include/linux/fence.h | 2 + 7 files changed, 113 insertions(+), 147 deletions(-) (limited to 'include/linux') diff --git a/drivers/staging/android/sw_sync.c b/drivers/staging/android/sw_sync.c index f491dbce5696..3bee959b41d8 100644 --- a/drivers/staging/android/sw_sync.c +++ b/drivers/staging/android/sw_sync.c @@ -25,7 +25,7 @@ #include "sw_sync.h" -struct sync_pt *sw_sync_pt_create(struct sw_sync_timeline *obj, u32 value) +struct fence *sw_sync_pt_create(struct sw_sync_timeline *obj, u32 value) { struct sw_sync_pt *pt; @@ -34,23 +34,23 @@ struct sync_pt *sw_sync_pt_create(struct sw_sync_timeline *obj, u32 value) pt->value = value; - return (struct sync_pt *)pt; + return (struct fence *)pt; } EXPORT_SYMBOL(sw_sync_pt_create); -static int sw_sync_pt_has_signaled(struct sync_pt *sync_pt) +static int sw_sync_fence_has_signaled(struct fence *fence) { - struct sw_sync_pt *pt = (struct sw_sync_pt *)sync_pt; + struct sw_sync_pt *pt = (struct sw_sync_pt *)fence; struct sw_sync_timeline *obj = - (struct sw_sync_timeline *)sync_pt_parent(sync_pt); + (struct sw_sync_timeline *)fence_parent(fence); return (pt->value > obj->value) ? 
0 : 1; } -static int sw_sync_fill_driver_data(struct sync_pt *sync_pt, +static int sw_sync_fill_driver_data(struct fence *fence, void *data, int size) { - struct sw_sync_pt *pt = (struct sw_sync_pt *)sync_pt; + struct sw_sync_pt *pt = (struct sw_sync_pt *)fence; if (size < sizeof(pt->value)) return -ENOMEM; @@ -68,20 +68,19 @@ static void sw_sync_timeline_value_str(struct sync_timeline *sync_timeline, snprintf(str, size, "%d", timeline->value); } -static void sw_sync_pt_value_str(struct sync_pt *sync_pt, - char *str, int size) +static void sw_sync_fence_value_str(struct fence *fence, char *str, int size) { - struct sw_sync_pt *pt = (struct sw_sync_pt *)sync_pt; + struct sw_sync_pt *pt = (struct sw_sync_pt *)fence; snprintf(str, size, "%d", pt->value); } static struct sync_timeline_ops sw_sync_timeline_ops = { .driver_name = "sw_sync", - .has_signaled = sw_sync_pt_has_signaled, + .has_signaled = sw_sync_fence_has_signaled, .fill_driver_data = sw_sync_fill_driver_data, .timeline_value_str = sw_sync_timeline_value_str, - .pt_value_str = sw_sync_pt_value_str, + .fence_value_str = sw_sync_fence_value_str, }; struct sw_sync_timeline *sw_sync_timeline_create(const char *name) diff --git a/drivers/staging/android/sw_sync.h b/drivers/staging/android/sw_sync.h index c87ae9ebf267..e18667bfb0ca 100644 --- a/drivers/staging/android/sw_sync.h +++ b/drivers/staging/android/sw_sync.h @@ -29,7 +29,7 @@ struct sw_sync_timeline { }; struct sw_sync_pt { - struct sync_pt pt; + struct fence pt; u32 value; }; @@ -38,7 +38,7 @@ struct sw_sync_pt { struct sw_sync_timeline *sw_sync_timeline_create(const char *name); void sw_sync_timeline_inc(struct sw_sync_timeline *obj, u32 inc); -struct sync_pt *sw_sync_pt_create(struct sw_sync_timeline *obj, u32 value); +struct fence *sw_sync_pt_create(struct sw_sync_timeline *obj, u32 value); #else static inline struct sw_sync_timeline *sw_sync_timeline_create(const char *name) { @@ -49,8 +49,8 @@ static inline void sw_sync_timeline_inc(struct sw_sync_timeline *obj, u32 inc) { } -static inline struct sync_pt *sw_sync_pt_create(struct sw_sync_timeline *obj, - u32 value) +static inline struct fence *sw_sync_pt_create(struct sw_sync_timeline *obj, + u32 value) { return NULL; } diff --git a/drivers/staging/android/sync.c b/drivers/staging/android/sync.c index 22b1d9b03fd6..3c2c8d07df62 100644 --- a/drivers/staging/android/sync.c +++ b/drivers/staging/android/sync.c @@ -102,51 +102,45 @@ void sync_timeline_signal(struct sync_timeline *obj) { unsigned long flags; LIST_HEAD(signaled_pts); - struct sync_pt *pt, *next; + struct fence *fence, *next; trace_sync_timeline(obj); spin_lock_irqsave(&obj->child_list_lock, flags); - list_for_each_entry_safe(pt, next, &obj->active_list_head, + list_for_each_entry_safe(fence, next, &obj->active_list_head, active_list) { - if (fence_is_signaled_locked(&pt->base)) - list_del_init(&pt->active_list); + if (fence_is_signaled_locked(fence)) + list_del_init(&fence->active_list); } spin_unlock_irqrestore(&obj->child_list_lock, flags); } EXPORT_SYMBOL(sync_timeline_signal); -struct sync_pt *sync_pt_create(struct sync_timeline *obj, int size) +struct fence *sync_pt_create(struct sync_timeline *obj, int size) { unsigned long flags; - struct sync_pt *pt; + struct fence *fence; - if (size < sizeof(struct sync_pt)) + if (size < sizeof(*fence)) return NULL; - pt = kzalloc(size, GFP_KERNEL); - if (!pt) + fence = kzalloc(size, GFP_KERNEL); + if (!fence) return NULL; spin_lock_irqsave(&obj->child_list_lock, flags); sync_timeline_get(obj); - fence_init(&pt->base, 
&android_fence_ops, &obj->child_list_lock, + fence_init(fence, &android_fence_ops, &obj->child_list_lock, obj->context, ++obj->value); - list_add_tail(&pt->child_list, &obj->child_list_head); - INIT_LIST_HEAD(&pt->active_list); + list_add_tail(&fence->child_list, &obj->child_list_head); + INIT_LIST_HEAD(&fence->active_list); spin_unlock_irqrestore(&obj->child_list_lock, flags); - return pt; + return fence; } EXPORT_SYMBOL(sync_pt_create); -void sync_pt_free(struct sync_pt *pt) -{ - fence_put(&pt->base); -} -EXPORT_SYMBOL(sync_pt_free); - static struct sync_file *sync_file_alloc(int size, const char *name) { struct sync_file *sync_file; @@ -184,8 +178,8 @@ static void fence_check_cb_func(struct fence *f, struct fence_cb *cb) wake_up_all(&sync_file->wq); } -/* TODO: implement a create which takes more that one sync_pt */ -struct sync_file *sync_file_create_dma(const char *name, struct fence *pt) +/* TODO: implement a create which takes more that one fence */ +struct sync_file *sync_file_create_dma(const char *name, struct fence *fence) { struct sync_file *sync_file; @@ -197,9 +191,10 @@ struct sync_file *sync_file_create_dma(const char *name, struct fence *pt) sync_file->num_fences = 1; atomic_set(&sync_file->status, 1); - sync_file->cbs[0].fence = pt; + sync_file->cbs[0].fence = fence; sync_file->cbs[0].sync_file = sync_file; - if (fence_add_callback(pt, &sync_file->cbs[0].cb, fence_check_cb_func)) + if (fence_add_callback(fence, &sync_file->cbs[0].cb, + fence_check_cb_func)) atomic_dec(&sync_file->status); sync_file_debug_add(sync_file); @@ -208,9 +203,9 @@ struct sync_file *sync_file_create_dma(const char *name, struct fence *pt) } EXPORT_SYMBOL(sync_file_create_dma); -struct sync_file *sync_file_create(const char *name, struct sync_pt *pt) +struct sync_file *sync_file_create(const char *name, struct fence *fence) { - return sync_file_create_dma(name, &pt->base); + return sync_file_create_dma(name, fence); } EXPORT_SYMBOL(sync_file_create); @@ -245,14 +240,14 @@ void sync_file_install(struct sync_file *sync_file, int fd) EXPORT_SYMBOL(sync_file_install); static void sync_file_add_pt(struct sync_file *sync_file, int *i, - struct fence *pt) + struct fence *fence) { - sync_file->cbs[*i].fence = pt; + sync_file->cbs[*i].fence = fence; sync_file->cbs[*i].sync_file = sync_file; - if (!fence_add_callback(pt, &sync_file->cbs[*i].cb, + if (!fence_add_callback(fence, &sync_file->cbs[*i].cb, fence_check_cb_func)) { - fence_get(pt); + fence_get(fence); (*i)++; } } @@ -328,7 +323,7 @@ int sync_file_wait(struct sync_file *sync_file, long timeout) trace_sync_wait(sync_file, 1); for (i = 0; i < sync_file->num_fences; ++i) - trace_sync_pt(sync_file->cbs[i].fence); + trace_fence(sync_file->cbs[i].fence); ret = wait_event_interruptible_timeout(sync_file->wq, atomic_read(&sync_file->status) <= 0, timeout); @@ -356,43 +351,39 @@ EXPORT_SYMBOL(sync_file_wait); static const char *android_fence_get_driver_name(struct fence *fence) { - struct sync_pt *pt = container_of(fence, struct sync_pt, base); - struct sync_timeline *parent = sync_pt_parent(pt); + struct sync_timeline *parent = fence_parent(fence); return parent->ops->driver_name; } static const char *android_fence_get_timeline_name(struct fence *fence) { - struct sync_pt *pt = container_of(fence, struct sync_pt, base); - struct sync_timeline *parent = sync_pt_parent(pt); + struct sync_timeline *parent = fence_parent(fence); return parent->name; } static void android_fence_release(struct fence *fence) { - struct sync_pt *pt = container_of(fence, struct 
sync_pt, base); - struct sync_timeline *parent = sync_pt_parent(pt); + struct sync_timeline *parent = fence_parent(fence); unsigned long flags; spin_lock_irqsave(fence->lock, flags); - list_del(&pt->child_list); - if (WARN_ON_ONCE(!list_empty(&pt->active_list))) - list_del(&pt->active_list); + list_del(&fence->child_list); + if (WARN_ON_ONCE(!list_empty(&fence->active_list))) + list_del(&fence->active_list); spin_unlock_irqrestore(fence->lock, flags); sync_timeline_put(parent); - fence_free(&pt->base); + fence_free(fence); } static bool android_fence_signaled(struct fence *fence) { - struct sync_pt *pt = container_of(fence, struct sync_pt, base); - struct sync_timeline *parent = sync_pt_parent(pt); + struct sync_timeline *parent = fence_parent(fence); int ret; - ret = parent->ops->has_signaled(pt); + ret = parent->ops->has_signaled(fence); if (ret < 0) fence->status = ret; return ret; @@ -400,46 +391,42 @@ static bool android_fence_signaled(struct fence *fence) static bool android_fence_enable_signaling(struct fence *fence) { - struct sync_pt *pt = container_of(fence, struct sync_pt, base); - struct sync_timeline *parent = sync_pt_parent(pt); + struct sync_timeline *parent = fence_parent(fence); if (android_fence_signaled(fence)) return false; - list_add_tail(&pt->active_list, &parent->active_list_head); + list_add_tail(&fence->active_list, &parent->active_list_head); return true; } static int android_fence_fill_driver_data(struct fence *fence, void *data, int size) { - struct sync_pt *pt = container_of(fence, struct sync_pt, base); - struct sync_timeline *parent = sync_pt_parent(pt); + struct sync_timeline *parent = fence_parent(fence); if (!parent->ops->fill_driver_data) return 0; - return parent->ops->fill_driver_data(pt, data, size); + return parent->ops->fill_driver_data(fence, data, size); } static void android_fence_value_str(struct fence *fence, char *str, int size) { - struct sync_pt *pt = container_of(fence, struct sync_pt, base); - struct sync_timeline *parent = sync_pt_parent(pt); + struct sync_timeline *parent = fence_parent(fence); - if (!parent->ops->pt_value_str) { + if (!parent->ops->fence_value_str) { if (size) *str = 0; return; } - parent->ops->pt_value_str(pt, str, size); + parent->ops->fence_value_str(fence, str, size); } static void android_fence_timeline_value_str(struct fence *fence, char *str, int size) { - struct sync_pt *pt = container_of(fence, struct sync_pt, base); - struct sync_timeline *parent = sync_pt_parent(pt); + struct sync_timeline *parent = fence_parent(fence); if (!parent->ops->timeline_value_str) { if (size) @@ -624,9 +611,9 @@ static long sync_file_ioctl_fence_info(struct sync_file *sync_file, len = sizeof(struct sync_file_info_data); for (i = 0; i < sync_file->num_fences; ++i) { - struct fence *pt = sync_file->cbs[i].fence; + struct fence *fence = sync_file->cbs[i].fence; - ret = sync_fill_pt_info(pt, (u8 *)data + len, size - len); + ret = sync_fill_pt_info(fence, (u8 *)data + len, size - len); if (ret < 0) goto out; diff --git a/drivers/staging/android/sync.h b/drivers/staging/android/sync.h index a18d1e3365fb..8cdac1a12e4f 100644 --- a/drivers/staging/android/sync.h +++ b/drivers/staging/android/sync.h @@ -24,7 +24,6 @@ #include "uapi/sync.h" struct sync_timeline; -struct sync_pt; struct sync_file; /** @@ -39,23 +38,23 @@ struct sync_file; * as specified by size. This information is returned * to userspace by SYNC_IOC_FENCE_INFO. 
* @timeline_value_str: fill str with the value of the sync_timeline's counter - * @pt_value_str: fill str with the value of the sync_pt + * @fence_value_str: fill str with the value of the fence */ struct sync_timeline_ops { const char *driver_name; /* required */ - int (*has_signaled)(struct sync_pt *pt); + int (*has_signaled)(struct fence *fence); /* optional */ - int (*fill_driver_data)(struct sync_pt *syncpt, void *data, int size); + int (*fill_driver_data)(struct fence *fence, void *data, int size); /* optional */ void (*timeline_value_str)(struct sync_timeline *timeline, char *str, int size); /* optional */ - void (*pt_value_str)(struct sync_pt *pt, char *str, int size); + void (*fence_value_str)(struct fence *fence, char *str, int size); }; /** @@ -66,7 +65,7 @@ struct sync_timeline_ops { * @destroyed: set when sync_timeline is destroyed * @child_list_head: list of children sync_pts for this sync_timeline * @child_list_lock: lock protecting @child_list_head, destroyed, and - * sync_pt.status + * fence.status * @active_list_head: list of active (unsignaled/errored) sync_pts * @sync_timeline_list: membership in global sync_timeline_list */ @@ -89,22 +88,9 @@ struct sync_timeline { #endif }; -/** - * struct sync_pt - sync point - * @base: base fence class - * @child_list: membership in sync_timeline.child_list_head - * @active_list: membership in sync_timeline.active_list_head - */ -struct sync_pt { - struct fence base; - - struct list_head child_list; - struct list_head active_list; -}; - -static inline struct sync_timeline *sync_pt_parent(struct sync_pt *pt) +static inline struct sync_timeline *fence_parent(struct fence *fence) { - return container_of(pt->base.lock, struct sync_timeline, + return container_of(fence->lock, struct sync_timeline, child_list_lock); } @@ -164,7 +150,7 @@ struct sync_timeline *sync_timeline_create(const struct sync_timeline_ops *ops, * * A sync implementation should call this when the @obj is going away * (i.e. module unload.) @obj won't actually be freed until all its children - * sync_pts are freed. + * fences are freed. */ void sync_timeline_destroy(struct sync_timeline *obj); @@ -172,41 +158,32 @@ void sync_timeline_destroy(struct sync_timeline *obj); * sync_timeline_signal() - signal a status change on a sync_timeline * @obj: sync_timeline to signal * - * A sync implementation should call this any time one of it's sync_pts + * A sync implementation should call this any time one of it's fences * has signaled or has an error condition. */ void sync_timeline_signal(struct sync_timeline *obj); /** * sync_pt_create() - creates a sync pt - * @parent: sync_pt's parent sync_timeline + * @parent: fence's parent sync_timeline * @size: size to allocate for this pt * - * Creates a new sync_pt as a child of @parent. @size bytes will be + * Creates a new fence as a child of @parent. @size bytes will be * allocated allowing for implementation specific data to be kept after - * the generic sync_timeline struct. Returns the sync_pt object or + * the generic sync_timeline struct. Returns the fence object or * NULL in case of error. */ -struct sync_pt *sync_pt_create(struct sync_timeline *parent, int size); +struct fence *sync_pt_create(struct sync_timeline *parent, int size); /** - * sync_pt_free() - frees a sync pt - * @pt: sync_pt to free + * sync_fence_create() - creates a sync fence + * @name: name of fence to create + * @fence: fence to add to the sync_fence * - * This should only be called on sync_pts which have been created but - * not added to a fence. 
- */ -void sync_pt_free(struct sync_pt *pt); - -/** - * sync_file_create() - creates a sync file - * @name: name of file to create - * @pt: sync_pt to add to the file - * - * Creates a sync_file containg @pt. Once this is called, the sync_file takes - * ownership of @pt. + * Creates a sync_file containg @fence. Once this is called, the sync_file + * takes ownership of @fence. */ -struct sync_file *sync_file_create(const char *name, struct sync_pt *pt); +struct sync_file *sync_file_create(const char *name, struct fence *fence); /** * sync_file_create_dma() - creates a sync file from dma-fence @@ -228,7 +205,7 @@ struct sync_file *sync_file_create_dma(const char *name, struct fence *pt); * @a: sync_file a * @b: sync_file b * - * Creates a new sync_file which contains copies of all the sync_pts in both + * Creates a new sync_file which contains copies of all the fences in both * @a and @b. @a and @b remain valid, independent sync_file. Returns the * new merged sync_file or NULL in case of error. */ diff --git a/drivers/staging/android/sync_debug.c b/drivers/staging/android/sync_debug.c index 85ae98a63a1e..fd13f1e885e5 100644 --- a/drivers/staging/android/sync_debug.c +++ b/drivers/staging/android/sync_debug.c @@ -85,39 +85,40 @@ static const char *sync_status_str(int status) return "error"; } -static void sync_print_pt(struct seq_file *s, struct fence *pt, bool fence) +static void sync_print_fence(struct seq_file *s, struct fence *fence, bool show) { int status = 1; + struct sync_timeline *parent = fence_parent(fence); - if (fence_is_signaled_locked(pt)) - status = pt->status; + if (fence_is_signaled_locked(fence)) + status = fence->status; - seq_printf(s, " %s%spt %s", - fence && pt->ops->get_timeline_name ? - pt->ops->get_timeline_name(pt) : "", - fence ? "_" : "", + seq_printf(s, " %s%sfence %s", + show ? parent->name : "", + show ? 
"_" : "", sync_status_str(status)); if (status <= 0) { struct timespec64 ts64 = - ktime_to_timespec64(pt->timestamp); + ktime_to_timespec64(fence->timestamp); seq_printf(s, "@%lld.%09ld", (s64)ts64.tv_sec, ts64.tv_nsec); } - if ((!fence || pt->ops->timeline_value_str) && - pt->ops->fence_value_str) { + if ((!fence || fence->ops->timeline_value_str) && + fence->ops->fence_value_str) { char value[64]; bool success; - pt->ops->fence_value_str(pt, value, sizeof(value)); + fence->ops->fence_value_str(fence, value, sizeof(value)); success = strlen(value); if (success) seq_printf(s, ": %s", value); if (success && fence) { - pt->ops->timeline_value_str(pt, value, sizeof(value)); + fence->ops->timeline_value_str(fence, value, + sizeof(value)); if (strlen(value)) seq_printf(s, " / %s", value); @@ -145,25 +146,25 @@ static void sync_print_obj(struct seq_file *s, struct sync_timeline *obj) spin_lock_irqsave(&obj->child_list_lock, flags); list_for_each(pos, &obj->child_list_head) { - struct sync_pt *pt = - container_of(pos, struct sync_pt, child_list); - sync_print_pt(s, &pt->base, false); + struct fence *fence = + container_of(pos, struct fence, child_list); + sync_print_fence(s, fence, false); } spin_unlock_irqrestore(&obj->child_list_lock, flags); } static void sync_print_sync_file(struct seq_file *s, struct sync_file *sync_file) - { +{ int i; seq_printf(s, "[%p] %s: %s\n", sync_file, sync_file->name, sync_status_str(atomic_read(&sync_file->status))); for (i = 0; i < sync_file->num_fences; ++i) - sync_print_pt(s, sync_file->cbs[i].fence, true); - } - + sync_print_fence(s, sync_file->cbs[i].fence, true); +} + static int sync_debugfs_show(struct seq_file *s, void *unused) { unsigned long flags; @@ -244,7 +245,7 @@ static long sw_sync_ioctl_create_fence(struct sw_sync_timeline *obj, { int fd = get_unused_fd_flags(O_CLOEXEC); int err; - struct sync_pt *pt; + struct fence *fence; struct sync_file *sync_file; struct sw_sync_create_fence_data data; @@ -256,16 +257,16 @@ static long sw_sync_ioctl_create_fence(struct sw_sync_timeline *obj, goto err; } - pt = sw_sync_pt_create(obj, data.value); - if (!pt) { + fence = sw_sync_pt_create(obj, data.value); + if (!fence) { err = -ENOMEM; goto err; } data.name[sizeof(data.name) - 1] = '\0'; - sync_file = sync_file_create(data.name, pt); + sync_file = sync_file_create(data.name, fence); if (!sync_file) { - sync_pt_free(pt); + fence_put(fence); err = -ENOMEM; goto err; } diff --git a/drivers/staging/android/trace/sync.h b/drivers/staging/android/trace/sync.h index 80f5da4f2aa5..87c60e9f584e 100644 --- a/drivers/staging/android/trace/sync.h +++ b/drivers/staging/android/trace/sync.h @@ -53,20 +53,20 @@ TRACE_EVENT(sync_wait, __get_str(name), __entry->status) ); -TRACE_EVENT(sync_pt, - TP_PROTO(struct fence *pt), +TRACE_EVENT(fence, + TP_PROTO(struct fence *fence), - TP_ARGS(pt), + TP_ARGS(fence), TP_STRUCT__entry( - __string(timeline, pt->ops->get_timeline_name(pt)) + __string(timeline, fence->ops->get_timeline_name(fence)) __array(char, value, 32) ), TP_fast_assign( - __assign_str(timeline, pt->ops->get_timeline_name(pt)); - if (pt->ops->fence_value_str) { - pt->ops->fence_value_str(pt, __entry->value, + __assign_str(timeline, fence->ops->get_timeline_name(fence)); + if (fence->ops->fence_value_str) { + fence->ops->fence_value_str(fence, __entry->value, sizeof(__entry->value)); } else { __entry->value[0] = '\0'; diff --git a/include/linux/fence.h b/include/linux/fence.h index bb522011383b..605bd88246a6 100644 --- a/include/linux/fence.h +++ b/include/linux/fence.h @@ 
-79,6 +79,8 @@ struct fence { unsigned long flags; ktime_t timestamp; int status; + struct list_head child_list; + struct list_head active_list; }; enum fence_flag_bits { -- cgit v1.2.3 From 9b1b282ccd8195e3264dbb3cd15b9d60b6345230 Mon Sep 17 00:00:00 2001 From: "J. German Rivera" Date: Wed, 6 Jan 2016 16:03:19 -0600 Subject: irqdomain: Added domain bus token DOMAIN_BUS_FSL_MC_MSI Since an FSL-MC bus is a new bus type that is neither PCI nor PLATFORM, we need a new domain bus token to disambiguate the IRQ domain for FSL-MC MSIs. Signed-off-by: J. German Rivera Signed-off-by: Greg Kroah-Hartman --- include/linux/irqdomain.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index 04579d9fbce4..0934d06c8b00 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -74,6 +74,7 @@ enum irq_domain_bus_token { DOMAIN_BUS_PCI_MSI, DOMAIN_BUS_PLATFORM_MSI, DOMAIN_BUS_NEXUS, + DOMAIN_BUS_FSL_MC_MSI, }; /** -- cgit v1.2.3 From 550308e48cddbc2aa74f7236941b17b55b2e78e9 Mon Sep 17 00:00:00 2001 From: "J. German Rivera" Date: Wed, 6 Jan 2016 16:03:20 -0600 Subject: fsl-mc: msi: Added FSL-MC-specific member to the msi_desc's union FSL-MC is a bus type different from PCI and platform, so it needs its own member in the msi_desc's union. Signed-off-by: J. German Rivera Signed-off-by: Greg Kroah-Hartman --- include/linux/msi.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/msi.h b/include/linux/msi.h index a2a0068a8387..8b425c66305a 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -32,6 +32,14 @@ struct platform_msi_desc { u16 msi_index; }; +/** + * fsl_mc_msi_desc - FSL-MC device specific msi descriptor data + * @msi_index: The index of the MSI descriptor + */ +struct fsl_mc_msi_desc { + u16 msi_index; +}; + /** * struct msi_desc - Descriptor structure for MSI based interrupts * @list: List head for management @@ -87,6 +95,7 @@ struct msi_desc { * tree wide cleanup. */ struct platform_msi_desc platform; + struct fsl_mc_msi_desc fsl_mc; }; }; -- cgit v1.2.3 From 187372a3b9faff68ed61c291d0135e6739e0dbdf Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 8 Feb 2016 14:40:51 +1100 Subject: direct-io: always call ->end_io if non-NULL This way we can pass back errors to the file system, and allow for cleanup required for all direct I/O invocations. Also allow the ->end_io handlers to return errors on their own, so that I/O completion errors can be passed on to the callers. 
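As an illustration of the new contract (this sketch is not part of the patch), a filesystem ->end_io handler now runs for every direct I/O request that set one, so it must tolerate size <= 0 and may return its own error, which dio_complete() passes back to the submitter. The fs_finish_dio() helper below is hypothetical:

#include <linux/fs.h>

/* Hedged sketch of a dio_iodone_t handler under the new rules. */
static int example_end_io(struct kiocb *iocb, loff_t offset,
			  ssize_t size, void *private)
{
	/* The handler is now called even for failed or zero-byte I/O. */
	if (size <= 0)
		return 0;

	/* fs_finish_dio() stands in for per-filesystem cleanup; a
	 * negative return here is propagated to the original caller. */
	return fs_finish_dio(iocb, offset, size, private);
}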
Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Dave Chinner --- fs/dax.c | 9 +++++++-- fs/direct-io.c | 9 +++++++-- fs/ext4/inode.c | 9 +++++++-- fs/ocfs2/aops.c | 7 ++++++- fs/xfs/xfs_aops.c | 13 +++++++------ include/linux/fs.h | 2 +- 6 files changed, 35 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/fs/dax.c b/fs/dax.c index 4fd6b0c5c6b5..e38b2c589b54 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -267,8 +267,13 @@ ssize_t dax_do_io(struct kiocb *iocb, struct inode *inode, if ((flags & DIO_LOCKING) && iov_iter_rw(iter) == READ) inode_unlock(inode); - if ((retval > 0) && end_io) - end_io(iocb, pos, retval, bh.b_private); + if (end_io) { + int err; + + err = end_io(iocb, pos, retval, bh.b_private); + if (err) + retval = err; + } if (!(flags & DIO_SKIP_DIO_COUNT)) inode_dio_end(inode); diff --git a/fs/direct-io.c b/fs/direct-io.c index 1b2f7ffc8b84..9c6f885cc518 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -253,8 +253,13 @@ static ssize_t dio_complete(struct dio *dio, loff_t offset, ssize_t ret, if (ret == 0) ret = transferred; - if (dio->end_io && dio->result) - dio->end_io(dio->iocb, offset, transferred, dio->private); + if (dio->end_io) { + int err; + + err = dio->end_io(dio->iocb, offset, ret, dio->private); + if (err) + ret = err; + } if (!(dio->flags & DIO_SKIP_DIO_COUNT)) inode_dio_end(dio->inode); diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 83bc8bfb3bea..9db04dd9b88a 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -3161,14 +3161,17 @@ out: } #endif -static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset, +static int ext4_end_io_dio(struct kiocb *iocb, loff_t offset, ssize_t size, void *private) { ext4_io_end_t *io_end = iocb->private; + if (size <= 0) + return 0; + /* if not async direct IO just return */ if (!io_end) - return; + return 0; ext_debug("ext4_end_io_dio(): io_end 0x%p " "for inode %lu, iocb 0x%p, offset %llu, size %zd\n", @@ -3179,6 +3182,8 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset, io_end->offset = offset; io_end->size = size; ext4_put_io_end(io_end); + + return 0; } /* diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index 794fd1587f34..5dcc5f5a842e 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c @@ -620,7 +620,7 @@ bail: * particularly interested in the aio/dio case. We use the rw_lock DLM lock * to protect io on one node from truncation on another. */ -static void ocfs2_dio_end_io(struct kiocb *iocb, +static int ocfs2_dio_end_io(struct kiocb *iocb, loff_t offset, ssize_t bytes, void *private) @@ -628,6 +628,9 @@ static void ocfs2_dio_end_io(struct kiocb *iocb, struct inode *inode = file_inode(iocb->ki_filp); int level; + if (bytes <= 0) + return 0; + /* this io's submitter should not have unlocked this before we could */ BUG_ON(!ocfs2_iocb_is_rw_locked(iocb)); @@ -644,6 +647,8 @@ static void ocfs2_dio_end_io(struct kiocb *iocb, level = ocfs2_iocb_rw_locked_level(iocb); ocfs2_rw_unlock(inode, level); } + + return 0; } static int ocfs2_releasepage(struct page *page, gfp_t wait) diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 379c089fb051..295aaffea78e 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -1645,7 +1645,7 @@ out_end_io: * case the completion can be called in interrupt context, whereas if we have an * ioend we will always be called in task context (i.e. from a workqueue). 
*/ -STATIC void +STATIC int xfs_end_io_direct_write( struct kiocb *iocb, loff_t offset, @@ -1655,15 +1655,19 @@ xfs_end_io_direct_write( struct inode *inode = file_inode(iocb->ki_filp); struct xfs_ioend *ioend = private; + if (size <= 0) + return 0; + trace_xfs_gbmap_direct_endio(XFS_I(inode), offset, size, ioend ? ioend->io_type : 0, NULL); if (!ioend) { ASSERT(offset + size <= i_size_read(inode)); - return; + return 0; } __xfs_end_io_direct_write(inode, ioend, offset, size); + return 0; } static inline ssize_t @@ -1672,10 +1676,7 @@ xfs_vm_do_dio( struct kiocb *iocb, struct iov_iter *iter, loff_t offset, - void (*endio)(struct kiocb *iocb, - loff_t offset, - ssize_t size, - void *private), + dio_iodone_t endio, int flags) { struct block_device *bdev; diff --git a/include/linux/fs.h b/include/linux/fs.h index 1a2046275cdf..d7f37bfcbdce 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -70,7 +70,7 @@ extern int sysctl_protected_hardlinks; struct buffer_head; typedef int (get_block_t)(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create); -typedef void (dio_iodone_t)(struct kiocb *iocb, loff_t offset, +typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset, ssize_t bytes, void *private); typedef void (dax_iodone_t)(struct buffer_head *bh_map, int uptodate); -- cgit v1.2.3 From 7047f17d70fc0599563d30d0791692cb5fe42ae6 Mon Sep 17 00:00:00 2001 From: "K. Y. Srinivasan" Date: Fri, 25 Dec 2015 20:00:30 -0800 Subject: Drivers: hv: vmbus: Add vendor and device attributes Add vendor and device attributes to VMBUS devices. These will be used by Hyper-V tools as well as user-level RDMA libraries that will use the vendor/device tuple to discover the RDMA device. Signed-off-by: K. Y. Srinivasan Signed-off-by: Greg Kroah-Hartman --- Documentation/ABI/stable/sysfs-bus-vmbus | 14 +++ drivers/hv/channel_mgmt.c | 166 +++++++++++++++++++++++-------- drivers/hv/vmbus_drv.c | 21 ++++ include/linux/hyperv.h | 28 ++++++ 4 files changed, 186 insertions(+), 43 deletions(-) (limited to 'include/linux') diff --git a/Documentation/ABI/stable/sysfs-bus-vmbus b/Documentation/ABI/stable/sysfs-bus-vmbus index 636e938d5e33..5d0125f7bcaf 100644 --- a/Documentation/ABI/stable/sysfs-bus-vmbus +++ b/Documentation/ABI/stable/sysfs-bus-vmbus @@ -27,3 +27,17 @@ Description: The mapping of which primary/sub channels are bound to which Virtual Processors. Format: Users: tools/hv/lsvmbus + +What: /sys/bus/vmbus/devices/vmbus_*/device +Date: Dec. 2015 +KernelVersion: 4.5 +Contact: K. Y. Srinivasan +Description: The 16 bit device ID of the device +Users: tools/hv/lsvmbus and user level RDMA libraries + +What: /sys/bus/vmbus/devices/vmbus_*/vendor +Date: Dec. 2015 +KernelVersion: 4.5 +Contact: K. Y.
Srinivasan +Description: The 16 bit vendor ID of the device +Users: tools/hv/lsvmbus and user level RDMA libraries diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c index 1c1ad47042c5..107d72f9834d 100644 --- a/drivers/hv/channel_mgmt.c +++ b/drivers/hv/channel_mgmt.c @@ -32,8 +32,122 @@ #include "hyperv_vmbus.h" -static void init_vp_index(struct vmbus_channel *channel, - const uuid_le *type_guid); +static void init_vp_index(struct vmbus_channel *channel, u16 dev_type); + +static const struct vmbus_device vmbus_devs[] = { + /* IDE */ + { .dev_type = HV_IDE, + HV_IDE_GUID, + .perf_device = true, + }, + + /* SCSI */ + { .dev_type = HV_SCSI, + HV_SCSI_GUID, + .perf_device = true, + }, + + /* Fibre Channel */ + { .dev_type = HV_FC, + HV_SYNTHFC_GUID, + .perf_device = true, + }, + + /* Synthetic NIC */ + { .dev_type = HV_NIC, + HV_NIC_GUID, + .perf_device = true, + }, + + /* Network Direct */ + { .dev_type = HV_ND, + HV_ND_GUID, + .perf_device = true, + }, + + /* PCIE */ + { .dev_type = HV_PCIE, + HV_PCIE_GUID, + .perf_device = true, + }, + + /* Synthetic Frame Buffer */ + { .dev_type = HV_FB, + HV_SYNTHVID_GUID, + .perf_device = false, + }, + + /* Synthetic Keyboard */ + { .dev_type = HV_KBD, + HV_KBD_GUID, + .perf_device = false, + }, + + /* Synthetic MOUSE */ + { .dev_type = HV_MOUSE, + HV_MOUSE_GUID, + .perf_device = false, + }, + + /* KVP */ + { .dev_type = HV_KVP, + HV_KVP_GUID, + .perf_device = false, + }, + + /* Time Synch */ + { .dev_type = HV_TS, + HV_TS_GUID, + .perf_device = false, + }, + + /* Heartbeat */ + { .dev_type = HV_HB, + HV_HEART_BEAT_GUID, + .perf_device = false, + }, + + /* Shutdown */ + { .dev_type = HV_SHUTDOWN, + HV_SHUTDOWN_GUID, + .perf_device = false, + }, + + /* File copy */ + { .dev_type = HV_FCOPY, + HV_FCOPY_GUID, + .perf_device = false, + }, + + /* Backup */ + { .dev_type = HV_BACKUP, + HV_VSS_GUID, + .perf_device = false, + }, + + /* Dynamic Memory */ + { .dev_type = HV_DM, + HV_DM_GUID, + .perf_device = false, + }, + + /* Unknown GUID */ + { .dev_type = HV_UNKOWN, + .perf_device = false, + }, +}; + +static u16 hv_get_dev_type(const uuid_le *guid) +{ + u16 i; + + for (i = HV_IDE; i < HV_UNKOWN; i++) { + if (!uuid_le_cmp(*guid, vmbus_devs[i].guid)) + return i; + } + pr_info("Unknown GUID: %pUl\n", guid); + return i; +} /** * vmbus_prep_negotiate_resp() - Create default response for Hyper-V Negotiate message @@ -251,6 +365,7 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel) struct vmbus_channel *channel; bool fnew = true; unsigned long flags; + u16 dev_type; /* Make sure this is a new offer */ mutex_lock(&vmbus_connection.channel_mutex); @@ -288,7 +403,9 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel) goto err_free_chan; } - init_vp_index(newchannel, &newchannel->offermsg.offer.if_type); + dev_type = hv_get_dev_type(&newchannel->offermsg.offer.if_type); + + init_vp_index(newchannel, dev_type); if (newchannel->target_cpu != get_cpu()) { put_cpu(); @@ -325,6 +442,7 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel) if (!newchannel->device_obj) goto err_deq_chan; + newchannel->device_obj->device_id = dev_type; /* * Add the new device to the bus. This will kick off device-driver * binding which eventually invokes the device driver's AddDevice() @@ -358,37 +476,6 @@ err_free_chan: free_channel(newchannel); } -enum { - IDE = 0, - SCSI, - FC, - NIC, - ND_NIC, - PCIE, - MAX_PERF_CHN, -}; - -/* - * This is an array of device_ids (device types) that are performance critical. 
- * We attempt to distribute the interrupt load for these devices across - * all available CPUs. - */ -static const struct hv_vmbus_device_id hp_devs[] = { - /* IDE */ - { HV_IDE_GUID, }, - /* Storage - SCSI */ - { HV_SCSI_GUID, }, - /* Storage - FC */ - { HV_SYNTHFC_GUID, }, - /* Network */ - { HV_NIC_GUID, }, - /* NetworkDirect Guest RDMA */ - { HV_ND_GUID, }, - /* PCI Express Pass Through */ - { HV_PCIE_GUID, }, -}; - - /* * We use this state to statically distribute the channel interrupt load. */ @@ -405,22 +492,15 @@ static int next_numa_node_id; * For pre-win8 hosts or non-performance critical channels we assign the * first CPU in the first NUMA node. */ -static void init_vp_index(struct vmbus_channel *channel, const uuid_le *type_guid) +static void init_vp_index(struct vmbus_channel *channel, u16 dev_type) { u32 cur_cpu; - int i; - bool perf_chn = false; + bool perf_chn = vmbus_devs[dev_type].perf_device; struct vmbus_channel *primary = channel->primary_channel; int next_node; struct cpumask available_mask; struct cpumask *alloced_mask; - for (i = IDE; i < MAX_PERF_CHN; i++) { - if (!uuid_le_cmp(*type_guid, hp_devs[i].guid)) { - perf_chn = true; - break; - } - } if ((vmbus_proto_version == VERSION_WS2008) || (vmbus_proto_version == VERSION_WIN7) || (!perf_chn)) { /* diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c index 328e4c3808e0..3668a95778ec 100644 --- a/drivers/hv/vmbus_drv.c +++ b/drivers/hv/vmbus_drv.c @@ -477,6 +477,24 @@ static ssize_t channel_vp_mapping_show(struct device *dev, } static DEVICE_ATTR_RO(channel_vp_mapping); +static ssize_t vendor_show(struct device *dev, + struct device_attribute *dev_attr, + char *buf) +{ + struct hv_device *hv_dev = device_to_hv_device(dev); + return sprintf(buf, "0x%x\n", hv_dev->vendor_id); +} +static DEVICE_ATTR_RO(vendor); + +static ssize_t device_show(struct device *dev, + struct device_attribute *dev_attr, + char *buf) +{ + struct hv_device *hv_dev = device_to_hv_device(dev); + return sprintf(buf, "0x%x\n", hv_dev->device_id); +} +static DEVICE_ATTR_RO(device); + /* Set up per device attributes in /sys/bus/vmbus/devices/ */ static struct attribute *vmbus_attrs[] = { &dev_attr_id.attr, @@ -502,6 +520,8 @@ static struct attribute *vmbus_attrs[] = { &dev_attr_in_read_bytes_avail.attr, &dev_attr_in_write_bytes_avail.attr, &dev_attr_channel_vp_mapping.attr, + &dev_attr_vendor.attr, + &dev_attr_device.attr, NULL, }; ATTRIBUTE_GROUPS(vmbus); @@ -957,6 +977,7 @@ struct hv_device *vmbus_device_create(const uuid_le *type, memcpy(&child_device_obj->dev_type, type, sizeof(uuid_le)); memcpy(&child_device_obj->dev_instance, instance, sizeof(uuid_le)); + child_device_obj->vendor_id = 0x1414; /* MSFT vendor ID */ return child_device_obj; diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index 753dbad0bf94..3172521df805 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -633,6 +633,32 @@ enum hv_signal_policy { HV_SIGNAL_POLICY_EXPLICIT, }; +enum vmbus_device_type { + HV_IDE = 0, + HV_SCSI, + HV_FC, + HV_NIC, + HV_ND, + HV_PCIE, + HV_FB, + HV_KBD, + HV_MOUSE, + HV_KVP, + HV_TS, + HV_HB, + HV_SHUTDOWN, + HV_FCOPY, + HV_BACKUP, + HV_DM, + HV_UNKOWN, +}; + +struct vmbus_device { + u16 dev_type; + uuid_le guid; + bool perf_device; +}; + struct vmbus_channel { /* Unique channel id */ int id; @@ -959,6 +985,8 @@ struct hv_device { /* the device instance id of this device */ uuid_le dev_instance; + u16 vendor_id; + u16 device_id; struct device device; -- cgit v1.2.3 From 3c75354d043ad546148d6992e40033ecaefc5ea5 Mon 
Sep 17 00:00:00 2001 From: Dexuan Cui Date: Wed, 27 Jan 2016 22:29:37 -0800 Subject: Drivers: hv: vmbus: add a helper function to set a channel's pending send size This will be used by the coming net/hvsock driver. Signed-off-by: Dexuan Cui Signed-off-by: K. Y. Srinivasan Signed-off-by: Greg Kroah-Hartman --- include/linux/hyperv.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index 3172521df805..4af51a3f98d9 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -816,6 +816,12 @@ static inline void *get_per_channel_state(struct vmbus_channel *c) return c->per_channel_state; } +static inline void set_channel_pending_send_size(struct vmbus_channel *c, + u32 size) +{ + c->outbound.ring_buffer->pending_send_sz = size; +} + void vmbus_onmessage(void *context); int vmbus_request_offers(void); -- cgit v1.2.3 From e8d6ca023efce3bd80050dcd9e708ee3cf8babd4 Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Wed, 27 Jan 2016 22:29:38 -0800 Subject: Drivers: hv: vmbus: define the new offer type for Hyper-V socket (hvsock) A helper function is also added. Signed-off-by: Dexuan Cui Signed-off-by: K. Y. Srinivasan Signed-off-by: Greg Kroah-Hartman --- include/linux/hyperv.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index 4af51a3f98d9..79c4aa70eff3 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -235,6 +235,7 @@ struct vmbus_channel_offer { #define VMBUS_CHANNEL_LOOPBACK_OFFER 0x100 #define VMBUS_CHANNEL_PARENT_OFFER 0x200 #define VMBUS_CHANNEL_REQUEST_MONITORED_NOTIFICATION 0x400 +#define VMBUS_CHANNEL_TLNPI_PROVIDER_OFFER 0x2000 struct vmpacket_descriptor { u16 type; @@ -795,6 +796,12 @@ struct vmbus_channel { enum hv_signal_policy signal_policy; }; +static inline bool is_hvsock_channel(const struct vmbus_channel *c) +{ + return !!(c->offermsg.offer.chn_flags & + VMBUS_CHANNEL_TLNPI_PROVIDER_OFFER); +} + static inline void set_channel_signal_state(struct vmbus_channel *c, enum hv_signal_policy policy) { -- cgit v1.2.3 From 5c23a1a5c60b0f472cfa61cd7d8279f8aaeb5b64 Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Wed, 27 Jan 2016 22:29:40 -0800 Subject: Drivers: hv: vmbus: define a new VMBus message type for hvsock A function to send the type of message is also added. The coming net/hvsock driver will use this function to proactively request the host to offer a VMBus channel for a new hvsock connection. Signed-off-by: Dexuan Cui Signed-off-by: K. Y. 
Srinivasan Signed-off-by: Greg Kroah-Hartman --- drivers/hv/channel.c | 15 +++++++++++++++ drivers/hv/channel_mgmt.c | 4 ++++ include/linux/hyperv.h | 13 +++++++++++++ 3 files changed, 32 insertions(+) (limited to 'include/linux') diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c index 3f0453302146..fcab234796ef 100644 --- a/drivers/hv/channel.c +++ b/drivers/hv/channel.c @@ -219,6 +219,21 @@ error0: } EXPORT_SYMBOL_GPL(vmbus_open); +/* Used for Hyper-V Socket: a guest client's connect() to the host */ +int vmbus_send_tl_connect_request(const uuid_le *shv_guest_servie_id, + const uuid_le *shv_host_servie_id) +{ + struct vmbus_channel_tl_connect_request conn_msg; + + memset(&conn_msg, 0, sizeof(conn_msg)); + conn_msg.header.msgtype = CHANNELMSG_TL_CONNECT_REQUEST; + conn_msg.guest_endpoint_id = *shv_guest_servie_id; + conn_msg.host_service_id = *shv_host_servie_id; + + return vmbus_post_msg(&conn_msg, sizeof(conn_msg)); +} +EXPORT_SYMBOL_GPL(vmbus_send_tl_connect_request); + /* * create_gpadl_header - Creates a gpadl for the specified buffer */ diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c index d6c611457601..60ca25b93b4c 100644 --- a/drivers/hv/channel_mgmt.c +++ b/drivers/hv/channel_mgmt.c @@ -958,6 +958,10 @@ struct vmbus_channel_message_table_entry {CHANNELMSG_VERSION_RESPONSE, 1, vmbus_onversion_response}, {CHANNELMSG_UNLOAD, 0, NULL}, {CHANNELMSG_UNLOAD_RESPONSE, 1, vmbus_unload_response}, + {CHANNELMSG_18, 0, NULL}, + {CHANNELMSG_19, 0, NULL}, + {CHANNELMSG_20, 0, NULL}, + {CHANNELMSG_TL_CONNECT_REQUEST, 0, NULL}, }; /* diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index 79c4aa70eff3..898eac9e8c13 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -392,6 +392,10 @@ enum vmbus_channel_message_type { CHANNELMSG_VERSION_RESPONSE = 15, CHANNELMSG_UNLOAD = 16, CHANNELMSG_UNLOAD_RESPONSE = 17, + CHANNELMSG_18 = 18, + CHANNELMSG_19 = 19, + CHANNELMSG_20 = 20, + CHANNELMSG_TL_CONNECT_REQUEST = 21, CHANNELMSG_COUNT }; @@ -562,6 +566,13 @@ struct vmbus_channel_initiate_contact { u64 monitor_page2; } __packed; +/* Hyper-V socket: guest's connect()-ing to host */ +struct vmbus_channel_tl_connect_request { + struct vmbus_channel_message_header header; + uuid_le guest_endpoint_id; + uuid_le host_service_id; +} __packed; + struct vmbus_channel_version_response { struct vmbus_channel_message_header header; u8 version_supported; @@ -1283,4 +1294,6 @@ void hv_process_channel_removal(struct vmbus_channel *channel, u32 relid); extern __u32 vmbus_proto_version; +int vmbus_send_tl_connect_request(const uuid_le *shv_guest_servie_id, + const uuid_le *shv_host_servie_id); #endif /* _HYPERV_H */ -- cgit v1.2.3 From 8981da320a11217589aa3c50f9e891bcdef07ece Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Wed, 27 Jan 2016 22:29:41 -0800 Subject: Drivers: hv: vmbus: add a hvsock flag in struct hv_driver Only the coming hv_sock driver has a "true" value for this flag. We treat the hvsock offers/channels as special VMBus devices. Since the hv_sock driver handles all the hvsock offers/channels, we need to tweak vmbus_match() for hv_sock driver, so we introduce this flag. Signed-off-by: Dexuan Cui Signed-off-by: K. Y. 
Srinivasan Signed-off-by: Greg Kroah-Hartman --- drivers/hv/vmbus_drv.c | 4 ++++ include/linux/hyperv.h | 14 ++++++++++++++ 2 files changed, 18 insertions(+) (limited to 'include/linux') diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c index 3668a95778ec..063e5f53ca78 100644 --- a/drivers/hv/vmbus_drv.c +++ b/drivers/hv/vmbus_drv.c @@ -582,6 +582,10 @@ static int vmbus_match(struct device *device, struct device_driver *driver) struct hv_driver *drv = drv_to_hv_drv(driver); struct hv_device *hv_dev = device_to_hv_device(device); + /* The hv_sock driver handles all hv_sock offers. */ + if (is_hvsock_channel(hv_dev->channel)) + return drv->hvsock; + if (hv_vmbus_get_id(drv->id_table, &hv_dev->dev_type)) return 1; diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index 898eac9e8c13..f636f91f104b 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -990,6 +990,20 @@ extern void vmbus_ontimer(unsigned long data); struct hv_driver { const char *name; + /* + * A hvsock offer, which has a VMBUS_CHANNEL_TLNPI_PROVIDER_OFFER + * channel flag, actually doesn't mean a synthetic device because the + * offer's if_type/if_instance can change for every new hvsock + * connection. + * + * However, to facilitate the notification of new-offer/rescind-offer + * from vmbus driver to hvsock driver, we can handle hvsock offer as + * a special vmbus device, and hence we need the below flag to + * indicate if the driver is the hvsock driver or not: we need to + * specially treat the hvosck offer & driver in vmbus_match(). + */ + bool hvsock; + /* the device type supported by this driver */ uuid_le dev_type; const struct hv_vmbus_device_id *id_table; -- cgit v1.2.3 From 499e8401a515d04daa986b995da710d2b9737764 Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Wed, 27 Jan 2016 22:29:42 -0800 Subject: Drivers: hv: vmbus: add a per-channel rescind callback This will be used by the coming hv_sock driver. Signed-off-by: Dexuan Cui Signed-off-by: K. Y. Srinivasan Signed-off-by: Greg Kroah-Hartman --- drivers/hv/channel_mgmt.c | 11 +++++++++++ include/linux/hyperv.h | 9 +++++++++ 2 files changed, 20 insertions(+) (limited to 'include/linux') diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c index 60ca25b93b4c..76864c98a110 100644 --- a/drivers/hv/channel_mgmt.c +++ b/drivers/hv/channel_mgmt.c @@ -741,6 +741,10 @@ static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr) spin_unlock_irqrestore(&channel->lock, flags); if (channel->device_obj) { + if (channel->chn_rescind_callback) { + channel->chn_rescind_callback(channel); + return; + } /* * We will have to unregister this device from the * driver core. @@ -1110,3 +1114,10 @@ bool vmbus_are_subchannels_present(struct vmbus_channel *primary) return ret; } EXPORT_SYMBOL_GPL(vmbus_are_subchannels_present); + +void vmbus_set_chn_rescind_callback(struct vmbus_channel *channel, + void (*chn_rescind_cb)(struct vmbus_channel *)) +{ + channel->chn_rescind_callback = chn_rescind_cb; +} +EXPORT_SYMBOL_GPL(vmbus_set_chn_rescind_callback); diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index f636f91f104b..2e54e34e5feb 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -765,6 +765,12 @@ struct vmbus_channel { */ void (*sc_creation_callback)(struct vmbus_channel *new_sc); + /* + * Channel rescind callback. Some channels (the hvsock ones), need to + * register a callback which is invoked in vmbus_onoffer_rescind(). 
+ */ + void (*chn_rescind_callback)(struct vmbus_channel *channel); + /* * The spinlock to protect the structure. It is being used to protect * test-and-set access to various attributes of the structure as well @@ -851,6 +857,9 @@ int vmbus_request_offers(void); void vmbus_set_sc_create_callback(struct vmbus_channel *primary_channel, void (*sc_cr_cb)(struct vmbus_channel *new_sc)); +void vmbus_set_chn_rescind_callback(struct vmbus_channel *channel, + void (*chn_rescind_cb)(struct vmbus_channel *)); + /* * Retrieve the (sub) channel on which to send an outgoing request. * When a primary channel has multiple sub-channels, we choose a -- cgit v1.2.3 From 85d9aa705184a4504d0330017e3956fcdae8a9d6 Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Wed, 27 Jan 2016 22:29:43 -0800 Subject: Drivers: hv: vmbus: add an API vmbus_hvsock_device_unregister() The hvsock driver needs this API to release all the resources related to the channel. Signed-off-by: Dexuan Cui Signed-off-by: K. Y. Srinivasan Signed-off-by: Greg Kroah-Hartman --- drivers/hv/channel_mgmt.c | 33 ++++++++++++++++++++++++++++----- drivers/hv/connection.c | 4 ++-- include/linux/hyperv.h | 2 ++ 3 files changed, 32 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c index 76864c98a110..cf311be88cb4 100644 --- a/drivers/hv/channel_mgmt.c +++ b/drivers/hv/channel_mgmt.c @@ -310,6 +310,7 @@ void hv_process_channel_removal(struct vmbus_channel *channel, u32 relid) vmbus_release_relid(relid); BUG_ON(!channel->rescind); + BUG_ON(!mutex_is_locked(&vmbus_connection.channel_mutex)); if (channel->target_cpu != get_cpu()) { put_cpu(); @@ -321,9 +322,7 @@ void hv_process_channel_removal(struct vmbus_channel *channel, u32 relid) } if (channel->primary_channel == NULL) { - mutex_lock(&vmbus_connection.channel_mutex); list_del(&channel->listentry); - mutex_unlock(&vmbus_connection.channel_mutex); primary_channel = channel; } else { @@ -367,6 +366,7 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel) bool fnew = true; unsigned long flags; u16 dev_type; + int ret; /* Make sure this is a new offer */ mutex_lock(&vmbus_connection.channel_mutex); @@ -449,7 +449,11 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel) * binding which eventually invokes the device driver's AddDevice() * method. */ - if (vmbus_device_register(newchannel->device_obj) != 0) { + mutex_lock(&vmbus_connection.channel_mutex); + ret = vmbus_device_register(newchannel->device_obj); + mutex_unlock(&vmbus_connection.channel_mutex); + + if (ret != 0) { pr_err("unable to add child device object (relid %d)\n", newchannel->offermsg.child_relid); kfree(newchannel->device_obj); @@ -725,6 +729,8 @@ static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr) struct device *dev; rescind = (struct vmbus_channel_rescind_offer *)hdr; + + mutex_lock(&vmbus_connection.channel_mutex); channel = relid2channel(rescind->child_relid); if (channel == NULL) { @@ -733,7 +739,7 @@ static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr) * vmbus_process_offer(), we have already invoked * vmbus_release_relid() on error. 
*/ - return; + goto out; } spin_lock_irqsave(&channel->lock, flags); channel->rescind = true; spin_unlock_irqrestore(&channel->lock, flags); if (channel->device_obj) { if (channel->chn_rescind_callback) { channel->chn_rescind_callback(channel); - return; + goto out; } /* * We will have to unregister this device from the @@ -758,8 +764,25 @@ hv_process_channel_removal(channel, channel->offermsg.child_relid); } + +out: + mutex_unlock(&vmbus_connection.channel_mutex); } +void vmbus_hvsock_device_unregister(struct vmbus_channel *channel) +{ + mutex_lock(&vmbus_connection.channel_mutex); + + BUG_ON(!is_hvsock_channel(channel)); + + channel->rescind = true; + vmbus_device_unregister(channel->device_obj); + + mutex_unlock(&vmbus_connection.channel_mutex); +} +EXPORT_SYMBOL_GPL(vmbus_hvsock_device_unregister); + + /* * vmbus_onoffers_delivered - * This is invoked when all offers have been delivered. diff --git a/drivers/hv/connection.c b/drivers/hv/connection.c index 4a320e60641a..fa86b2cb28b8 100644 --- a/drivers/hv/connection.c +++ b/drivers/hv/connection.c @@ -288,7 +288,8 @@ struct vmbus_channel *relid2channel(u32 relid) struct list_head *cur, *tmp; struct vmbus_channel *cur_sc; - mutex_lock(&vmbus_connection.channel_mutex); + BUG_ON(!mutex_is_locked(&vmbus_connection.channel_mutex)); + list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) { if (channel->offermsg.child_relid == relid) { found_channel = channel; @@ -307,7 +308,6 @@ } } } - mutex_unlock(&vmbus_connection.channel_mutex); return found_channel; } diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index 2e54e34e5feb..c056f058dcf8 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -1069,6 +1069,8 @@ int __must_check __vmbus_driver_register(struct hv_driver *hv_driver, const char *mod_name); void vmbus_driver_unregister(struct hv_driver *hv_driver); +void vmbus_hvsock_device_unregister(struct vmbus_channel *channel); + int vmbus_allocate_mmio(struct resource **new, struct hv_device *device_obj, resource_size_t min, resource_size_t max, resource_size_t size, resource_size_t align, -- cgit v1.2.3 From fe760e4d64fe5c17c39e86c410d41f6587ee88bc Mon Sep 17 00:00:00 2001 From: "K. Y. Srinivasan" Date: Wed, 27 Jan 2016 22:29:45 -0800 Subject: Drivers: hv: vmbus: Give control over how the ring access is serialized On the channel send side, many of the VMBUS device drivers explicitly serialize access to the outgoing ring buffer. Give more control to the VMBUS device drivers in terms of how to serialize access to the outgoing ring buffer. The default behavior will be to acquire the ring lock to preserve the current behavior. Signed-off-by: K. Y. Srinivasan Signed-off-by: Greg Kroah-Hartman --- drivers/hv/channel.c | 15 +++++++++++++++ drivers/hv/channel_mgmt.c | 1 + drivers/hv/hyperv_vmbus.h | 2 +- drivers/hv/ring_buffer.c | 13 ++++++++----- include/linux/hyperv.h | 16 ++++++++++++++++ 5 files changed, 37 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c index fcab234796ef..56dd261f7142 100644 --- a/drivers/hv/channel.c +++ b/drivers/hv/channel.c @@ -639,6 +639,7 @@ int vmbus_sendpacket_ctl(struct vmbus_channel *channel, void *buffer, u64 aligned_data = 0; int ret; bool signal = false; + bool lock = channel->acquire_ring_lock; int num_vecs = ((bufferlen != 0) ?
3 : 1); @@ -658,7 +659,7 @@ int vmbus_sendpacket_ctl(struct vmbus_channel *channel, void *buffer, bufferlist[2].iov_len = (packetlen_aligned - packetlen); ret = hv_ringbuffer_write(&channel->outbound, bufferlist, num_vecs, - &signal); + &signal, lock); /* * Signalling the host is conditional on many factors: @@ -738,6 +739,7 @@ int vmbus_sendpacket_pagebuffer_ctl(struct vmbus_channel *channel, struct kvec bufferlist[3]; u64 aligned_data = 0; bool signal = false; + bool lock = channel->acquire_ring_lock; if (pagecount > MAX_PAGE_BUFFER_COUNT) return -EINVAL; @@ -774,7 +776,8 @@ int vmbus_sendpacket_pagebuffer_ctl(struct vmbus_channel *channel, bufferlist[2].iov_base = &aligned_data; bufferlist[2].iov_len = (packetlen_aligned - packetlen); - ret = hv_ringbuffer_write(&channel->outbound, bufferlist, 3, &signal); + ret = hv_ringbuffer_write(&channel->outbound, bufferlist, 3, + &signal, lock); /* * Signalling the host is conditional on many factors: @@ -837,6 +840,7 @@ int vmbus_sendpacket_mpb_desc(struct vmbus_channel *channel, struct kvec bufferlist[3]; u64 aligned_data = 0; bool signal = false; + bool lock = channel->acquire_ring_lock; packetlen = desc_size + bufferlen; packetlen_aligned = ALIGN(packetlen, sizeof(u64)); @@ -856,7 +860,8 @@ int vmbus_sendpacket_mpb_desc(struct vmbus_channel *channel, bufferlist[2].iov_base = &aligned_data; bufferlist[2].iov_len = (packetlen_aligned - packetlen); - ret = hv_ringbuffer_write(&channel->outbound, bufferlist, 3, &signal); + ret = hv_ringbuffer_write(&channel->outbound, bufferlist, 3, + &signal, lock); if (ret == 0 && signal) vmbus_setevent(channel); @@ -881,6 +886,7 @@ int vmbus_sendpacket_multipagebuffer(struct vmbus_channel *channel, struct kvec bufferlist[3]; u64 aligned_data = 0; bool signal = false; + bool lock = channel->acquire_ring_lock; u32 pfncount = NUM_PAGES_SPANNED(multi_pagebuffer->offset, multi_pagebuffer->len); @@ -919,7 +925,8 @@ int vmbus_sendpacket_multipagebuffer(struct vmbus_channel *channel, bufferlist[2].iov_base = &aligned_data; bufferlist[2].iov_len = (packetlen_aligned - packetlen); - ret = hv_ringbuffer_write(&channel->outbound, bufferlist, 3, &signal); + ret = hv_ringbuffer_write(&channel->outbound, bufferlist, 3, + &signal, lock); if (ret == 0 && signal) vmbus_setevent(channel); diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c index cf311be88cb4..b40f429aaa13 100644 --- a/drivers/hv/channel_mgmt.c +++ b/drivers/hv/channel_mgmt.c @@ -259,6 +259,7 @@ static struct vmbus_channel *alloc_channel(void) return NULL; channel->id = atomic_inc_return(&chan_num); + channel->acquire_ring_lock = true; spin_lock_init(&channel->inbound_lock); spin_lock_init(&channel->lock); diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h index ac7aa303c37d..b9ea7f59036b 100644 --- a/drivers/hv/hyperv_vmbus.h +++ b/drivers/hv/hyperv_vmbus.h @@ -529,7 +529,7 @@ void hv_ringbuffer_cleanup(struct hv_ring_buffer_info *ring_info); int hv_ringbuffer_write(struct hv_ring_buffer_info *ring_info, struct kvec *kv_list, - u32 kv_count, bool *signal); + u32 kv_count, bool *signal, bool lock); int hv_ringbuffer_read(struct hv_ring_buffer_info *inring_info, void *buffer, u32 buflen, u32 *buffer_actual_len, diff --git a/drivers/hv/ring_buffer.c b/drivers/hv/ring_buffer.c index 1145f3b8e4e0..5613e2b5cff7 100644 --- a/drivers/hv/ring_buffer.c +++ b/drivers/hv/ring_buffer.c @@ -314,7 +314,7 @@ void hv_ringbuffer_cleanup(struct hv_ring_buffer_info *ring_info) /* Write to the ring buffer. 
*/ int hv_ringbuffer_write(struct hv_ring_buffer_info *outring_info, - struct kvec *kv_list, u32 kv_count, bool *signal) + struct kvec *kv_list, u32 kv_count, bool *signal, bool lock) { int i = 0; u32 bytes_avail_towrite; @@ -324,14 +324,15 @@ int hv_ringbuffer_write(struct hv_ring_buffer_info *outring_info, u32 next_write_location; u32 old_write; u64 prev_indices = 0; - unsigned long flags; + unsigned long flags = 0; for (i = 0; i < kv_count; i++) totalbytes_towrite += kv_list[i].iov_len; totalbytes_towrite += sizeof(u64); - spin_lock_irqsave(&outring_info->ring_lock, flags); + if (lock) + spin_lock_irqsave(&outring_info->ring_lock, flags); hv_get_ringbuffer_availbytes(outring_info, &bytes_avail_toread, @@ -343,7 +344,8 @@ int hv_ringbuffer_write(struct hv_ring_buffer_info *outring_info, * is empty since the read index == write index. */ if (bytes_avail_towrite <= totalbytes_towrite) { - spin_unlock_irqrestore(&outring_info->ring_lock, flags); + if (lock) + spin_unlock_irqrestore(&outring_info->ring_lock, flags); return -EAGAIN; } @@ -374,7 +376,8 @@ int hv_ringbuffer_write(struct hv_ring_buffer_info *outring_info, hv_set_next_write_location(outring_info, next_write_location); - spin_unlock_irqrestore(&outring_info->ring_lock, flags); + if (lock) + spin_unlock_irqrestore(&outring_info->ring_lock, flags); *signal = hv_need_to_signal(old_write, outring_info); return 0; diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index c056f058dcf8..d23dab0d770b 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -811,8 +811,24 @@ struct vmbus_channel { * signaling control. */ enum hv_signal_policy signal_policy; + /* + * On the channel send side, many of the VMBUS + * device drivers explicity serialize access to the + * outgoing ring buffer. Give more control to the + * VMBUS device drivers in terms how to serialize + * accesss to the outgoing ring buffer. + * The default behavior will be to aquire the + * ring lock to preserve the current behavior. + */ + bool acquire_ring_lock; + }; +static inline void set_channel_lock_state(struct vmbus_channel *c, bool state) +{ + c->acquire_ring_lock = state; +} + static inline bool is_hvsock_channel(const struct vmbus_channel *c) { return !!(c->offermsg.offer.chn_flags & -- cgit v1.2.3 From f42a0fd13bd281811e7457b28d939c8e8b808868 Mon Sep 17 00:00:00 2001 From: Jorgen Hansen Date: Thu, 12 Nov 2015 01:29:32 -0800 Subject: VMCI: Use 32bit atomics for queue headers on X86_32 This change restricts the reading and setting of the head and tail pointers on 32bit X86 to 32bit for both correctness and performance reasons. On uniprocessor X86_32, the atomic64_read may be implemented as a non-locked cmpxchg8b. This may result in updates to the pointers done by the VMCI device being overwritten. On MP systems, there is no such correctness issue, but using 32bit atomics avoids the overhead of the locked 64bit operation. All this is safe because the queue size on 32bit systems will never exceed a 32bit value. 
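To put the new helpers in context, here is a hedged sketch (the example_used_space() name is hypothetical; vmw_vmci_defs.h carries similar arithmetic) of computing how many bytes sit in a produce queue. The point is that every index access funnels through vmci_q_read_pointer(), so an X86_32 build only ever issues 32bit atomic reads:

#include <linux/vmw_vmci_defs.h>

/* Illustrative only: bytes currently queued in the produce queue. */
static u64 example_used_space(const struct vmci_queue_header *produce_q,
			      const struct vmci_queue_header *consume_q,
			      u64 produce_q_size)
{
	u64 tail = vmci_q_header_producer_tail(produce_q);
	u64 head = vmci_q_header_consumer_head(consume_q);

	if (tail >= produce_q_size || head >= produce_q_size)
		return 0;	/* corrupted indexes; report empty */

	/* Handle the wrapped case explicitly. */
	return tail >= head ? tail - head : produce_q_size - (head - tail);
}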
Reviewed-by: Thomas Hellstrom Signed-off-by: Jorgen Hansen Signed-off-by: Greg Kroah-Hartman --- drivers/misc/vmw_vmci/vmci_driver.c | 2 +- include/linux/vmw_vmci_defs.h | 43 +++++++++++++++++++++++++++++++++---- 2 files changed, 40 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/drivers/misc/vmw_vmci/vmci_driver.c b/drivers/misc/vmw_vmci/vmci_driver.c index b823f9a6e464..896be150e28f 100644 --- a/drivers/misc/vmw_vmci/vmci_driver.c +++ b/drivers/misc/vmw_vmci/vmci_driver.c @@ -113,5 +113,5 @@ module_exit(vmci_drv_exit); MODULE_AUTHOR("VMware, Inc."); MODULE_DESCRIPTION("VMware Virtual Machine Communication Interface."); -MODULE_VERSION("1.1.3.0-k"); +MODULE_VERSION("1.1.4.0-k"); MODULE_LICENSE("GPL v2"); diff --git a/include/linux/vmw_vmci_defs.h b/include/linux/vmw_vmci_defs.h index 65ac54c61c18..1bd31a38c51e 100644 --- a/include/linux/vmw_vmci_defs.h +++ b/include/linux/vmw_vmci_defs.h @@ -733,6 +733,41 @@ static inline void *vmci_event_data_payload(struct vmci_event_data *ev_data) return (void *)vmci_event_data_const_payload(ev_data); } +/* + * Helper to read a value from a head or tail pointer. For X86_32, the + * pointer is treated as a 32bit value, since the pointer value + * never exceeds a 32bit value in this case. Also, doing an + * atomic64_read on X86_32 uniprocessor systems may be implemented + * as a non locked cmpxchg8b, that may end up overwriting updates done + * by the VMCI device to the memory location. On 32bit SMP, the lock + * prefix will be used, so correctness isn't an issue, but using a + * 64bit operation still adds unnecessary overhead. + */ +static inline u64 vmci_q_read_pointer(atomic64_t *var) +{ +#if defined(CONFIG_X86_32) + return atomic_read((atomic_t *)var); +#else + return atomic64_read(var); +#endif +} + +/* + * Helper to set the value of a head or tail pointer. For X86_32, the + * pointer is treated as a 32bit value, since the pointer value + * never exceeds a 32bit value in this case. On 32bit SMP, using a + * locked cmpxchg8b adds unnecessary overhead. + */ +static inline void vmci_q_set_pointer(atomic64_t *var, + u64 new_val) +{ +#if defined(CONFIG_X86_32) + return atomic_set((atomic_t *)var, (u32)new_val); +#else + return atomic64_set(var, new_val); +#endif +} + /* * Helper to add a given offset to a head or tail pointer. Wraps the * value of the pointer around the max size of the queue. @@ -741,14 +776,14 @@ static inline void vmci_qp_add_pointer(atomic64_t *var, size_t add, u64 size) { - u64 new_val = atomic64_read(var); + u64 new_val = vmci_q_read_pointer(var); if (new_val >= size - add) new_val -= size; new_val += add; - atomic64_set(var, new_val); + vmci_q_set_pointer(var, new_val); } /* @@ -758,7 +793,7 @@ static inline u64 vmci_q_header_producer_tail(const struct vmci_queue_header *q_header) { struct vmci_queue_header *qh = (struct vmci_queue_header *)q_header; - return atomic64_read(&qh->producer_tail); + return vmci_q_read_pointer(&qh->producer_tail); } /* @@ -768,7 +803,7 @@ static inline u64 vmci_q_header_consumer_head(const struct vmci_queue_header *q_header) { struct vmci_queue_header *qh = (struct vmci_queue_header *)q_header; - return atomic64_read(&qh->consumer_head); + return vmci_q_read_pointer(&qh->consumer_head); } /* -- cgit v1.2.3 From 52210c8745e418f82f3f0aeeee01d7bc4858812a Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Tue, 2 Feb 2016 14:14:01 -0700 Subject: coresight: implementing 'cpu_id()' API Other than plainly parsing the device tree there is no way to know which CPU a tracer is affined to. 
As such, add an interface to look up the CPU field enclosed in the etm_drvdata structure that was initialised at boot time. Signed-off-by: Mathieu Poirier Signed-off-by: Greg Kroah-Hartman --- drivers/hwtracing/coresight/coresight-etm3x.c | 8 ++++++++ drivers/hwtracing/coresight/coresight-etm4x.c | 8 ++++++++ include/linux/coresight.h | 3 +++ 3 files changed, 19 insertions(+) (limited to 'include/linux') diff --git a/drivers/hwtracing/coresight/coresight-etm3x.c b/drivers/hwtracing/coresight/coresight-etm3x.c index 5981fcc69960..aae80e14508d 100644 --- a/drivers/hwtracing/coresight/coresight-etm3x.c +++ b/drivers/hwtracing/coresight/coresight-etm3x.c @@ -315,6 +315,13 @@ static void etm_enable_hw(void *info) dev_dbg(drvdata->dev, "cpu: %d enable smp call done\n", drvdata->cpu); } +static int etm_cpu_id(struct coresight_device *csdev) +{ + struct etm_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); + + return drvdata->cpu; +} + static int etm_trace_id(struct coresight_device *csdev) { struct etm_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); @@ -421,6 +428,7 @@ static void etm_disable(struct coresight_device *csdev) } static const struct coresight_ops_source etm_source_ops = { + .cpu_id = etm_cpu_id, .trace_id = etm_trace_id, .enable = etm_enable, .disable = etm_disable, diff --git a/drivers/hwtracing/coresight/coresight-etm4x.c b/drivers/hwtracing/coresight/coresight-etm4x.c index 167004f9c42b..b6ae9cb6ff57 100644 --- a/drivers/hwtracing/coresight/coresight-etm4x.c +++ b/drivers/hwtracing/coresight/coresight-etm4x.c @@ -63,6 +63,13 @@ static bool etm4_arch_supported(u8 arch) return true; } +static int etm4_cpu_id(struct coresight_device *csdev) +{ + struct etmv4_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); + + return drvdata->cpu; +} + static int etm4_trace_id(struct coresight_device *csdev) { struct etmv4_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); @@ -262,6 +269,7 @@ static void etm4_disable(struct coresight_device *csdev) } static const struct coresight_ops_source etm4_source_ops = { + .cpu_id = etm4_cpu_id, .trace_id = etm4_trace_id, .enable = etm4_enable, .disable = etm4_disable, diff --git a/include/linux/coresight.h b/include/linux/coresight.h index a7cabfa23b55..bf62b265bf52 100644 --- a/include/linux/coresight.h +++ b/include/linux/coresight.h @@ -205,12 +205,15 @@ struct coresight_ops_link { /** * struct coresight_ops_source - basic operations for a source * Operations available for sources. + * @cpu_id: returns the value of the CPU number this component + * is associated to. * @trace_id: returns the value of the component's trace ID as known to the HW. * @enable: enables tracing for a source. * @disable: disables tracing for a source. */ struct coresight_ops_source { + int (*cpu_id)(struct coresight_device *csdev); int (*trace_id)(struct coresight_device *csdev); int (*enable)(struct coresight_device *csdev); void (*disable)(struct coresight_device *csdev); -- cgit v1.2.3 From 94e1dec7a9c111419fee328cf6f082059d8bd0f8 Mon Sep 17 00:00:00 2001 From: Jiaxing Wang Date: Sun, 22 Nov 2015 16:05:10 +0800 Subject: debugfs: Add stub function for debugfs_create_automount(). Add stub for debugfs_create_automount() for when debugfs is not configured in.
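A hedged usage sketch (all example_* names are hypothetical): with the stub in place a driver can call debugfs_create_automount() unconditionally, and a CONFIG_DEBUG_FS=n build compiles and simply sees an error pointer at run time:

#include <linux/debugfs.h>
#include <linux/err.h>
#include <linux/printk.h>

/* example_get_mount() stands in for whatever vfsmount the driver
 * wants spliced in at the automount point. */
static struct vfsmount *example_automount(void *data)
{
	return example_get_mount();
}

static void example_setup_debugfs(void)
{
	struct dentry *d;

	d = debugfs_create_automount("example", NULL,
				     example_automount, NULL);
	/* The stub returns ERR_PTR(-ENODEV) when debugfs is disabled. */
	if (IS_ERR_OR_NULL(d))
		pr_warn("example: debugfs automount unavailable\n");
}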
Signed-off-by: Jiaxing Wang Signed-off-by: Greg Kroah-Hartman --- include/linux/debugfs.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h index 19c066dce1da..981e53ab84e8 100644 --- a/include/linux/debugfs.h +++ b/include/linux/debugfs.h @@ -162,6 +162,14 @@ static inline struct dentry *debugfs_create_symlink(const char *name, return ERR_PTR(-ENODEV); } +static inline struct dentry *debugfs_create_automount(const char *name, + struct dentry *parent, + struct vfsmount *(*f)(void *), + void *data) +{ + return ERR_PTR(-ENODEV); +} + static inline void debugfs_remove(struct dentry *dentry) { } -- cgit v1.2.3 From 2c92c04b6f1f06c3e9752abb02e62b6ee188109d Mon Sep 17 00:00:00 2001 From: Neil Armstrong Date: Mon, 28 Dec 2015 14:39:20 +0100 Subject: memory: omap-gpmc: Add support for AAD timings In order to support extended timings parameters on hardware supporting the "AAD" mode like the AM335x or DM816x, add these entries into the GPMC driver if the hardware is capable. Tested on DM816x and AM335x. Signed-off-by: Neil Armstrong Acked-by: Tony Lindgren Signed-off-by: Roger Quadros --- drivers/memory/omap-gpmc.c | 30 ++++++++++++++++++++++++++++++ include/linux/omap-gpmc.h | 5 +++++ 2 files changed, 35 insertions(+) (limited to 'include/linux') diff --git a/drivers/memory/omap-gpmc.c b/drivers/memory/omap-gpmc.c index 6515dfc2b805..21825ddce4a3 100644 --- a/drivers/memory/omap-gpmc.c +++ b/drivers/memory/omap-gpmc.c @@ -541,9 +541,20 @@ static void gpmc_cs_show_timings(int cs, const char *desc) GPMC_GET_TICKS(GPMC_CS_CONFIG3, 0, 3, "adv-on-ns"); GPMC_GET_TICKS(GPMC_CS_CONFIG3, 8, 12, "adv-rd-off-ns"); GPMC_GET_TICKS(GPMC_CS_CONFIG3, 16, 20, "adv-wr-off-ns"); + if (gpmc_capability & GPMC_HAS_MUX_AAD) { + GPMC_GET_TICKS(GPMC_CS_CONFIG3, 4, 6, "adv-aad-mux-on-ns"); + GPMC_GET_TICKS(GPMC_CS_CONFIG3, 24, 26, + "adv-aad-mux-rd-off-ns"); + GPMC_GET_TICKS(GPMC_CS_CONFIG3, 28, 30, + "adv-aad-mux-wr-off-ns"); + } GPMC_GET_TICKS(GPMC_CS_CONFIG4, 0, 3, "oe-on-ns"); GPMC_GET_TICKS(GPMC_CS_CONFIG4, 8, 12, "oe-off-ns"); + if (gpmc_capability & GPMC_HAS_MUX_AAD) { + GPMC_GET_TICKS(GPMC_CS_CONFIG4, 4, 6, "oe-aad-mux-on-ns"); + GPMC_GET_TICKS(GPMC_CS_CONFIG4, 13, 15, "oe-aad-mux-off-ns"); + } GPMC_GET_TICKS(GPMC_CS_CONFIG4, 16, 19, "we-on-ns"); GPMC_GET_TICKS(GPMC_CS_CONFIG4, 24, 28, "we-off-ns"); @@ -734,9 +745,18 @@ int gpmc_cs_set_timings(int cs, const struct gpmc_timings *t, GPMC_SET_ONE(GPMC_CS_CONFIG3, 0, 3, adv_on); GPMC_SET_ONE(GPMC_CS_CONFIG3, 8, 12, adv_rd_off); GPMC_SET_ONE(GPMC_CS_CONFIG3, 16, 20, adv_wr_off); + if (gpmc_capability & GPMC_HAS_MUX_AAD) { + GPMC_SET_ONE(GPMC_CS_CONFIG3, 4, 6, adv_aad_mux_on); + GPMC_SET_ONE(GPMC_CS_CONFIG3, 24, 26, adv_aad_mux_rd_off); + GPMC_SET_ONE(GPMC_CS_CONFIG3, 28, 30, adv_aad_mux_wr_off); + } GPMC_SET_ONE(GPMC_CS_CONFIG4, 0, 3, oe_on); GPMC_SET_ONE(GPMC_CS_CONFIG4, 8, 12, oe_off); + if (gpmc_capability & GPMC_HAS_MUX_AAD) { + GPMC_SET_ONE(GPMC_CS_CONFIG4, 4, 6, oe_aad_mux_on); + GPMC_SET_ONE(GPMC_CS_CONFIG4, 13, 15, oe_aad_mux_off); + } GPMC_SET_ONE(GPMC_CS_CONFIG4, 16, 19, we_on); GPMC_SET_ONE(GPMC_CS_CONFIG4, 24, 28, we_off); @@ -1722,6 +1742,12 @@ static void __maybe_unused gpmc_read_timings_dt(struct device_node *np, of_property_read_u32(np, "gpmc,adv-on-ns", &gpmc_t->adv_on); of_property_read_u32(np, "gpmc,adv-rd-off-ns", &gpmc_t->adv_rd_off); of_property_read_u32(np, "gpmc,adv-wr-off-ns", &gpmc_t->adv_wr_off); + of_property_read_u32(np, "gpmc,adv-aad-mux-on-ns", + 
&gpmc_t->adv_aad_mux_on); + of_property_read_u32(np, "gpmc,adv-aad-mux-rd-off-ns", + &gpmc_t->adv_aad_mux_rd_off); + of_property_read_u32(np, "gpmc,adv-aad-mux-wr-off-ns", + &gpmc_t->adv_aad_mux_wr_off); /* WE signal timings */ of_property_read_u32(np, "gpmc,we-on-ns", &gpmc_t->we_on); @@ -1730,6 +1756,10 @@ static void __maybe_unused gpmc_read_timings_dt(struct device_node *np, /* OE signal timings */ of_property_read_u32(np, "gpmc,oe-on-ns", &gpmc_t->oe_on); of_property_read_u32(np, "gpmc,oe-off-ns", &gpmc_t->oe_off); + of_property_read_u32(np, "gpmc,oe-aad-mux-on-ns", + &gpmc_t->oe_aad_mux_on); + of_property_read_u32(np, "gpmc,oe-aad-mux-off-ns", + &gpmc_t->oe_aad_mux_off); /* access and cycle timings */ of_property_read_u32(np, "gpmc,page-burst-access-ns", diff --git a/include/linux/omap-gpmc.h b/include/linux/omap-gpmc.h index 7dee00143afd..d833eb4dd446 100644 --- a/include/linux/omap-gpmc.h +++ b/include/linux/omap-gpmc.h @@ -51,6 +51,9 @@ struct gpmc_timings { u32 adv_on; /* Assertion time */ u32 adv_rd_off; /* Read deassertion time */ u32 adv_wr_off; /* Write deassertion time */ + u32 adv_aad_mux_on; /* ADV assertion time for AAD */ + u32 adv_aad_mux_rd_off; /* ADV read deassertion time for AAD */ + u32 adv_aad_mux_wr_off; /* ADV write deassertion time for AAD */ /* WE signals timings corresponding to GPMC_CONFIG4 */ u32 we_on; /* WE assertion time */ @@ -59,6 +62,8 @@ struct gpmc_timings { /* OE signals timings corresponding to GPMC_CONFIG4 */ u32 oe_on; /* OE assertion time */ u32 oe_off; /* OE deassertion time */ + u32 oe_aad_mux_on; /* OE assertion time for AAD */ + u32 oe_aad_mux_off; /* OE deassertion time for AAD */ /* Access time and cycle time timings corresponding to GPMC_CONFIG5 */ u32 page_burst_access; /* Multiple access word delay */ -- cgit v1.2.3 From ee7683a31683c6368a66a83c555600dfd704e543 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 5 Feb 2016 17:31:39 -0800 Subject: spi: Document max_transfer_size Fix kernel-doc warning for missing struct field notation. ..//include/linux/spi/spi.h:540: warning: No description found for parameter 'max_transfer_size' [Meaningful subject -- broonie] Signed-off-by: Randy Dunlap Signed-off-by: Mark Brown --- include/linux/spi/spi.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index 53be3a4c60cb..e9290c335cc4 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -303,6 +303,8 @@ static inline void spi_unregister_driver(struct spi_driver *sdrv) * @min_speed_hz: Lowest supported transfer speed * @max_speed_hz: Highest supported transfer speed * @flags: other constraints relevant to this driver + * @max_transfer_size: function that returns the max transfer size for + * a &spi_device; may be %NULL, so the default %SIZE_MAX will be used. 
* @bus_lock_spinlock: spinlock for SPI bus locking * @bus_lock_mutex: mutex for SPI bus locking * @bus_lock_flag: indicates that the SPI bus is locked for exclusive use -- cgit v1.2.3 From b41c29b0527c7fd6a95d0f71274abb79933bf960 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 8 Feb 2016 15:38:32 +0100 Subject: Kbuild: provide a __UNIQUE_ID for clang The default __UNIQUE_ID macro in compiler.h fails to work for some drivers: drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c:615:1: error: redefinition of '__UNIQUE_ID_firmware615' BRCMF_FW_NVRAM_DEF(4354, "brcmfmac4354-sdio.bin", "brcmfmac4354-sdio.txt"); This adds a copy of the version we use for gcc-4.3 and higher, as the same one works with all versions of clang that I could find in svn (2.6 and higher). Signed-off-by: Arnd Bergmann Signed-off-by: Michal Marek --- include/linux/compiler-clang.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h index d1e49d52b640..de179993e039 100644 --- a/include/linux/compiler-clang.h +++ b/include/linux/compiler-clang.h @@ -10,3 +10,8 @@ #undef uninitialized_var #define uninitialized_var(x) x = *(&(x)) #endif + +/* same as gcc, this was present in clang-2.6 so we can assume it works + * with any version that can compile the kernel + */ +#define __UNIQUE_ID(prefix) __PASTE(__PASTE(__UNIQUE_ID_, prefix), __COUNTER__) -- cgit v1.2.3 From 0118de7b4c04cba1b275d889ad14a39950f30021 Mon Sep 17 00:00:00 2001 From: Daniel Baluta Date: Mon, 8 Feb 2016 17:03:15 +0200 Subject: iio: Fix documentation for iio_dev mlock mlock *must* be used by both the core and drivers to protect against simultaneous device state changes. Signed-off-by: Daniel Baluta Signed-off-by: Jonathan Cameron --- include/linux/iio/iio.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/iio/iio.h b/include/linux/iio/iio.h index b5894118755f..ce9e9c1adf34 100644 --- a/include/linux/iio/iio.h +++ b/include/linux/iio/iio.h @@ -448,7 +448,7 @@ struct iio_buffer_setup_ops { * @buffer: [DRIVER] any buffer present * @buffer_list: [INTERN] list of all buffers currently attached * @scan_bytes: [INTERN] num bytes captured to be fed to buffer demux - * @mlock: [INTERN] lock used to prevent simultaneous device state + * @mlock: [DRIVER] lock used to prevent simultaneous device state * changes * @available_scan_masks: [DRIVER] optional array of allowed bitmasks * @masklength: [INTERN] the length of the mask established from -- cgit v1.2.3 From a3499e9bf0feeea593a9daae855986c685561893 Mon Sep 17 00:00:00 2001 From: Sudip Mukherjee Date: Wed, 23 Dec 2015 17:57:19 +0530 Subject: devm: add helper devm_add_action_or_reset() Add a helper function devm_add_action_or_reset() which internally calls devm_add_action(); if devm_add_action() fails, it executes the given action immediately and returns the error code.
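As a usage illustration (a hypothetical driver sketch; the clock calls are only an example, and just devm_add_action_or_reset() itself comes from this patch), a probe() routine can register a cleanup action and rely on it running immediately should registration fail:

static void foo_clk_disable(void *data)
{
	clk_disable_unprepare(data);
}

static int foo_probe(struct platform_device *pdev)
{
	struct clk *clk = devm_clk_get(&pdev->dev, NULL);
	int ret;

	if (IS_ERR(clk))
		return PTR_ERR(clk);

	ret = clk_prepare_enable(clk);
	if (ret)
		return ret;

	/* On failure this has already called foo_clk_disable(clk) */
	return devm_add_action_or_reset(&pdev->dev, foo_clk_disable, clk);
}

Without the helper, every such error path would have to invoke the action by hand before returning.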
Signed-off-by: Sudip Mukherjee Acked-by: Greg Kroah-Hartman Signed-off-by: Stephen Boyd --- include/linux/device.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/device.h b/include/linux/device.h index 6d6f1fec092f..74674e098315 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -682,6 +682,18 @@ void __iomem *devm_ioremap_resource(struct device *dev, struct resource *res); int devm_add_action(struct device *dev, void (*action)(void *), void *data); void devm_remove_action(struct device *dev, void (*action)(void *), void *data); +static inline int devm_add_action_or_reset(struct device *dev, + void (*action)(void *), void *data) +{ + int ret; + + ret = devm_add_action(dev, action, data); + if (ret) + action(data); + + return ret; +} + struct device_dma_parameters { /* * a low level driver may set these to teach IOMMU code about -- cgit v1.2.3 From 6d5bbed30f89acd2ae0d23b3fff5b13b307525d9 Mon Sep 17 00:00:00 2001 From: Shawn Lin Date: Fri, 22 Jan 2016 19:06:50 +0800 Subject: dmaengine: core: expose max burst capability to clients This patch adds max_burst to dma_get_slave_caps() so that clients can query the burst capability of a slave DMA controller. Signed-off-by: Shawn Lin Signed-off-by: Caesar Wang Signed-off-by: Vinod Koul --- drivers/dma/dmaengine.c | 1 + include/linux/dmaengine.h | 4 ++++ 2 files changed, 5 insertions(+) (limited to 'include/linux') diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c index c50a247be2e0..0cb259c59916 100644 --- a/drivers/dma/dmaengine.c +++ b/drivers/dma/dmaengine.c @@ -496,6 +496,7 @@ int dma_get_slave_caps(struct dma_chan *chan, struct dma_slave_caps *caps) caps->src_addr_widths = device->src_addr_widths; caps->dst_addr_widths = device->dst_addr_widths; caps->directions = device->directions; + caps->max_burst = device->max_burst; caps->residue_granularity = device->residue_granularity; caps->descriptor_reuse = device->descriptor_reuse; diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index 16a1cad30c33..0a9a0ba1998b 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -401,6 +401,7 @@ enum dma_residue_granularity { * since the enum dma_transfer_direction is not defined as bits for each * type of direction, the dma controller should fill (1 << ) and same * should be checked by controller as well + * @max_burst: max burst capability per-transfer * @cmd_pause: true, if pause and thereby resume is supported * @cmd_terminate: true, if terminate cmd is supported * @residue_granularity: granularity of the reported transfer residue @@ -411,6 +412,7 @@ struct dma_slave_caps { u32 src_addr_widths; u32 dst_addr_widths; u32 directions; + u32 max_burst; bool cmd_pause; bool cmd_terminate; enum dma_residue_granularity residue_granularity; @@ -654,6 +656,7 @@ struct dma_filter { * the enum dma_transfer_direction is not defined as bits for * each type of direction, the dma controller should fill (1 << * ) and same should be checked by controller as well + * @max_burst: max burst capability per-transfer * @residue_granularity: granularity of the transfer residue reported * by tx_status * @device_alloc_chan_resources: allocate resources and return the @@ -712,6 +715,7 @@ struct dma_device { u32 src_addr_widths; u32 dst_addr_widths; u32 directions; + u32 max_burst; bool descriptor_reuse; enum dma_residue_granularity residue_granularity; -- cgit v1.2.3 From 831e9da7dc5c22fd2a5fb64e999f6e077a4338c3 Mon Sep 17 00:00:00 2001 From: Tiago Vignatti Date: Tue, 22
Dec 2015 19:36:45 -0200 Subject: dma-buf: Remove range-based flush MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch removes range-based information used for optimizations in begin_cpu_access and end_cpu_access. We don't have any user or implementation of range-based flush. There seems to be a consensus that if we ever want something like that again (or something even more robust, using 2D or 3D sub-range regions) we can use the upcoming dma-buf sync ioctl for it. Cc: Sumit Semwal Cc: Daniel Vetter Signed-off-by: Tiago Vignatti Reviewed-by: Stéphane Marchesin Signed-off-by: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/1450820214-12509-3-git-send-email-tiago.vignatti@intel.com --- Documentation/dma-buf-sharing.txt | 19 ++++++++----------- drivers/dma-buf/dma-buf.c | 13 ++++--------- drivers/gpu/drm/i915/i915_gem_dmabuf.c | 2 +- drivers/gpu/drm/omapdrm/omap_gem_dmabuf.c | 4 ++-- drivers/gpu/drm/udl/udl_fb.c | 2 -- drivers/staging/android/ion/ion.c | 6 ++---- drivers/staging/android/ion/ion_test.c | 4 ++-- include/linux/dma-buf.h | 12 +++++------- 8 files changed, 24 insertions(+), 38 deletions(-) (limited to 'include/linux') diff --git a/Documentation/dma-buf-sharing.txt b/Documentation/dma-buf-sharing.txt index 480c8de3c2c4..4f4a84b6903a 100644 --- a/Documentation/dma-buf-sharing.txt +++ b/Documentation/dma-buf-sharing.txt @@ -257,17 +257,15 @@ Access to a dma_buf from the kernel context involves three steps: Interface: int dma_buf_begin_cpu_access(struct dma_buf *dmabuf, - size_t start, size_t len, enum dma_data_direction direction) This allows the exporter to ensure that the memory is actually available for cpu access - the exporter might need to allocate or swap-in and pin the backing storage. The exporter also needs to ensure that cpu access is - coherent for the given range and access direction. The range and access - direction can be used by the exporter to optimize the cache flushing, i.e. - access outside of the range or with a different direction (read instead of - write) might return stale or even bogus data (e.g. when the exporter needs to - copy the data to temporary storage). + coherent for the access direction. The direction can be used by the exporter + to optimize the cache flushing, i.e. access with a different direction (read + instead of write) might return stale or even bogus data (e.g. when the + exporter needs to copy the data to temporary storage). This step might fail, e.g. in oom conditions. @@ -322,14 +320,13 @@ Access to a dma_buf from the kernel context involves three steps: 3. Finish access - When the importer is done accessing the range specified in begin_cpu_access, - it needs to announce this to the exporter (to facilitate cache flushing and - unpinning of any pinned resources). The result of any dma_buf kmap calls - after end_cpu_access is undefined. + When the importer is done accessing the buffer from the CPU, it needs to + announce this to the exporter (to facilitate cache flushing and unpinning + of any pinned resources). The result of any dma_buf kmap calls after + end_cpu_access is undefined. Interface: void dma_buf_end_cpu_access(struct dma_buf *dma_buf, - size_t start, size_t len, enum dma_data_direction dir); diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c index 155c1464948e..b2ac13b4ddaa 100644 --- a/drivers/dma-buf/dma-buf.c +++ b/drivers/dma-buf/dma-buf.c @@ -539,13 +539,11 @@ EXPORT_SYMBOL_GPL(dma_buf_unmap_attachment); * preparations.
Coherency is only guaranteed in the specified range for the * specified access direction. * @dmabuf: [in] buffer to prepare cpu access for. - * @start: [in] start of range for cpu access. - * @len: [in] length of range for cpu access. * @direction: [in] direction of cpu access. * * Can return negative error values, returns 0 on success. */ -int dma_buf_begin_cpu_access(struct dma_buf *dmabuf, size_t start, size_t len, +int dma_buf_begin_cpu_access(struct dma_buf *dmabuf, enum dma_data_direction direction) { int ret = 0; @@ -554,8 +552,7 @@ int dma_buf_begin_cpu_access(struct dma_buf *dmabuf, size_t start, size_t len, return -EINVAL; if (dmabuf->ops->begin_cpu_access) - ret = dmabuf->ops->begin_cpu_access(dmabuf, start, - len, direction); + ret = dmabuf->ops->begin_cpu_access(dmabuf, direction); return ret; } @@ -567,19 +564,17 @@ EXPORT_SYMBOL_GPL(dma_buf_begin_cpu_access); * actions. Coherency is only guaranteed in the specified range for the * specified access direction. * @dmabuf: [in] buffer to complete cpu access for. - * @start: [in] start of range for cpu access. - * @len: [in] length of range for cpu access. * @direction: [in] direction of cpu access. * * This call must always succeed. */ -void dma_buf_end_cpu_access(struct dma_buf *dmabuf, size_t start, size_t len, +void dma_buf_end_cpu_access(struct dma_buf *dmabuf, enum dma_data_direction direction) { WARN_ON(!dmabuf); if (dmabuf->ops->end_cpu_access) - dmabuf->ops->end_cpu_access(dmabuf, start, len, direction); + dmabuf->ops->end_cpu_access(dmabuf, direction); } EXPORT_SYMBOL_GPL(dma_buf_end_cpu_access); diff --git a/drivers/gpu/drm/i915/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/i915_gem_dmabuf.c index e9c2bfd85b52..65ab2bd54af5 100644 --- a/drivers/gpu/drm/i915/i915_gem_dmabuf.c +++ b/drivers/gpu/drm/i915/i915_gem_dmabuf.c @@ -196,7 +196,7 @@ static int i915_gem_dmabuf_mmap(struct dma_buf *dma_buf, struct vm_area_struct * return -EINVAL; } -static int i915_gem_begin_cpu_access(struct dma_buf *dma_buf, size_t start, size_t length, enum dma_data_direction direction) +static int i915_gem_begin_cpu_access(struct dma_buf *dma_buf, enum dma_data_direction direction) { struct drm_i915_gem_object *obj = dma_buf_to_obj(dma_buf); struct drm_device *dev = obj->base.dev; diff --git a/drivers/gpu/drm/omapdrm/omap_gem_dmabuf.c b/drivers/gpu/drm/omapdrm/omap_gem_dmabuf.c index 27c297672076..aebae1c2dab2 100644 --- a/drivers/gpu/drm/omapdrm/omap_gem_dmabuf.c +++ b/drivers/gpu/drm/omapdrm/omap_gem_dmabuf.c @@ -79,7 +79,7 @@ static void omap_gem_dmabuf_release(struct dma_buf *buffer) static int omap_gem_dmabuf_begin_cpu_access(struct dma_buf *buffer, - size_t start, size_t len, enum dma_data_direction dir) + enum dma_data_direction dir) { struct drm_gem_object *obj = buffer->priv; struct page **pages; @@ -94,7 +94,7 @@ static int omap_gem_dmabuf_begin_cpu_access(struct dma_buf *buffer, } static void omap_gem_dmabuf_end_cpu_access(struct dma_buf *buffer, - size_t start, size_t len, enum dma_data_direction dir) + enum dma_data_direction dir) { struct drm_gem_object *obj = buffer->priv; omap_gem_put_pages(obj); diff --git a/drivers/gpu/drm/udl/udl_fb.c b/drivers/gpu/drm/udl/udl_fb.c index 200419d4d43c..c427499133d6 100644 --- a/drivers/gpu/drm/udl/udl_fb.c +++ b/drivers/gpu/drm/udl/udl_fb.c @@ -409,7 +409,6 @@ static int udl_user_framebuffer_dirty(struct drm_framebuffer *fb, if (ufb->obj->base.import_attach) { ret = dma_buf_begin_cpu_access(ufb->obj->base.import_attach->dmabuf, - 0, ufb->obj->base.size, DMA_FROM_DEVICE); if (ret) goto
unlock; @@ -425,7 +424,6 @@ static int udl_user_framebuffer_dirty(struct drm_framebuffer *fb, if (ufb->obj->base.import_attach) { dma_buf_end_cpu_access(ufb->obj->base.import_attach->dmabuf, - 0, ufb->obj->base.size, DMA_FROM_DEVICE); } diff --git a/drivers/staging/android/ion/ion.c b/drivers/staging/android/ion/ion.c index e237e9f3312d..0754a37c9674 100644 --- a/drivers/staging/android/ion/ion.c +++ b/drivers/staging/android/ion/ion.c @@ -1057,8 +1057,7 @@ static void ion_dma_buf_kunmap(struct dma_buf *dmabuf, unsigned long offset, { } -static int ion_dma_buf_begin_cpu_access(struct dma_buf *dmabuf, size_t start, - size_t len, +static int ion_dma_buf_begin_cpu_access(struct dma_buf *dmabuf, enum dma_data_direction direction) { struct ion_buffer *buffer = dmabuf->priv; @@ -1076,8 +1075,7 @@ static int ion_dma_buf_begin_cpu_access(struct dma_buf *dmabuf, size_t start, return PTR_ERR_OR_ZERO(vaddr); } -static void ion_dma_buf_end_cpu_access(struct dma_buf *dmabuf, size_t start, - size_t len, +static void ion_dma_buf_end_cpu_access(struct dma_buf *dmabuf, enum dma_data_direction direction) { struct ion_buffer *buffer = dmabuf->priv; diff --git a/drivers/staging/android/ion/ion_test.c b/drivers/staging/android/ion/ion_test.c index b8dcf5a26cc4..da34bc12cd7c 100644 --- a/drivers/staging/android/ion/ion_test.c +++ b/drivers/staging/android/ion/ion_test.c @@ -109,7 +109,7 @@ static int ion_handle_test_kernel(struct dma_buf *dma_buf, void __user *ptr, if (offset > dma_buf->size || size > dma_buf->size - offset) return -EINVAL; - ret = dma_buf_begin_cpu_access(dma_buf, offset, size, dir); + ret = dma_buf_begin_cpu_access(dma_buf, dir); if (ret) return ret; @@ -139,7 +139,7 @@ static int ion_handle_test_kernel(struct dma_buf *dma_buf, void __user *ptr, copy_offset = 0; } err: - dma_buf_end_cpu_access(dma_buf, offset, size, dir); + dma_buf_end_cpu_access(dma_buf, dir); return ret; } diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h index f98bd7068d55..532108ea0c1c 100644 --- a/include/linux/dma-buf.h +++ b/include/linux/dma-buf.h @@ -54,7 +54,7 @@ struct dma_buf_attachment; * @release: release this buffer; to be called after the last dma_buf_put. * @begin_cpu_access: [optional] called before cpu access to invalidate cpu * caches and allocate backing storage (if not yet done) - * respectively pin the objet into memory. + * respectively pin the object into memory. * @end_cpu_access: [optional] called after cpu access to flush caches. * @kmap_atomic: maps a page from the buffer into kernel address * space, users may not block until the subsequent unmap call. 
@@ -93,10 +93,8 @@ struct dma_buf_ops { /* after final dma_buf_put() */ void (*release)(struct dma_buf *); - int (*begin_cpu_access)(struct dma_buf *, size_t, size_t, - enum dma_data_direction); - void (*end_cpu_access)(struct dma_buf *, size_t, size_t, - enum dma_data_direction); + int (*begin_cpu_access)(struct dma_buf *, enum dma_data_direction); + void (*end_cpu_access)(struct dma_buf *, enum dma_data_direction); void *(*kmap_atomic)(struct dma_buf *, unsigned long); void (*kunmap_atomic)(struct dma_buf *, unsigned long, void *); void *(*kmap)(struct dma_buf *, unsigned long); @@ -224,9 +222,9 @@ struct sg_table *dma_buf_map_attachment(struct dma_buf_attachment *, enum dma_data_direction); void dma_buf_unmap_attachment(struct dma_buf_attachment *, struct sg_table *, enum dma_data_direction); -int dma_buf_begin_cpu_access(struct dma_buf *dma_buf, size_t start, size_t len, +int dma_buf_begin_cpu_access(struct dma_buf *dma_buf, enum dma_data_direction dir); -void dma_buf_end_cpu_access(struct dma_buf *dma_buf, size_t start, size_t len, +void dma_buf_end_cpu_access(struct dma_buf *dma_buf, enum dma_data_direction dir); void *dma_buf_kmap_atomic(struct dma_buf *, unsigned long); void dma_buf_kunmap_atomic(struct dma_buf *, unsigned long, void *); -- cgit v1.2.3 From ff2b1359229927563addbf2f5ad480660c350903 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Tue, 20 Oct 2015 11:10:38 +0200 Subject: gpio: make the gpiochip a real device GPIO chips have been around for years, but they were never real devices; instead they piggy-backed on a parent device (such as a platform_device or amba_device), and even this was always optional. GPIO chips could also exist without any device at all, with their struct device *parent (formerly *dev) pointer set to NULL. When sysfs was in use, a mock device would be created, with the optional parent assigned, or just floating orphaned with NULL as parent. We now create a gpio_device struct containing a real struct device and move the subsystem over to using that; if sysfs is active, it will use this device as the parent. The list of struct gpio_chip:s is augmented to hold struct gpio_device:s and we find gpio_chip:s by first looking up the struct gpio_device. The struct gpio_device is designed to stay around even if the gpio_chip is removed, so as to satisfy users in userspace that need a backing data structure to hold the state of the session initiated with e.g. a character device even if there is no physical chip anymore. From this point on, gpiochips are devices.
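For chip drivers nothing changes at registration time; a minimal sketch (the driver name and callbacks are hypothetical, gpiochip_add_data() is from this patch) shows the struct gpio_device being created entirely behind the scenes:

static struct gpio_chip foo_gc = {
	.label	= "foo-gpio",
	.owner	= THIS_MODULE,	/* now propagated to gpio_device->owner */
	.base	= -1,		/* let gpiolib assign a dynamic base */
	.ngpio	= 8,
	/* .direction_input, .get, .set and friends elided for brevity */
};

static int foo_probe(struct platform_device *pdev)
{
	/* the new gpio_device is created as a child of this parent */
	foo_gc.parent = &pdev->dev;
	return gpiochip_add_data(&foo_gc, NULL);
}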
Cc: Johan Hovold Cc: Michael Welling Cc: Markus Pargmann Signed-off-by: Linus Walleij --- drivers/gpio/gpiolib-sysfs.c | 12 ++- drivers/gpio/gpiolib.c | 232 ++++++++++++++++++++++++++++++------------- drivers/gpio/gpiolib.h | 27 ++++- include/linux/gpio/driver.h | 7 +- 4 files changed, 201 insertions(+), 77 deletions(-) (limited to 'include/linux') diff --git a/drivers/gpio/gpiolib-sysfs.c b/drivers/gpio/gpiolib-sysfs.c index 405dfcaadc4c..28d3bf2328aa 100644 --- a/drivers/gpio/gpiolib-sysfs.c +++ b/drivers/gpio/gpiolib-sysfs.c @@ -547,6 +547,7 @@ static struct class gpio_class = { int gpiod_export(struct gpio_desc *desc, bool direction_may_change) { struct gpio_chip *chip; + struct gpio_device *gdev; struct gpiod_data *data; unsigned long flags; int status; @@ -566,6 +567,7 @@ int gpiod_export(struct gpio_desc *desc, bool direction_may_change) } chip = desc->chip; + gdev = chip->gpiodev; mutex_lock(&sysfs_lock); @@ -605,7 +607,7 @@ int gpiod_export(struct gpio_desc *desc, bool direction_may_change) if (chip->names && chip->names[offset]) ioname = chip->names[offset]; - dev = device_create_with_groups(&gpio_class, chip->parent, + dev = device_create_with_groups(&gpio_class, &gdev->dev, MKDEV(0, 0), data, gpio_groups, ioname ? ioname : "gpio%u", desc_to_gpio(desc)); @@ -771,7 +773,7 @@ static int __init gpiolib_sysfs_init(void) { int status; unsigned long flags; - struct gpio_chip *chip; + struct gpio_device *gdev; status = class_register(&gpio_class); if (status < 0) @@ -784,8 +786,8 @@ static int __init gpiolib_sysfs_init(void) * registered, and so arch_initcall() can always gpio_export(). */ spin_lock_irqsave(&gpio_lock, flags); - list_for_each_entry(chip, &gpio_chips, list) { - if (chip->cdev) + list_for_each_entry(gdev, &gpio_devices, list) { + if (gdev->chip->cdev) continue; /* @@ -798,7 +800,7 @@ static int __init gpiolib_sysfs_init(void) * gpio_lock prevents us from doing this. */ spin_unlock_irqrestore(&gpio_lock, flags); - status = gpiochip_sysfs_register(chip); + status = gpiochip_sysfs_register(gdev->chip); spin_lock_irqsave(&gpio_lock, flags); } spin_unlock_irqrestore(&gpio_lock, flags); diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index 5c1ba879f889..3a073ab5e863 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -16,6 +16,7 @@ #include #include #include +#include #include "gpiolib.h" @@ -42,6 +43,9 @@ #define extra_checks 0 #endif +/* Device and char device-related information */ +static DEFINE_IDA(gpio_ida); + /* gpio_lock prevents conflicts during gpio_desc[] table updates. * While any GPIO is requested, its gpio_chip is not removable; * each GPIO's "requested" flag serves as a lock and refcount. 
@@ -50,8 +54,7 @@ DEFINE_SPINLOCK(gpio_lock); static DEFINE_MUTEX(gpio_lookup_lock); static LIST_HEAD(gpio_lookup_list); -LIST_HEAD(gpio_chips); - +LIST_HEAD(gpio_devices); static void gpiochip_free_hogs(struct gpio_chip *chip); static void gpiochip_irqchip_remove(struct gpio_chip *gpiochip); @@ -67,15 +70,16 @@ static inline void desc_set_label(struct gpio_desc *d, const char *label) */ struct gpio_desc *gpio_to_desc(unsigned gpio) { - struct gpio_chip *chip; + struct gpio_device *gdev; unsigned long flags; spin_lock_irqsave(&gpio_lock, flags); - list_for_each_entry(chip, &gpio_chips, list) { - if (chip->base <= gpio && chip->base + chip->ngpio > gpio) { + list_for_each_entry(gdev, &gpio_devices, list) { + if (gdev->chip->base <= gpio && + gdev->chip->base + gdev->chip->ngpio > gpio) { spin_unlock_irqrestore(&gpio_lock, flags); - return &chip->desc[gpio - chip->base]; + return &gdev->chip->desc[gpio - gdev->chip->base]; } } @@ -125,16 +129,16 @@ EXPORT_SYMBOL_GPL(gpiod_to_chip); /* dynamic allocation of GPIOs, e.g. on a hotplugged device */ static int gpiochip_find_base(int ngpio) { - struct gpio_chip *chip; + struct gpio_device *gdev; int base = ARCH_NR_GPIOS - ngpio; - list_for_each_entry_reverse(chip, &gpio_chips, list) { + list_for_each_entry_reverse(gdev, &gpio_devices, list) { /* found a free space? */ - if (chip->base + chip->ngpio <= base) + if (gdev->chip->base + gdev->chip->ngpio <= base) break; else /* nope, check the space right before the chip */ - base = chip->base - ngpio; + base = gdev->chip->base - ngpio; } if (gpio_is_valid(base)) { @@ -187,18 +191,28 @@ EXPORT_SYMBOL_GPL(gpiod_get_direction); * Return -EBUSY if the new chip overlaps with some other chip's integer * space. */ -static int gpiochip_add_to_list(struct gpio_chip *chip) +static int gpiodev_add_to_list(struct gpio_device *gdev) { - struct gpio_chip *iterator; - struct gpio_chip *previous = NULL; + struct gpio_device *iterator; + struct gpio_device *previous = NULL; + + if (!gdev->chip) + return -EINVAL; - if (list_empty(&gpio_chips)) { - list_add_tail(&chip->list, &gpio_chips); + if (list_empty(&gpio_devices)) { + list_add_tail(&gdev->list, &gpio_devices); return 0; } - list_for_each_entry(iterator, &gpio_chips, list) { - if (iterator->base >= chip->base + chip->ngpio) { + list_for_each_entry(iterator, &gpio_devices, list) { + /* + * The list may contain dangling GPIO devices with no + * live chip assigned. + */ + if (!iterator->chip) + continue; + if (iterator->chip->base >= + gdev->chip->base + gdev->chip->ngpio) { /* * Iterator is the first GPIO chip so there is no * previous one @@ -211,8 +225,8 @@ static int gpiochip_add_to_list(struct gpio_chip *chip) * [base, base + ngpio - 1]) between previous * and iterator chip. */ - if (previous->base + previous->ngpio - <= chip->base) + if (previous->chip->base + previous->chip->ngpio + <= gdev->chip->base) goto found; } } @@ -225,18 +239,18 @@ static int gpiochip_add_to_list(struct gpio_chip *chip) * Let iterator point to the last chip in the list. 
*/ - iterator = list_last_entry(&gpio_chips, struct gpio_chip, list); - if (iterator->base + iterator->ngpio <= chip->base) { - list_add(&chip->list, &iterator->list); + iterator = list_last_entry(&gpio_devices, struct gpio_device, list); + if (iterator->chip->base + iterator->chip->ngpio <= gdev->chip->base) { + list_add(&gdev->list, &iterator->list); return 0; } - dev_err(chip->parent, + dev_err(&gdev->dev, "GPIO integer space overlap, cannot add chip\n"); return -EBUSY; found: - list_add_tail(&chip->list, &iterator->list); + list_add_tail(&gdev->list, &iterator->list); return 0; } @@ -245,16 +259,16 @@ found: */ static struct gpio_desc *gpio_name_to_desc(const char * const name) { - struct gpio_chip *chip; + struct gpio_device *gdev; unsigned long flags; spin_lock_irqsave(&gpio_lock, flags); - list_for_each_entry(chip, &gpio_chips, list) { + list_for_each_entry(gdev, &gpio_devices, list) { int i; - for (i = 0; i != chip->ngpio; ++i) { - struct gpio_desc *gpio = &chip->desc[i]; + for (i = 0; i != gdev->chip->ngpio; ++i) { + struct gpio_desc *gpio = &gdev->chip->desc[i]; if (!gpio->name || !name) continue; @@ -302,6 +316,14 @@ static int gpiochip_set_desc_names(struct gpio_chip *gc) return 0; } +static void gpiodevice_release(struct device *dev) +{ + struct gpio_device *gdev = dev_get_drvdata(dev); + + list_del(&gdev->list); + ida_simple_remove(&gpio_ida, gdev->id); +} + /** * gpiochip_add_data() - register a gpio_chip * @chip: the chip to register, with chip->base initialized @@ -323,19 +345,60 @@ int gpiochip_add_data(struct gpio_chip *chip, void *data) { unsigned long flags; int status = 0; - unsigned id; + unsigned i; int base = chip->base; struct gpio_desc *descs; + struct gpio_device *gdev; - descs = kcalloc(chip->ngpio, sizeof(descs[0]), GFP_KERNEL); - if (!descs) + /* + * First: allocate and populate the internal state container, and + * set up the struct device.
+ */ + gdev = kmalloc(sizeof(*gdev), GFP_KERNEL); + if (!gdev) return -ENOMEM; + gdev->chip = chip; + chip->gpiodev = gdev; + if (chip->parent) { + gdev->dev.parent = chip->parent; + gdev->dev.of_node = chip->parent->of_node; + } else { +#ifdef CONFIG_OF_GPIO + /* If the gpiochip has an assigned OF node this takes precedence */ + if (chip->of_node) + gdev->dev.of_node = chip->of_node; +#endif + } + gdev->id = ida_simple_get(&gpio_ida, 0, 0, GFP_KERNEL); + if (gdev->id < 0) { + status = gdev->id; + goto err_free_gdev; + } + dev_set_name(&gdev->dev, "gpiochip%d", gdev->id); + device_initialize(&gdev->dev); + dev_set_drvdata(&gdev->dev, gdev); + if (chip->parent && chip->parent->driver) + gdev->owner = chip->parent->driver->owner; + else if (chip->owner) + /* TODO: remove chip->owner */ + gdev->owner = chip->owner; + else + gdev->owner = THIS_MODULE; + /* FIXME: devm_kcalloc() these and move to gpio_device */ + descs = kcalloc(chip->ngpio, sizeof(descs[0]), GFP_KERNEL); + if (!descs) { + status = -ENOMEM; + goto err_free_gdev; + } + + /* FIXME: move driver data into gpio_device dev_set_drvdata() */ chip->data = data; if (chip->ngpio == 0) { chip_err(chip, "tried to insert a GPIO chip with zero lines\n"); - return -EINVAL; + status = -EINVAL; + goto err_free_descs; } spin_lock_irqsave(&gpio_lock, flags); @@ -350,15 +413,16 @@ int gpiochip_add_data(struct gpio_chip *chip, void *data) chip->base = base; } - status = gpiochip_add_to_list(chip); + status = gpiodev_add_to_list(gdev); if (status) { spin_unlock_irqrestore(&gpio_lock, flags); goto err_free_descs; } - for (id = 0; id < chip->ngpio; id++) { - struct gpio_desc *desc = &descs[id]; + for (i = 0; i < chip->ngpio; i++) { + struct gpio_desc *desc = &descs[i]; + /* REVISIT: maybe a pointer to gpio_device is better */ desc->chip = chip; /* REVISIT: most hardware initializes GPIOs as inputs (often @@ -369,18 +433,15 @@ int gpiochip_add_data(struct gpio_chip *chip, void *data) */ desc->flags = !chip->direction_input ? (1 << FLAG_IS_OUT) : 0; } - chip->desc = descs; spin_unlock_irqrestore(&gpio_lock, flags); #ifdef CONFIG_PINCTRL + /* FIXME: move pin ranges to gpio_device */ INIT_LIST_HEAD(&chip->pin_ranges); #endif - if (!chip->owner && chip->parent && chip->parent->driver) - chip->owner = chip->parent->driver->owner; - status = gpiochip_set_desc_names(chip); if (status) goto err_remove_from_list; @@ -391,28 +452,39 @@ int gpiochip_add_data(struct gpio_chip *chip, void *data) acpi_gpiochip_add(chip); - status = gpiochip_sysfs_register(chip); + status = device_add(&gdev->dev); if (status) goto err_remove_chip; + status = gpiochip_sysfs_register(chip); + if (status) + goto err_remove_device; + + /* From this point, the .release() function cleans up gpio_device */ + gdev->dev.release = gpiodevice_release; + get_device(&gdev->dev); pr_debug("%s: registered GPIOs %d to %d on device: %s\n", __func__, chip->base, chip->base + chip->ngpio - 1, chip->label ? : "generic"); return 0; +err_remove_device: + device_del(&gdev->dev); err_remove_chip: acpi_gpiochip_remove(chip); gpiochip_free_hogs(chip); of_gpiochip_remove(chip); err_remove_from_list: spin_lock_irqsave(&gpio_lock, flags); - list_del(&chip->list); + list_del(&gdev->list); spin_unlock_irqrestore(&gpio_lock, flags); chip->desc = NULL; err_free_descs: kfree(descs); - +err_free_gdev: + ida_simple_remove(&gpio_ida, gdev->id); + kfree(gdev); /* failures here can mean systems won't boot... 
*/ pr_err("%s: GPIOs %d..%d (%s) failed to register\n", __func__, chip->base, chip->base + chip->ngpio - 1, @@ -429,15 +501,18 @@ EXPORT_SYMBOL_GPL(gpiochip_add_data); */ void gpiochip_remove(struct gpio_chip *chip) { + struct gpio_device *gdev = chip->gpiodev; struct gpio_desc *desc; unsigned long flags; unsigned id; bool requested = false; - gpiochip_sysfs_unregister(chip); + /* Numb the device, cancelling all outstanding operations */ + gdev->chip = NULL; + /* FIXME: should the legacy sysfs handling be moved to gpio_device? */ + gpiochip_sysfs_unregister(chip); gpiochip_irqchip_remove(chip); - acpi_gpiochip_remove(chip); gpiochip_remove_pin_ranges(chip); gpiochip_free_hogs(chip); @@ -450,15 +525,23 @@ void gpiochip_remove(struct gpio_chip *chip) if (test_bit(FLAG_REQUESTED, &desc->flags)) requested = true; } - list_del(&chip->list); spin_unlock_irqrestore(&gpio_lock, flags); if (requested) dev_crit(chip->parent, "REMOVING GPIOCHIP WITH GPIOS STILL REQUESTED\n"); + /* FIXME: need to be moved to gpio_device and held there */ kfree(chip->desc); chip->desc = NULL; + + /* + * The gpiochip side puts its use of the device to rest here: + * if there are no userspace clients, the chardev and device will + * be removed, else it will be dangling until the last user is + * gone. + */ + put_device(&gdev->dev); } EXPORT_SYMBOL_GPL(gpiochip_remove); @@ -477,17 +560,21 @@ struct gpio_chip *gpiochip_find(void *data, int (*match)(struct gpio_chip *chip, void *data)) { + struct gpio_device *gdev; struct gpio_chip *chip; unsigned long flags; spin_lock_irqsave(&gpio_lock, flags); - list_for_each_entry(chip, &gpio_chips, list) - if (match(chip, data)) + list_for_each_entry(gdev, &gpio_devices, list) + if (match(gdev->chip, data)) break; /* No match? */ - if (&chip->list == &gpio_chips) + if (&gdev->list == &gpio_devices) chip = NULL; + else + chip = gdev->chip; + spin_unlock_irqrestore(&gpio_lock, flags); return chip; @@ -617,14 +704,14 @@ static int gpiochip_irq_reqres(struct irq_data *d) { struct gpio_chip *chip = irq_data_get_irq_chip_data(d); - if (!try_module_get(chip->owner)) + if (!try_module_get(chip->gpiodev->owner)) return -ENODEV; if (gpiochip_lock_as_irq(chip, d->hwirq)) { chip_err(chip, "unable to lock HW IRQ %lu for IRQ\n", d->hwirq); - module_put(chip->owner); + module_put(chip->gpiodev->owner); return -EINVAL; } return 0; @@ -635,7 +722,7 @@ static void gpiochip_irq_relres(struct irq_data *d) struct gpio_chip *chip = irq_data_get_irq_chip_data(d); gpiochip_unlock_as_irq(chip, d->hwirq); - module_put(chip->owner); + module_put(chip->gpiodev->owner); } static int gpiochip_to_irq(struct gpio_chip *chip, unsigned offset) @@ -985,10 +1072,10 @@ int gpiod_request(struct gpio_desc *desc, const char *label) if (!chip) goto done; - if (try_module_get(chip->owner)) { + if (try_module_get(chip->gpiodev->owner)) { status = __gpiod_request(desc, label); if (status < 0) - module_put(chip->owner); + module_put(chip->gpiodev->owner); } done: @@ -1034,7 +1121,7 @@ static bool __gpiod_free(struct gpio_desc *desc) void gpiod_free(struct gpio_desc *desc) { if (desc && __gpiod_free(desc)) - module_put(desc->chip->owner); + module_put(desc->chip->gpiodev->owner); else WARN_ON(extra_checks); } @@ -2492,16 +2579,16 @@ static void gpiolib_dbg_show(struct seq_file *s, struct gpio_chip *chip) static void *gpiolib_seq_start(struct seq_file *s, loff_t *pos) { unsigned long flags; - struct gpio_chip *chip = NULL; + struct gpio_device *gdev = NULL; loff_t index = *pos; s->private = ""; spin_lock_irqsave(&gpio_lock, flags); 
- list_for_each_entry(chip, &gpio_chips, list) + list_for_each_entry(gdev, &gpio_devices, list) if (index-- == 0) { spin_unlock_irqrestore(&gpio_lock, flags); - return chip; + return gdev; } spin_unlock_irqrestore(&gpio_lock, flags); @@ -2511,14 +2598,14 @@ static void *gpiolib_seq_start(struct seq_file *s, loff_t *pos) static void *gpiolib_seq_next(struct seq_file *s, void *v, loff_t *pos) { unsigned long flags; - struct gpio_chip *chip = v; + struct gpio_device *gdev = v; void *ret = NULL; spin_lock_irqsave(&gpio_lock, flags); - if (list_is_last(&chip->list, &gpio_chips)) + if (list_is_last(&gdev->list, &gpio_devices)) ret = NULL; else - ret = list_entry(chip->list.next, struct gpio_chip, list); + ret = list_entry(gdev->list.next, struct gpio_device, list); spin_unlock_irqrestore(&gpio_lock, flags); s->private = "\n"; @@ -2533,15 +2620,24 @@ static void gpiolib_seq_stop(struct seq_file *s, void *v) static int gpiolib_seq_show(struct seq_file *s, void *v) { - struct gpio_chip *chip = v; - struct device *dev; + struct gpio_device *gdev = v; + struct gpio_chip *chip = gdev->chip; + struct device *parent; + + if (!chip) { + seq_printf(s, "%s%s: (dangling chip)", (char *)s->private, + dev_name(&gdev->dev)); + return 0; + } - seq_printf(s, "%sGPIOs %d-%d", (char *)s->private, - chip->base, chip->base + chip->ngpio - 1); - dev = chip->parent; - if (dev) - seq_printf(s, ", %s/%s", dev->bus ? dev->bus->name : "no-bus", - dev_name(dev)); + seq_printf(s, "%s%s: GPIOs %d-%d", (char *)s->private, + dev_name(&gdev->dev), + chip->base, chip->base + chip->ngpio - 1); + parent = chip->parent; + if (parent) + seq_printf(s, ", parent: %s/%s", + parent->bus ? parent->bus->name : "no-bus", + dev_name(parent)); if (chip->label) seq_printf(s, ", %s", chip->label); if (chip->can_sleep) diff --git a/drivers/gpio/gpiolib.h b/drivers/gpio/gpiolib.h index 99ed3b00ffe9..feea2c823e47 100644 --- a/drivers/gpio/gpiolib.h +++ b/drivers/gpio/gpiolib.h @@ -12,13 +12,38 @@ #ifndef GPIOLIB_H #define GPIOLIB_H +#include #include #include +#include +#include enum of_gpio_flags; enum gpiod_flags; struct acpi_device; +/** + * struct gpio_device - internal state container for GPIO devices + * @id: numerical ID number for the GPIO chip + * @dev: the GPIO device struct + * @owner: helps prevent removal of modules exporting active GPIOs + * @chip: pointer to the corresponding gpiochip, holding static + * data for this device + * @list: links gpio_device:s together for traversal + * + * This state container holds most of the runtime variable data + * for a GPIO device and can hold references and live on after the + * GPIO chip has been removed, if it is still being used from + * userspace. 
+ */ +struct gpio_device { + int id; + struct device dev; + struct module *owner; + struct gpio_chip *chip; + struct list_head list; +}; + /** * struct acpi_gpio_info - ACPI GPIO specific information * @gpioint: if %true this GPIO is of type GpioInt otherwise type is GpioIo @@ -90,7 +115,7 @@ struct gpio_desc *of_get_named_gpiod_flags(struct device_node *np, struct gpio_desc *gpiochip_get_desc(struct gpio_chip *chip, u16 hwnum); extern struct spinlock gpio_lock; -extern struct list_head gpio_chips; +extern struct list_head gpio_devices; struct gpio_desc { struct gpio_chip *chip; diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index 82fda487453f..f3f1dbd43c9b 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -1,6 +1,7 @@ #ifndef __LINUX_GPIO_DRIVER_H #define __LINUX_GPIO_DRIVER_H +#include #include #include #include @@ -10,22 +11,22 @@ #include #include -struct device; struct gpio_desc; struct of_phandle_args; struct device_node; struct seq_file; +struct gpio_device; #ifdef CONFIG_GPIOLIB /** * struct gpio_chip - abstract a GPIO controller * @label: for diagnostics + * @gpiodev: the internal state holder, opaque struct * @parent: optional parent device providing the GPIOs * @cdev: class device used by sysfs interface (may be NULL) * @owner: helps prevent removal of modules exporting active GPIOs * @data: per-instance data assigned by the driver - * @list: links gpio_chips together for traversal * @request: optional hook for chip-specific activation, such as * enabling module power and clock; may sleep * @free: optional hook for chip-specific deactivation, such as @@ -107,11 +108,11 @@ struct seq_file; */ struct gpio_chip { const char *label; + struct gpio_device *gpiodev; struct device *parent; struct device *cdev; struct module *owner; void *data; - struct list_head list; int (*request)(struct gpio_chip *chip, unsigned offset); -- cgit v1.2.3 From 156d7d4120e1c860fde667fc30eeae84bc3e7a25 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Mon, 11 Jan 2016 20:09:20 +0100 Subject: vga_switcheroo: Add handler flags infrastructure Allow handlers to declare their capabilities and allow clients to obtain that information. So far we have these use cases: * If the handler is able to switch DDC separately, clients need to probe EDID with drm_get_edid_switcheroo(). We should allow them to detect a capable handler to ensure this function only gets called when needed. * Likewise, if the handler is unable to switch AUX separately, the active client needs to communicate link training parameters to the inactive client, which may then skip the AUX handshake and set up its output with these pre-calibrated values (DisplayPort specification v1.1a, section 2.5.3.3). Clients need a way to recognize such a situation. The flags for the radeon_atpx_handler and amdgpu_atpx_handler are initially set to 0; this can later be amended with handler_flags |= VGA_SWITCHEROO_CAN_SWITCH_DDC; when a ->switch_ddc callback is added.
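On the client side, the intended pattern might look roughly like this (a sketch only: connector and adapter stand in for driver state, and drm_get_edid_switcheroo() is referenced by this commit message but added elsewhere):

	struct edid *edid;

	if (vga_switcheroo_handler_flags() & VGA_SWITCHEROO_CAN_SWITCH_DDC)
		/* the handler can mux the DDC lines over to the inactive GPU */
		edid = drm_get_edid_switcheroo(connector, adapter);
	else
		edid = drm_get_edid(connector, adapter);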
Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=88861 Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=61115 Tested-by: Lukas Wunner [MBP 9,1 2012 intel IVB + nvidia GK107 pre-retina 15"] Signed-off-by: Lukas Wunner Reviewed-by: Darren Hart Signed-off-by: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/2b0d93ed6e511ca09e95e45e0b35627f330fabce.1452525860.git.lukas@wunner.de --- Documentation/DocBook/gpu.tmpl | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c | 3 ++- drivers/gpu/drm/nouveau/nouveau_acpi.c | 2 +- drivers/gpu/drm/radeon/radeon_atpx_handler.c | 3 ++- drivers/gpu/vga/vga_switcheroo.c | 22 ++++++++++++++++++- drivers/platform/x86/apple-gmux.c | 2 +- include/linux/vga_switcheroo.h | 28 ++++++++++++++++++++++-- 7 files changed, 54 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/Documentation/DocBook/gpu.tmpl b/Documentation/DocBook/gpu.tmpl index 49c97913c5ae..d6579d8c1341 100644 --- a/Documentation/DocBook/gpu.tmpl +++ b/Documentation/DocBook/gpu.tmpl @@ -3422,6 +3422,7 @@ int num_ioctls; Public constants +!Finclude/linux/vga_switcheroo.h vga_switcheroo_handler_flags_t !Finclude/linux/vga_switcheroo.h vga_switcheroo_client_id !Finclude/linux/vga_switcheroo.h vga_switcheroo_state diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c index 3c895863fcf5..fa948dcbdd5d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c @@ -552,13 +552,14 @@ static bool amdgpu_atpx_detect(void) void amdgpu_register_atpx_handler(void) { bool r; + enum vga_switcheroo_handler_flags_t handler_flags = 0; /* detect if we have any ATPX + 2 VGA in the system */ r = amdgpu_atpx_detect(); if (!r) return; - vga_switcheroo_register_handler(&amdgpu_atpx_handler); + vga_switcheroo_register_handler(&amdgpu_atpx_handler, handler_flags); } /** diff --git a/drivers/gpu/drm/nouveau/nouveau_acpi.c b/drivers/gpu/drm/nouveau/nouveau_acpi.c index d5e6938cc6bc..cdf522770cfa 100644 --- a/drivers/gpu/drm/nouveau/nouveau_acpi.c +++ b/drivers/gpu/drm/nouveau/nouveau_acpi.c @@ -314,7 +314,7 @@ void nouveau_register_dsm_handler(void) if (!r) return; - vga_switcheroo_register_handler(&nouveau_dsm_handler); + vga_switcheroo_register_handler(&nouveau_dsm_handler, 0); } /* Must be called for Optimus models before the card can be turned off */ diff --git a/drivers/gpu/drm/radeon/radeon_atpx_handler.c b/drivers/gpu/drm/radeon/radeon_atpx_handler.c index c4b4f298a283..56482e35d43e 100644 --- a/drivers/gpu/drm/radeon/radeon_atpx_handler.c +++ b/drivers/gpu/drm/radeon/radeon_atpx_handler.c @@ -551,13 +551,14 @@ static bool radeon_atpx_detect(void) void radeon_register_atpx_handler(void) { bool r; + enum vga_switcheroo_handler_flags_t handler_flags = 0; /* detect if we have any ATPX + 2 VGA in the system */ r = radeon_atpx_detect(); if (!r) return; - vga_switcheroo_register_handler(&radeon_atpx_handler); + vga_switcheroo_register_handler(&radeon_atpx_handler, handler_flags); } /** diff --git a/drivers/gpu/vga/vga_switcheroo.c b/drivers/gpu/vga/vga_switcheroo.c index 665ab9fd0e01..e89f6ad5d83e 100644 --- a/drivers/gpu/vga/vga_switcheroo.c +++ b/drivers/gpu/vga/vga_switcheroo.c @@ -126,6 +126,7 @@ static DEFINE_MUTEX(vgasr_mutex); * (counting only vga clients, not audio clients) * @clients: list of registered clients * @handler: registered handler + * @handler_flags: flags of registered handler * * vga_switcheroo private data. 
Currently only one vga_switcheroo instance * per system is supported. @@ -142,6 +143,7 @@ struct vgasr_priv { struct list_head clients; const struct vga_switcheroo_handler *handler; + enum vga_switcheroo_handler_flags_t handler_flags; }; #define ID_BIT_AUDIO 0x100 @@ -190,13 +192,15 @@ static void vga_switcheroo_enable(void) /** * vga_switcheroo_register_handler() - register handler * @handler: handler callbacks + * @handler_flags: handler flags * * Register handler. Enable vga_switcheroo if two vga clients have already * registered. * * Return: 0 on success, -EINVAL if a handler was already registered. */ -int vga_switcheroo_register_handler(const struct vga_switcheroo_handler *handler) +int vga_switcheroo_register_handler(const struct vga_switcheroo_handler *handler, + enum vga_switcheroo_handler_flags_t handler_flags) { mutex_lock(&vgasr_mutex); if (vgasr_priv.handler) { @@ -205,6 +209,7 @@ int vga_switcheroo_register_handler(const struct vga_switcheroo_handler *handler } vgasr_priv.handler = handler; + vgasr_priv.handler_flags = handler_flags; if (vga_switcheroo_ready()) { pr_info("enabled\n"); vga_switcheroo_enable(); @@ -222,6 +227,7 @@ EXPORT_SYMBOL(vga_switcheroo_register_handler); void vga_switcheroo_unregister_handler(void) { mutex_lock(&vgasr_mutex); + vgasr_priv.handler_flags = 0; vgasr_priv.handler = NULL; if (vgasr_priv.active) { pr_info("disabled\n"); @@ -232,6 +238,20 @@ void vga_switcheroo_unregister_handler(void) } EXPORT_SYMBOL(vga_switcheroo_unregister_handler); +/** + * vga_switcheroo_handler_flags() - obtain handler flags + * + * Helper for clients to obtain the handler flags bitmask. + * + * Return: Handler flags. A value of 0 means that no handler is registered + * or that the handler has no special capabilities. + */ +enum vga_switcheroo_handler_flags_t vga_switcheroo_handler_flags(void) +{ + return vgasr_priv.handler_flags; +} +EXPORT_SYMBOL(vga_switcheroo_handler_flags); + static int register_client(struct pci_dev *pdev, const struct vga_switcheroo_client_ops *ops, enum vga_switcheroo_client_id id, bool active, diff --git a/drivers/platform/x86/apple-gmux.c b/drivers/platform/x86/apple-gmux.c index f236250ac106..c401d4936b65 100644 --- a/drivers/platform/x86/apple-gmux.c +++ b/drivers/platform/x86/apple-gmux.c @@ -705,7 +705,7 @@ static int gmux_probe(struct pnp_dev *pnp, const struct pnp_device_id *id) init_completion(&gmux_data->powerchange_done); gmux_enable_interrupts(gmux_data); - if (vga_switcheroo_register_handler(&gmux_handler)) { + if (vga_switcheroo_register_handler(&gmux_handler, 0)) { ret = -ENODEV; goto err_register_handler; } diff --git a/include/linux/vga_switcheroo.h b/include/linux/vga_switcheroo.h index 69e1d4a1f1b3..a745f4f0f729 100644 --- a/include/linux/vga_switcheroo.h +++ b/include/linux/vga_switcheroo.h @@ -35,6 +35,26 @@ struct pci_dev; +/** + * enum vga_switcheroo_handler_flags_t - handler flags bitmask + * @VGA_SWITCHEROO_CAN_SWITCH_DDC: whether the handler is able to switch the + * DDC lines separately. This signals to clients that they should call + * drm_get_edid_switcheroo() to probe the EDID + * @VGA_SWITCHEROO_NEEDS_EDP_CONFIG: whether the handler is unable to switch + * the AUX channel separately. This signals to clients that the active + * GPU needs to train the link and communicate the link parameters to the + * inactive GPU (mediated by vga_switcheroo). 
The inactive GPU may then + * skip the AUX handshake and set up its output with these pre-calibrated + * values (DisplayPort specification v1.1a, section 2.5.3.3) + * + * Handler flags bitmask. Used by handlers to declare their capabilities upon + * registering with vga_switcheroo. + */ +enum vga_switcheroo_handler_flags_t { + VGA_SWITCHEROO_CAN_SWITCH_DDC = (1 << 0), + VGA_SWITCHEROO_NEEDS_EDP_CONFIG = (1 << 1), +}; + /** * enum vga_switcheroo_state - client power state * @VGA_SWITCHEROO_OFF: off @@ -132,8 +152,10 @@ int vga_switcheroo_register_audio_client(struct pci_dev *pdev, void vga_switcheroo_client_fb_set(struct pci_dev *dev, struct fb_info *info); -int vga_switcheroo_register_handler(const struct vga_switcheroo_handler *handler); +int vga_switcheroo_register_handler(const struct vga_switcheroo_handler *handler, + enum vga_switcheroo_handler_flags_t handler_flags); void vga_switcheroo_unregister_handler(void); +enum vga_switcheroo_handler_flags_t vga_switcheroo_handler_flags(void); int vga_switcheroo_process_delayed_switch(void); @@ -150,11 +172,13 @@ static inline void vga_switcheroo_unregister_client(struct pci_dev *dev) {} static inline int vga_switcheroo_register_client(struct pci_dev *dev, const struct vga_switcheroo_client_ops *ops, bool driver_power_control) { return 0; } static inline void vga_switcheroo_client_fb_set(struct pci_dev *dev, struct fb_info *info) {} -static inline int vga_switcheroo_register_handler(const struct vga_switcheroo_handler *handler) { return 0; } +static inline int vga_switcheroo_register_handler(const struct vga_switcheroo_handler *handler, + enum vga_switcheroo_handler_flags_t handler_flags) { return 0; } static inline int vga_switcheroo_register_audio_client(struct pci_dev *pdev, const struct vga_switcheroo_client_ops *ops, enum vga_switcheroo_client_id id) { return 0; } static inline void vga_switcheroo_unregister_handler(void) {} +static inline enum vga_switcheroo_handler_flags_t vga_switcheroo_handler_flags(void) { return 0; } static inline int vga_switcheroo_process_delayed_switch(void) { return 0; } static inline enum vga_switcheroo_state vga_switcheroo_get_client_state(struct pci_dev *dev) { return VGA_SWITCHEROO_ON; } -- cgit v1.2.3 From e4cb81d7e49c806fa557cf0ff4f3f40bd7a9cb7c Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Mon, 11 Jan 2016 20:09:20 +0100 Subject: vga_switcheroo: Add support for switching only the DDC Originally by Seth Forshee , 2012-10-04: During graphics driver initialization it's useful to be able to mux only the DDC to the inactive client in order to read the EDID. Add a switch_ddc callback to allow capable handlers to provide this functionality, and add vga_switcheroo_switch_ddc() to allow DRM to mux only the DDC. Modified by Dave Airlie , 2012-12-22: I can't figure out why I didn't like this, but I rewrote this [...] to lock/unlock the ddc lines [...]. I think I'd prefer something like that otherwise the interface got really ugly. Modified by Lukas Wunner , 2015-04 - 2015-10: Change semantics of ->switch_ddc handler callback to return previous DDC owner. Original version tried to determine previous DDC owner with find_active_client() but this fails if the inactive client registers before the active client. 
Don't lock vgasr_mutex in _lock_ddc() / _unlock_ddc(), it can cause deadlocks because (a) during switch (with vgasr_mutex already held), GPU is woken and probes its outputs, tries to re-acquire vgasr_mutex to lock DDC lines; (b) Likewise during switch, GPU is suspended and calls cancel_delayed_work_sync() to stop output polling, if poll task is running at this moment we may wait forever for it to finish. Instead, lock mux_hw_lock when unregistering the handler because the only reason why we'd want to lock vgasr_mutex in _lock_ddc() / _unlock_ddc() is to block the handler from disappearing while DDC lines are switched. Also acquire mux_hw_lock in stage2 to avoid race condition where reading the EDID and switching happens simultaneously. Likewise on MIGD / MDIS commands and on runtime suspend. v2.1: Overhaul locking, squash commits (Daniel Vetter) v2.2: Readability improvements (Thierry Reding) v2.3: Overhaul locking once more v2.4: Retain semantics of ->switchto handler callback to switch all pins, including DDC (Daniel Vetter) v5: Rename ddc_lock to mux_hw_lock: Since we acquire this both when calling ->switch_ddc and ->switchto, it protects not just access to the DDC lines but to the mux in general. This is in line with the DRM convention to use low-level locks to avoid concurrent hw access (e.g. i2c, dp_aux) which are often called hw_lock (Daniel Vetter) Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=88861 Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=61115 Tested-by: Lukas Wunner [MBP 9,1 2012 intel IVB + nvidia GK107 pre-retina 15"] Cc: Seth Forshee Cc: Dave Airlie Signed-off-by: Lukas Wunner Signed-off-by: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/e81ae9722b84c5ed591805fee3ea6dbf5dc6c4b3.1452525860.git.lukas@wunner.de --- drivers/gpu/vga/vga_switcheroo.c | 97 +++++++++++++++++++++++++++++++++++++++- include/linux/vga_switcheroo.h | 8 ++++ 2 files changed, 103 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/gpu/vga/vga_switcheroo.c b/drivers/gpu/vga/vga_switcheroo.c index e89f6ad5d83e..cbd7c986d926 100644 --- a/drivers/gpu/vga/vga_switcheroo.c +++ b/drivers/gpu/vga/vga_switcheroo.c @@ -74,9 +74,17 @@ * there can thus be up to three clients: Two vga clients (GPUs) and one audio * client (on the discrete GPU). The code is mostly prepared to support * machines with more than two GPUs should they become available. + * * The GPU to which the outputs are currently switched is called the * active client in vga_switcheroo parlance. The GPU not in use is the - * inactive client. + * inactive client. When the inactive client's DRM driver is loaded, + * it will be unable to probe the panel's EDID and hence depends on + * VBIOS to provide its display modes. If the VBIOS modes are bogus or + * if there is no VBIOS at all (which is common on the MacBook Pro), + * a client may alternatively request that the DDC lines are temporarily + * switched to it, provided that the handler supports this. Switching + * only the DDC lines and not the entire output avoids unnecessary + * flickering. */ /** @@ -127,6 +135,9 @@ static DEFINE_MUTEX(vgasr_mutex); * @clients: list of registered clients * @handler: registered handler * @handler_flags: flags of registered handler + * @mux_hw_lock: protects mux state + * (in particular while DDC lines are temporarily switched) + * @old_ddc_owner: client to which DDC lines will be switched back on unlock * * vga_switcheroo private data. 
Currently only one vga_switcheroo instance * per system is supported. @@ -144,6 +155,8 @@ struct vgasr_priv { const struct vga_switcheroo_handler *handler; enum vga_switcheroo_handler_flags_t handler_flags; + struct mutex mux_hw_lock; + int old_ddc_owner; }; #define ID_BIT_AUDIO 0x100 @@ -158,6 +171,7 @@ static void vga_switcheroo_debugfs_fini(struct vgasr_priv *priv); /* only one switcheroo per system */ static struct vgasr_priv vgasr_priv = { .clients = LIST_HEAD_INIT(vgasr_priv.clients), + .mux_hw_lock = __MUTEX_INITIALIZER(vgasr_priv.mux_hw_lock), }; static bool vga_switcheroo_ready(void) @@ -227,6 +241,7 @@ EXPORT_SYMBOL(vga_switcheroo_register_handler); void vga_switcheroo_unregister_handler(void) { mutex_lock(&vgasr_mutex); + mutex_lock(&vgasr_priv.mux_hw_lock); vgasr_priv.handler_flags = 0; vgasr_priv.handler = NULL; if (vgasr_priv.active) { @@ -234,6 +249,7 @@ void vga_switcheroo_unregister_handler(void) vga_switcheroo_debugfs_fini(&vgasr_priv); vgasr_priv.active = false; } + mutex_unlock(&vgasr_priv.mux_hw_lock); mutex_unlock(&vgasr_mutex); } EXPORT_SYMBOL(vga_switcheroo_unregister_handler); @@ -432,6 +448,76 @@ void vga_switcheroo_client_fb_set(struct pci_dev *pdev, } EXPORT_SYMBOL(vga_switcheroo_client_fb_set); +/** + * vga_switcheroo_lock_ddc() - temporarily switch DDC lines to a given client + * @pdev: client pci device + * + * Temporarily switch DDC lines to the client identified by @pdev + * (but leave the outputs otherwise switched to where they are). + * This allows the inactive client to probe EDID. The DDC lines must + * afterwards be switched back by calling vga_switcheroo_unlock_ddc(), + * even if this function returns an error. + * + * Return: Previous DDC owner on success or a negative int on error. + * Specifically, %-ENODEV if no handler has registered or if the handler + * does not support switching the DDC lines. Also, a negative value + * returned by the handler is propagated back to the caller. + * The return value has merely an informational purpose for any caller + * which might be interested in it. It is acceptable to ignore the return + * value and simply rely on the result of the subsequent EDID probe, + * which will be %NULL if DDC switching failed. + */ +int vga_switcheroo_lock_ddc(struct pci_dev *pdev) +{ + enum vga_switcheroo_client_id id; + + mutex_lock(&vgasr_priv.mux_hw_lock); + if (!vgasr_priv.handler || !vgasr_priv.handler->switch_ddc) { + vgasr_priv.old_ddc_owner = -ENODEV; + return -ENODEV; + } + + id = vgasr_priv.handler->get_client_id(pdev); + vgasr_priv.old_ddc_owner = vgasr_priv.handler->switch_ddc(id); + return vgasr_priv.old_ddc_owner; +} +EXPORT_SYMBOL(vga_switcheroo_lock_ddc); + +/** + * vga_switcheroo_unlock_ddc() - switch DDC lines back to previous owner + * @pdev: client pci device + * + * Switch DDC lines back to the previous owner after calling + * vga_switcheroo_lock_ddc(). This must be called even if + * vga_switcheroo_lock_ddc() returned an error. + * + * Return: Previous DDC owner on success (i.e. the client identifier of @pdev) + * or a negative int on error. + * Specifically, %-ENODEV if no handler has registered or if the handler + * does not support switching the DDC lines. Also, a negative value + * returned by the handler is propagated back to the caller. + * Finally, invoking this function without calling vga_switcheroo_lock_ddc() + * first is not allowed and will result in %-EINVAL. 
+ */ +int vga_switcheroo_unlock_ddc(struct pci_dev *pdev) +{ + enum vga_switcheroo_client_id id; + int ret = vgasr_priv.old_ddc_owner; + + if (WARN_ON_ONCE(!mutex_is_locked(&vgasr_priv.mux_hw_lock))) + return -EINVAL; + + if (vgasr_priv.old_ddc_owner >= 0) { + id = vgasr_priv.handler->get_client_id(pdev); + if (vgasr_priv.old_ddc_owner != id) + ret = vgasr_priv.handler->switch_ddc( + vgasr_priv.old_ddc_owner); + } + mutex_unlock(&vgasr_priv.mux_hw_lock); + return ret; +} +EXPORT_SYMBOL(vga_switcheroo_unlock_ddc); + /** * DOC: Manual switching and manual power control * @@ -569,7 +655,9 @@ static int vga_switchto_stage2(struct vga_switcheroo_client *new_client) console_unlock(); } + mutex_lock(&vgasr_priv.mux_hw_lock); ret = vgasr_priv.handler->switchto(new_client->id); + mutex_unlock(&vgasr_priv.mux_hw_lock); if (ret) return ret; @@ -684,7 +772,9 @@ vga_switcheroo_debugfs_write(struct file *filp, const char __user *ubuf, vgasr_priv.delayed_switch_active = false; if (just_mux) { + mutex_lock(&vgasr_priv.mux_hw_lock); ret = vgasr_priv.handler->switchto(client_id); + mutex_unlock(&vgasr_priv.mux_hw_lock); goto out; } @@ -896,8 +986,11 @@ static int vga_switcheroo_runtime_suspend(struct device *dev) if (ret) return ret; mutex_lock(&vgasr_mutex); - if (vgasr_priv.handler->switchto) + if (vgasr_priv.handler->switchto) { + mutex_lock(&vgasr_priv.mux_hw_lock); vgasr_priv.handler->switchto(VGA_SWITCHEROO_IGD); + mutex_unlock(&vgasr_priv.mux_hw_lock); + } vga_switcheroo_power_switch(pdev, VGA_SWITCHEROO_OFF); mutex_unlock(&vgasr_mutex); return 0; diff --git a/include/linux/vga_switcheroo.h b/include/linux/vga_switcheroo.h index a745f4f0f729..b39a5f3153bd 100644 --- a/include/linux/vga_switcheroo.h +++ b/include/linux/vga_switcheroo.h @@ -102,6 +102,9 @@ enum vga_switcheroo_client_id { * Mandatory. For muxless machines this should be a no-op. Returning 0 * denotes success, anything else failure (in which case the switch is * aborted) + * @switch_ddc: switch DDC lines to given client. + * Optional. Should return the previous DDC owner on success or a + * negative int on failure * @power_state: cut or reinstate power of given client. * Optional. The return value is ignored * @get_client_id: determine if given pci device is integrated or discrete GPU. 
@@ -113,6 +116,7 @@ enum vga_switcheroo_client_id { struct vga_switcheroo_handler { int (*init)(void); int (*switchto)(enum vga_switcheroo_client_id id); + int (*switch_ddc)(enum vga_switcheroo_client_id id); int (*power_state)(enum vga_switcheroo_client_id id, enum vga_switcheroo_state state); enum vga_switcheroo_client_id (*get_client_id)(struct pci_dev *pdev); @@ -156,6 +160,8 @@ int vga_switcheroo_register_handler(const struct vga_switcheroo_handler *handler enum vga_switcheroo_handler_flags_t handler_flags); void vga_switcheroo_unregister_handler(void); enum vga_switcheroo_handler_flags_t vga_switcheroo_handler_flags(void); +int vga_switcheroo_lock_ddc(struct pci_dev *pdev); +int vga_switcheroo_unlock_ddc(struct pci_dev *pdev); int vga_switcheroo_process_delayed_switch(void); @@ -179,6 +185,8 @@ static inline int vga_switcheroo_register_audio_client(struct pci_dev *pdev, enum vga_switcheroo_client_id id) { return 0; } static inline void vga_switcheroo_unregister_handler(void) {} static inline enum vga_switcheroo_handler_flags_t vga_switcheroo_handler_flags(void) { return 0; } +static inline int vga_switcheroo_lock_ddc(struct pci_dev *pdev) { return -ENODEV; } +static inline int vga_switcheroo_unlock_ddc(struct pci_dev *pdev) { return -ENODEV; } static inline int vga_switcheroo_process_delayed_switch(void) { return 0; } static inline enum vga_switcheroo_state vga_switcheroo_get_client_state(struct pci_dev *dev) { return VGA_SWITCHEROO_ON; } -- cgit v1.2.3 From 2413306c2566b729a9d17a81e9d1181e6f354d6a Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Mon, 11 Jan 2016 20:09:20 +0100 Subject: apple-gmux: Add helper for presence detect Centralize gmux' ACPI HID in a header file and add apple_gmux_present(). This can be used by other drivers to activate quirks specific to dual GPU MacBook Pros & Mac Pros. The alternative would be to hardcode DMI or PCI IDs and amend them whenever Apple introduces a new machine. 
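For illustration, a minimal sketch of how a client GPU driver might use the new helper (the example_* names are hypothetical and not part of this patch; the check for a loaded gmux driver is assumed to exist elsewhere):

#include <linux/apple-gmux.h>
#include <linux/errno.h>

/* Hypothetical probe fragment: on dual GPU Macs, defer probing until
 * the gmux driver is ready, so that mux and backlight control work. */
static int example_gpu_probe_quirk(void)
{
	if (apple_gmux_present() && !example_gmux_driver_ready())
		return -EPROBE_DEFER;
	return 0;
}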
Tested-by: Lukas Wunner [MBP 9,1 2012 intel IVB + nvidia GK107 pre-retina 15"] Signed-off-by: Lukas Wunner Reviewed-by: Darren Hart Signed-off-by: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/89c23769058a340e5e11d4a7102f3793d3b0c94c.1452525860.git.lukas@wunner.de --- Documentation/DocBook/gpu.tmpl | 4 ++++ drivers/platform/x86/apple-gmux.c | 3 ++- include/linux/apple-gmux.h | 39 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 45 insertions(+), 1 deletion(-) create mode 100644 include/linux/apple-gmux.h (limited to 'include/linux') diff --git a/Documentation/DocBook/gpu.tmpl b/Documentation/DocBook/gpu.tmpl index d6579d8c1341..fe6b36a2fd98 100644 --- a/Documentation/DocBook/gpu.tmpl +++ b/Documentation/DocBook/gpu.tmpl @@ -3451,6 +3451,10 @@ int num_ioctls; Backlight control !Pdrivers/platform/x86/apple-gmux.c Backlight control + + Public functions +!Iinclude/linux/apple-gmux.h + diff --git a/drivers/platform/x86/apple-gmux.c b/drivers/platform/x86/apple-gmux.c index 1384a393f2f7..4034d2d4c507 100644 --- a/drivers/platform/x86/apple-gmux.c +++ b/drivers/platform/x86/apple-gmux.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -828,7 +829,7 @@ static void gmux_remove(struct pnp_dev *pnp) } static const struct pnp_device_id gmux_device_ids[] = { - {"APP000B", 0}, + {GMUX_ACPI_HID, 0}, {"", 0} }; diff --git a/include/linux/apple-gmux.h b/include/linux/apple-gmux.h new file mode 100644 index 000000000000..feebc2840462 --- /dev/null +++ b/include/linux/apple-gmux.h @@ -0,0 +1,39 @@ +/* + * apple-gmux.h - microcontroller built into dual GPU MacBook Pro & Mac Pro + * Copyright (C) 2015 Lukas Wunner + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License (version 2) as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . + */ + +#ifndef LINUX_APPLE_GMUX_H +#define LINUX_APPLE_GMUX_H + +#include + +#define GMUX_ACPI_HID "APP000B" + +/** + * apple_gmux_present() - detect if gmux is built into the machine + * + * Drivers may use this to activate quirks specific to dual GPU MacBook Pros + * and Mac Pros, e.g. for deferred probing, runtime pm and backlight. + * + * Return: %true if gmux is present and the kernel was configured + * with CONFIG_APPLE_GMUX, %false otherwise. + */ +static inline bool apple_gmux_present(void) +{ + return IS_ENABLED(CONFIG_APPLE_GMUX) && acpi_dev_present(GMUX_ACPI_HID); +} + +#endif /* LINUX_APPLE_GMUX_H */ -- cgit v1.2.3 From be6257b251cebd2deb8c76d43e387e28e3f7412d Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 25 Jan 2016 19:24:50 +0100 Subject: quota: Add support for ->get_nextdqblk() for VFS quota Add infrastructure for supporting the get_nextdqblk() callback for VFS quotas. Translate the operation into a callback to the appropriate filesystem and consequently to the quota format callback.
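As a hedged sketch of the intended use (not part of the patch): an iteration over all IDs with active quota structures could be layered on the new helper like this, restarting the search one past each ID it returns:

/* Sketch only: walk all user quota IDs with an active structure on sb.
 * dquot_get_next_dqblk() hands the found ID back through qid. */
static void example_walk_user_quotas(struct super_block *sb)
{
	struct kqid qid = make_kqid(&init_user_ns, USRQUOTA, 0);
	struct qc_dqblk di;

	while (!dquot_get_next_dqblk(sb, &qid, &di)) {
		/* ... consume di for qid here ... */
		qid = make_kqid(&init_user_ns, USRQUOTA,
				from_kqid(&init_user_ns, qid) + 1);
	}
}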
Signed-off-by: Jan Kara --- fs/ext4/super.c | 1 + fs/quota/dquot.c | 39 +++++++++++++++++++++++++++++++++++++++ fs/reiserfs/super.c | 1 + include/linux/quota.h | 3 +++ include/linux/quotaops.h | 3 +++ 5 files changed, 47 insertions(+) (limited to 'include/linux') diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 3ed01ec011d7..b5bcbddceb91 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -1132,6 +1132,7 @@ static const struct dquot_operations ext4_quota_operations = { .alloc_dquot = dquot_alloc, .destroy_dquot = dquot_destroy, .get_projid = ext4_get_projid, + .get_next_id = dquot_get_next_id, }; static const struct quotactl_ops ext4_qctl_operations = { diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index 3c3b81bb6dfe..7e0137bde6d6 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -2031,6 +2031,21 @@ int dquot_commit_info(struct super_block *sb, int type) } EXPORT_SYMBOL(dquot_commit_info); +int dquot_get_next_id(struct super_block *sb, struct kqid *qid) +{ + struct quota_info *dqopt = sb_dqopt(sb); + int err; + + if (!dqopt->ops[qid->type]->get_next_id) + return -ENOSYS; + mutex_lock(&dqopt->dqio_mutex); + err = dqopt->ops[qid->type]->get_next_id(sb, qid); + mutex_unlock(&dqopt->dqio_mutex); + + return err; +} +EXPORT_SYMBOL(dquot_get_next_id); + /* * Definitions of diskquota operations. */ @@ -2042,6 +2057,7 @@ const struct dquot_operations dquot_operations = { .write_info = dquot_commit_info, .alloc_dquot = dquot_alloc, .destroy_dquot = dquot_destroy, + .get_next_id = dquot_get_next_id, }; EXPORT_SYMBOL(dquot_operations); @@ -2565,6 +2581,27 @@ int dquot_get_dqblk(struct super_block *sb, struct kqid qid, } EXPORT_SYMBOL(dquot_get_dqblk); +int dquot_get_next_dqblk(struct super_block *sb, struct kqid *qid, + struct qc_dqblk *di) +{ + struct dquot *dquot; + int err; + + if (!sb->dq_op->get_next_id) + return -ENOSYS; + err = sb->dq_op->get_next_id(sb, qid); + if (err < 0) + return err; + dquot = dqget(sb, *qid); + if (IS_ERR(dquot)) + return PTR_ERR(dquot); + do_get_dqblk(dquot, di); + dqput(dquot); + + return 0; +} +EXPORT_SYMBOL(dquot_get_next_dqblk); + #define VFS_QC_MASK \ (QC_SPACE | QC_SPC_SOFT | QC_SPC_HARD | \ QC_INO_COUNT | QC_INO_SOFT | QC_INO_HARD | \ @@ -2765,6 +2802,7 @@ const struct quotactl_ops dquot_quotactl_ops = { .get_state = dquot_get_state, .set_info = dquot_set_dqinfo, .get_dqblk = dquot_get_dqblk, + .get_nextdqblk = dquot_get_next_dqblk, .set_dqblk = dquot_set_dqblk }; EXPORT_SYMBOL(dquot_quotactl_ops); @@ -2776,6 +2814,7 @@ const struct quotactl_ops dquot_quotactl_sysfile_ops = { .get_state = dquot_get_state, .set_info = dquot_set_dqinfo, .get_dqblk = dquot_get_dqblk, + .get_nextdqblk = dquot_get_next_dqblk, .set_dqblk = dquot_set_dqblk }; EXPORT_SYMBOL(dquot_quotactl_sysfile_ops); diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index c0306ec8ed7b..b8f2d1e8c645 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c @@ -802,6 +802,7 @@ static const struct dquot_operations reiserfs_quota_operations = { .write_info = reiserfs_write_info, .alloc_dquot = dquot_alloc, .destroy_dquot = dquot_destroy, + .get_next_id = dquot_get_next_id, }; static const struct quotactl_ops reiserfs_qctl_operations = { diff --git a/include/linux/quota.h b/include/linux/quota.h index fba92f5c1a63..9dfb6bce8c9e 100644 --- a/include/linux/quota.h +++ b/include/linux/quota.h @@ -306,6 +306,7 @@ struct quota_format_ops { int (*read_dqblk)(struct dquot *dquot); /* Read structure for one user */ int (*commit_dqblk)(struct dquot *dquot); /* Write structure for one user */ int 
(*release_dqblk)(struct dquot *dquot); /* Called when last reference to dquot is being dropped */ + int (*get_next_id)(struct super_block *sb, struct kqid *qid); /* Get next ID with existing structure in the quota file */ }; /* Operations working with dquots */ @@ -321,6 +322,8 @@ struct dquot_operations { * quota code only */ qsize_t *(*get_reserved_space) (struct inode *); int (*get_projid) (struct inode *, kprojid_t *);/* Get project ID */ + /* Get next ID with active quota structure */ + int (*get_next_id) (struct super_block *sb, struct kqid *qid); }; struct path; diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h index 7a57c28eb5e7..f00fa86ac966 100644 --- a/include/linux/quotaops.h +++ b/include/linux/quotaops.h @@ -82,6 +82,7 @@ int dquot_commit(struct dquot *dquot); int dquot_acquire(struct dquot *dquot); int dquot_release(struct dquot *dquot); int dquot_commit_info(struct super_block *sb, int type); +int dquot_get_next_id(struct super_block *sb, struct kqid *qid); int dquot_mark_dquot_dirty(struct dquot *dquot); int dquot_file_open(struct inode *inode, struct file *file); @@ -99,6 +100,8 @@ int dquot_get_state(struct super_block *sb, struct qc_state *state); int dquot_set_dqinfo(struct super_block *sb, int type, struct qc_info *ii); int dquot_get_dqblk(struct super_block *sb, struct kqid id, struct qc_dqblk *di); +int dquot_get_next_dqblk(struct super_block *sb, struct kqid *id, + struct qc_dqblk *di); int dquot_set_dqblk(struct super_block *sb, struct kqid id, struct qc_dqblk *di); -- cgit v1.2.3 From 0066373d9f9320f69f24b080ee00d10f14397355 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 25 Jan 2016 20:39:27 +0100 Subject: quota_v2: Implement get_next_id() for V2 quota format Implement functions to get the ID of the next existing quota structure in the quota file for quota-tree-based formats and thus for the V2 quota format.
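To make the tree walk in the patch below easier to follow, here is a minimal standalone sketch of the per-level index computation that __get_index() performs, assuming a 1024-byte usable block size (so epb = 1024 >> 2 = 256 references per block) and a 4-level tree:

#include <stdio.h>

int main(void)
{
	unsigned int epb = 1024 >> 2;	/* references per tree block */
	unsigned int id = 123456789;	/* an example quota ID */
	int tree_depth = 4, depth, d;

	for (depth = 0; depth < tree_depth; depth++) {
		unsigned int idx = id;

		/* divide out the lower levels, as __get_index() does */
		for (d = tree_depth - depth - 1; d > 0; d--)
			idx /= epb;
		printf("level %d -> slot %u\n", depth, idx % epb);
	}
	return 0;
}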
Signed-off-by: Jan Kara --- fs/quota/quota_tree.c | 67 +++++++++++++++++++++++++++++++++++++++++++-- fs/quota/quota_v2.c | 6 ++++ include/linux/dqblk_qtree.h | 2 ++ 3 files changed, 73 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/fs/quota/quota_tree.c b/fs/quota/quota_tree.c index 58efb83dec1c..0738972e8d3f 100644 --- a/fs/quota/quota_tree.c +++ b/fs/quota/quota_tree.c @@ -22,10 +22,9 @@ MODULE_LICENSE("GPL"); #define __QUOTA_QT_PARANOIA -static int get_index(struct qtree_mem_dqinfo *info, struct kqid qid, int depth) +static int __get_index(struct qtree_mem_dqinfo *info, qid_t id, int depth) { unsigned int epb = info->dqi_usable_bs >> 2; - qid_t id = from_kqid(&init_user_ns, qid); depth = info->dqi_qtree_depth - depth - 1; while (depth--) @@ -33,6 +32,13 @@ static int get_index(struct qtree_mem_dqinfo *info, struct kqid qid, int depth) return id % epb; } +static int get_index(struct qtree_mem_dqinfo *info, struct kqid qid, int depth) +{ + qid_t id = from_kqid(&init_user_ns, qid); + + return __get_index(info, id, depth); +} + /* Number of entries in one blocks */ static int qtree_dqstr_in_blk(struct qtree_mem_dqinfo *info) { @@ -668,3 +674,60 @@ int qtree_release_dquot(struct qtree_mem_dqinfo *info, struct dquot *dquot) return 0; } EXPORT_SYMBOL(qtree_release_dquot); + +static int find_next_id(struct qtree_mem_dqinfo *info, qid_t *id, + unsigned int blk, int depth) +{ + char *buf = getdqbuf(info->dqi_usable_bs); + __le32 *ref = (__le32 *)buf; + ssize_t ret; + unsigned int epb = info->dqi_usable_bs >> 2; + unsigned int level_inc = 1; + int i; + + if (!buf) + return -ENOMEM; + + for (i = depth; i < info->dqi_qtree_depth - 1; i++) + level_inc *= epb; + + ret = read_blk(info, blk, buf); + if (ret < 0) { + quota_error(info->dqi_sb, + "Can't read quota tree block %u", blk); + goto out_buf; + } + for (i = __get_index(info, *id, depth); i < epb; i++) { + if (ref[i] == cpu_to_le32(0)) { + *id += level_inc; + continue; + } + if (depth == info->dqi_qtree_depth - 1) { + ret = 0; + goto out_buf; + } + ret = find_next_id(info, id, le32_to_cpu(ref[i]), depth + 1); + if (ret != -ENOENT) + break; + } + if (i == epb) { + ret = -ENOENT; + goto out_buf; + } +out_buf: + kfree(buf); + return ret; +} + +int qtree_get_next_id(struct qtree_mem_dqinfo *info, struct kqid *qid) +{ + qid_t id = from_kqid(&init_user_ns, *qid); + int ret; + + ret = find_next_id(info, &id, QT_TREEOFF, 0); + if (ret < 0) + return ret; + *qid = make_kqid(&init_user_ns, qid->type, id); + return 0; +} +EXPORT_SYMBOL(qtree_get_next_id); diff --git a/fs/quota/quota_v2.c b/fs/quota/quota_v2.c index ed85d4f35c04..ca71bf881ad1 100644 --- a/fs/quota/quota_v2.c +++ b/fs/quota/quota_v2.c @@ -304,6 +304,11 @@ static int v2_free_file_info(struct super_block *sb, int type) return 0; } +static int v2_get_next_id(struct super_block *sb, struct kqid *qid) +{ + return qtree_get_next_id(sb_dqinfo(sb, qid->type)->dqi_priv, qid); +} + static const struct quota_format_ops v2_format_ops = { .check_quota_file = v2_check_quota_file, .read_file_info = v2_read_file_info, @@ -312,6 +317,7 @@ static const struct quota_format_ops v2_format_ops = { .read_dqblk = v2_read_dquot, .commit_dqblk = v2_write_dquot, .release_dqblk = v2_release_dquot, + .get_next_id = v2_get_next_id, }; static struct quota_format_type v2r0_quota_format = { diff --git a/include/linux/dqblk_qtree.h b/include/linux/dqblk_qtree.h index ff8b55359648..0de21e935976 100644 --- a/include/linux/dqblk_qtree.h +++ b/include/linux/dqblk_qtree.h @@ -15,6 +15,7 @@ #define 
QTREE_DEL_REWRITE 6 struct dquot; +struct kqid; /* Operations */ struct qtree_fmt_operations { @@ -52,5 +53,6 @@ static inline int qtree_depth(struct qtree_mem_dqinfo *info) entries *= epb; return i; } +int qtree_get_next_id(struct qtree_mem_dqinfo *info, struct kqid *qid); #endif /* _LINUX_DQBLK_QTREE_H */ -- cgit v1.2.3 From 5d2787cf0b210d2925e8d44e2e79241385249d6b Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 9 Feb 2016 16:40:46 +0000 Subject: KEYS: Add an alloc flag to convey the builtinness of a key Add KEY_ALLOC_BUILT_IN to convey that a key should have KEY_FLAG_BUILTIN set rather than setting it after the fact. Signed-off-by: David Howells Acked-by: Mimi Zohar --- certs/system_keyring.c | 4 ++-- include/linux/key.h | 1 + security/keys/key.c | 2 ++ 3 files changed, 5 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/certs/system_keyring.c b/certs/system_keyring.c index 2570598b784d..f4180326c2e1 100644 --- a/certs/system_keyring.c +++ b/certs/system_keyring.c @@ -84,12 +84,12 @@ static __init int load_system_certificate_list(void) ((KEY_POS_ALL & ~KEY_POS_SETATTR) | KEY_USR_VIEW | KEY_USR_READ), KEY_ALLOC_NOT_IN_QUOTA | - KEY_ALLOC_TRUSTED); + KEY_ALLOC_TRUSTED | + KEY_ALLOC_BUILT_IN); if (IS_ERR(key)) { pr_err("Problem loading in-kernel X.509 certificate (%ld)\n", PTR_ERR(key)); } else { - set_bit(KEY_FLAG_BUILTIN, &key_ref_to_ptr(key)->flags); pr_notice("Loaded X.509 cert '%s'\n", key_ref_to_ptr(key)->description); key_ref_put(key); diff --git a/include/linux/key.h b/include/linux/key.h index 7321ab8ef949..5f5b1129dc92 100644 --- a/include/linux/key.h +++ b/include/linux/key.h @@ -219,6 +219,7 @@ extern struct key *key_alloc(struct key_type *type, #define KEY_ALLOC_QUOTA_OVERRUN 0x0001 /* add to quota, permit even if overrun */ #define KEY_ALLOC_NOT_IN_QUOTA 0x0002 /* not in quota */ #define KEY_ALLOC_TRUSTED 0x0004 /* Key should be flagged as trusted */ +#define KEY_ALLOC_BUILT_IN 0x0008 /* Key is built into kernel */ extern void key_revoke(struct key *key); extern void key_invalidate(struct key *key); diff --git a/security/keys/key.c b/security/keys/key.c index 09ef276c4bdc..b28755131687 100644 --- a/security/keys/key.c +++ b/security/keys/key.c @@ -296,6 +296,8 @@ struct key *key_alloc(struct key_type *type, const char *desc, key->flags |= 1 << KEY_FLAG_IN_QUOTA; if (flags & KEY_ALLOC_TRUSTED) key->flags |= 1 << KEY_FLAG_TRUSTED; + if (flags & KEY_ALLOC_BUILT_IN) + key->flags |= 1 << KEY_FLAG_BUILTIN; #ifdef KEY_DEBUGGING key->magic = KEY_DEBUG_MAGIC; -- cgit v1.2.3 From a0a90718f18264dc904d34a580f332006f5561e9 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Mon, 8 Feb 2016 17:14:28 +0200 Subject: spi: Let drivers translate ACPI DeviceSelection to suitable Linux chip select In Windows it is up to the SPI host controller driver to handle the ACPI DeviceSelection as it likes. The SPI core does not take any part in it. This is different in Linux, because we always expect chip selects to be in the range of 0 .. master->num_chipselect - 1. In order to support this in Linux we need a way to allow the driver to translate between the ACPI DeviceSelection field and the Linux chip select number. Provide a new optional hook, ->fw_translate_cs(), that a driver can use to handle the translation, and call this hook, if set, during SPI slave ACPI enumeration.
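A minimal sketch of a driver-side implementation (illustrative only; the example_* name is not part of this patch), for firmware that numbers chip selects from 1:

static int example_fw_translate_cs(struct spi_master *master, unsigned cs)
{
	/* firmware counts from 1, Linux expects 0 .. num_chipselect - 1 */
	if (cs == 0)
		return -EINVAL;
	return cs - 1;
}

The driver would then set master->fw_translate_cs = example_fw_translate_cs in its probe path.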
Signed-off-by: Mika Westerberg Reviewed-by: Jarkko Nikula Signed-off-by: Mark Brown --- drivers/spi/spi.c | 19 ++++++++++++++++++- include/linux/spi/spi.h | 5 +++++ 2 files changed, 23 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c index 47eff8012a77..2c0c26a57f03 100644 --- a/drivers/spi/spi.c +++ b/drivers/spi/spi.c @@ -1581,13 +1581,30 @@ static void of_register_spi_devices(struct spi_master *master) { } static int acpi_spi_add_resource(struct acpi_resource *ares, void *data) { struct spi_device *spi = data; + struct spi_master *master = spi->master; if (ares->type == ACPI_RESOURCE_TYPE_SERIAL_BUS) { struct acpi_resource_spi_serialbus *sb; sb = &ares->data.spi_serial_bus; if (sb->type == ACPI_RESOURCE_SERIAL_TYPE_SPI) { - spi->chip_select = sb->device_selection; + /* + * ACPI DeviceSelection numbering is handled by the + * host controller driver in Windows and can vary + * from driver to driver. In Linux we always expect + * 0 .. max - 1 so we need to ask the driver to + * translate between the two schemes. + */ + if (master->fw_translate_cs) { + int cs = master->fw_translate_cs(master, + sb->device_selection); + if (cs < 0) + return cs; + spi->chip_select = cs; + } else { + spi->chip_select = sb->device_selection; + } + spi->max_speed_hz = sb->connection_speed; if (sb->clock_phase == ACPI_SPI_SECOND_PHASE) diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index 53be3a4c60cb..8a25e6c2fb56 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -369,6 +369,9 @@ static inline void spi_unregister_driver(struct spi_driver *sdrv) * @dma_rx: DMA receive channel * @dummy_rx: dummy receive buffer for full-duplex devices * @dummy_tx: dummy transmit buffer for full-duplex devices + * @fw_translate_cs: If the boot firmware uses a different numbering scheme + * than Linux expects, this optional hook can be used to translate + * between the two. * * Each SPI master controller can communicate with one or more @spi_device * children. These make a small bus, sharing MOSI, MISO and SCK signals @@ -537,6 +540,8 @@ struct spi_master { /* dummy data for full duplex devices */ void *dummy_rx; void *dummy_tx; + + int (*fw_translate_cs)(struct spi_master *master, unsigned cs); }; static inline void *spi_master_get_devdata(struct spi_master *master) -- cgit v1.2.3 From 30f3a6ab44d8d06bb3d94f6320e4aa76df59d025 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Mon, 8 Feb 2016 17:14:31 +0200 Subject: spi: pxa2xx: Add support for both chip selects on Intel Braswell The Intel Braswell LPSS SPI controller actually has two chip selects and there is no capabilities register where this could be found out. These two chip selects are controlled by bits which are in a slightly different location than on Broxton. The Braswell Windows driver also starts chip select (ACPI DeviceSelection) numbering from 1, so translate it to be suitable for Linux as well.
Signed-off-by: Mika Westerberg Reviewed-by: Jarkko Nikula Reviewed-by: Andy Shevchenko Signed-off-by: Mark Brown --- drivers/spi/spi-pxa2xx.c | 21 ++++++++++++++++++++- include/linux/pxa2xx_ssp.h | 1 + 2 files changed, 21 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/spi/spi-pxa2xx.c b/drivers/spi/spi-pxa2xx.c index 81d68e01046a..0eb79368eabc 100644 --- a/drivers/spi/spi-pxa2xx.c +++ b/drivers/spi/spi-pxa2xx.c @@ -83,6 +83,7 @@ struct lpss_config { /* Chip select control */ unsigned cs_sel_shift; unsigned cs_sel_mask; + unsigned cs_num; }; /* Keep these sorted with enum pxa_ssp_type */ @@ -107,6 +108,19 @@ static const struct lpss_config lpss_platforms[] = { .tx_threshold_lo = 160, .tx_threshold_hi = 224, }, + { /* LPSS_BSW_SSP */ + .offset = 0x400, + .reg_general = 0x08, + .reg_ssp = 0x0c, + .reg_cs_ctrl = 0x18, + .reg_capabilities = -1, + .rx_threshold = 64, + .tx_threshold_lo = 160, + .tx_threshold_hi = 224, + .cs_sel_shift = 2, + .cs_sel_mask = 1 << 2, + .cs_num = 2, + }, { /* LPSS_SPT_SSP */ .offset = 0x200, .reg_general = -1, @@ -142,6 +156,7 @@ static bool is_lpss_ssp(const struct driver_data *drv_data) switch (drv_data->ssp_type) { case LPSS_LPT_SSP: case LPSS_BYT_SSP: + case LPSS_BSW_SSP: case LPSS_SPT_SSP: case LPSS_BXT_SSP: return true; @@ -1189,6 +1204,7 @@ static int setup(struct spi_device *spi) break; case LPSS_LPT_SSP: case LPSS_BYT_SSP: + case LPSS_BSW_SSP: case LPSS_SPT_SSP: case LPSS_BXT_SSP: config = lpss_get_config(drv_data); @@ -1336,7 +1352,7 @@ static const struct acpi_device_id pxa2xx_spi_acpi_match[] = { { "INT3430", LPSS_LPT_SSP }, { "INT3431", LPSS_LPT_SSP }, { "80860F0E", LPSS_BYT_SSP }, - { "8086228E", LPSS_BYT_SSP }, + { "8086228E", LPSS_BSW_SSP }, { }, }; MODULE_DEVICE_TABLE(acpi, pxa2xx_spi_acpi_match); @@ -1473,6 +1489,7 @@ static int pxa2xx_spi_fw_translate_cs(struct spi_master *master, unsigned cs) * to match what Linux expects. */ case LPSS_BYT_SSP: + case LPSS_BSW_SSP: return cs - 1; default: @@ -1622,6 +1639,8 @@ static int pxa2xx_spi_probe(struct platform_device *pdev) tmp &= LPSS_CAPS_CS_EN_MASK; tmp >>= LPSS_CAPS_CS_EN_SHIFT; platform_info->num_chipselect = ffz(tmp); + } else if (config->cs_num) { + platform_info->num_chipselect = config->cs_num; } } master->num_chipselect = platform_info->num_chipselect; diff --git a/include/linux/pxa2xx_ssp.h b/include/linux/pxa2xx_ssp.h index c2f2574ff61c..2a097d176ba9 100644 --- a/include/linux/pxa2xx_ssp.h +++ b/include/linux/pxa2xx_ssp.h @@ -197,6 +197,7 @@ enum pxa_ssp_type { QUARK_X1000_SSP, LPSS_LPT_SSP, /* Keep LPSS types sorted with lpss_platforms[] */ LPSS_BYT_SSP, + LPSS_BSW_SSP, LPSS_SPT_SSP, LPSS_BXT_SSP, }; -- cgit v1.2.3 From d780c3711d9df9bacd56b71cf23443b895a331ca Mon Sep 17 00:00:00 2001 From: Martin Sperl Date: Mon, 14 Dec 2015 15:20:18 +0000 Subject: spi: core: added spi_resource management A SPI resource management framework used while processing a spi_message via the spi core. The basic idea is taken from devres, but as the allocation may happen fairly frequently, some provisioning (in the form of an unused spi_device pointer argument to spi_res_alloc) has been made so that at a later stage we may implement reuse of objects allocated earlier, avoiding repeated allocation by keeping a cache of objects that we can reuse.
This framework can get used for: * rewriting spi_messages * to fulfill alignment requirements of the spi_master HW * to fulfill transfer length requirements (e.g. transfers need to be less than 64k) * consolidate spi_messages with multiple transfers into a single transfer when the total transfer length is below a threshold. * reimplement spi_unmap_buf without explicitly needing to check if it has been mapped Signed-off-by: Martin Sperl Signed-off-by: Mark Brown --- drivers/spi/spi.c | 91 +++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/spi/spi.h | 36 +++++++++++++++++++ 2 files changed, 127 insertions(+) (limited to 'include/linux') diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c index 47eff8012a77..894ed0357dd7 100644 --- a/drivers/spi/spi.c +++ b/drivers/spi/spi.c @@ -1024,6 +1024,8 @@ out: if (msg->status && master->handle_err) master->handle_err(master, msg); + spi_res_release(master, msg); + spi_finalize_current_message(master); return ret; @@ -2013,6 +2015,95 @@ struct spi_master *spi_busnum_to_master(u16 bus_num) } EXPORT_SYMBOL_GPL(spi_busnum_to_master); +/*-------------------------------------------------------------------------*/ + +/* Core methods for SPI resource management */ + +/** + * spi_res_alloc - allocate a spi resource that is life-cycle managed + * during the processing of a spi_message while using + * spi_transfer_one + * @spi: the spi device for which we allocate memory + * @release: the release code to execute for this resource + * @size: size to alloc and return + * @gfp: GFP allocation flags + * + * Return: the pointer to the allocated data + * + * This may get enhanced in the future to allocate from a memory pool + * of the @spi_device or @spi_master to avoid repeated allocations. + */ +void *spi_res_alloc(struct spi_device *spi, + spi_res_release_t release, + size_t size, gfp_t gfp) +{ + struct spi_res *sres; + + sres = kzalloc(sizeof(*sres) + size, gfp); + if (!sres) + return NULL; + + INIT_LIST_HEAD(&sres->entry); + sres->release = release; + + return sres->data; +} +EXPORT_SYMBOL_GPL(spi_res_alloc); + +/** + * spi_res_free - free an spi resource + * @res: pointer to the custom data of a resource + * + */ +void spi_res_free(void *res) +{ + struct spi_res *sres = container_of(res, struct spi_res, data); + + if (!res) + return; + + WARN_ON(!list_empty(&sres->entry)); + kfree(sres); +} +EXPORT_SYMBOL_GPL(spi_res_free); + +/** + * spi_res_add - add a spi_res to the spi_message + * @message: the spi message + * @res: the spi_resource + */ +void spi_res_add(struct spi_message *message, void *res) +{ + struct spi_res *sres = container_of(res, struct spi_res, data); + + WARN_ON(!list_empty(&sres->entry)); + list_add_tail(&sres->entry, &message->resources); +} +EXPORT_SYMBOL_GPL(spi_res_add); + +/** + * spi_res_release - release all spi resources for this message + * @master: the @spi_master + * @message: the @spi_message + */ +void spi_res_release(struct spi_master *master, + struct spi_message *message) +{ + struct spi_res *res; + + while (!list_empty(&message->resources)) { + res = list_last_entry(&message->resources, + struct spi_res, entry); + + if (res->release) + res->release(master, message, res->data); + + list_del(&res->entry); + + kfree(res); + } +} +EXPORT_SYMBOL_GPL(spi_res_release); /*-------------------------------------------------------------------------*/ diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index 53be3a4c60cb..38204b584dc5 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -582,6
+582,37 @@ extern void spi_unregister_master(struct spi_master *master); extern struct spi_master *spi_busnum_to_master(u16 busnum); +/* + * SPI resource management while processing a SPI message + */ + +/** + * struct spi_res - spi resource management structure + * @entry: list entry + * @release: release code called prior to freeing this resource + * @data: extra data allocated for the specific use-case + * + * this is based on ideas from devres, but focused on life-cycle + * management during spi_message processing + */ +typedef void (*spi_res_release_t)(struct spi_master *master, + struct spi_message *msg, + void *res); +struct spi_res { + struct list_head entry; + spi_res_release_t release; + unsigned long long data[]; /* guarantee ull alignment */ +}; + +extern void *spi_res_alloc(struct spi_device *spi, + spi_res_release_t release, + size_t size, gfp_t gfp); +extern void spi_res_add(struct spi_message *message, void *res); +extern void spi_res_free(void *res); + +extern void spi_res_release(struct spi_master *master, + struct spi_message *message); + /*---------------------------------------------------------------------------*/ /* @@ -720,6 +751,7 @@ struct spi_transfer { * @status: zero for success, else negative errno * @queue: for use by whichever driver currently owns the message * @state: for use by whichever driver currently owns the message + * @resources: for resource management when the spi message is processed * * A @spi_message is used to execute an atomic sequence of data transfers, * each represented by a struct spi_transfer. The sequence is "atomic" @@ -766,11 +798,15 @@ struct spi_message { */ struct list_head queue; void *state; + + /* list of spi_res resources when the spi message is processed */ + struct list_head resources; }; static inline void spi_message_init_no_memset(struct spi_message *m) { INIT_LIST_HEAD(&m->transfers); + INIT_LIST_HEAD(&m->resources); } static inline void spi_message_init(struct spi_message *m) -- cgit v1.2.3 From 523baf5a0609690cb742b3662b7ccac0ea0b2ef2 Mon Sep 17 00:00:00 2001 From: Martin Sperl Date: Mon, 14 Dec 2015 15:20:19 +0000 Subject: spi: core: add spi_replace_transfers method Add the spi_replace_transfers method that can get used to replace some spi_transfers from a spi_message with other transfers.
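As a hedged usage sketch (relying only on the signature introduced below; error handling trimmed), splitting one transfer into two halves could look like this:

/* Sketch only: replace @xfer with two transfers of half the length.
 * The inserted transfers start out as copies of *xfer, so only len and
 * the buffer pointers of the second half need fixing up. */
static int example_split_in_two(struct spi_message *msg,
				struct spi_transfer *xfer, gfp_t gfp)
{
	size_t half = xfer->len / 2;
	struct spi_replaced_transfers *srt;

	srt = spi_replace_transfers(msg, xfer, 1, 2, NULL, 0, gfp);
	if (IS_ERR(srt))
		return PTR_ERR(srt);

	srt->inserted_transfers[0].len = half;
	srt->inserted_transfers[1].len -= half;
	if (srt->inserted_transfers[1].tx_buf)
		srt->inserted_transfers[1].tx_buf += half;
	if (srt->inserted_transfers[1].rx_buf)
		srt->inserted_transfers[1].rx_buf += half;
	return 0;
}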
Signed-off-by: Martin Sperl Signed-off-by: Mark Brown --- drivers/spi/spi.c | 132 ++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/spi/spi.h | 45 +++++++++++++++++ 2 files changed, 177 insertions(+) (limited to 'include/linux') diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c index 894ed0357dd7..2ec8e66a8098 100644 --- a/drivers/spi/spi.c +++ b/drivers/spi/spi.c @@ -2107,6 +2107,138 @@ EXPORT_SYMBOL_GPL(spi_res_release); /*-------------------------------------------------------------------------*/ +/* Core methods for spi_message alterations */ + +static void __spi_replace_transfers_release(struct spi_master *master, + struct spi_message *msg, + void *res) +{ + struct spi_replaced_transfers *rxfer = res; + size_t i; + + /* call extra callback if requested */ + if (rxfer->release) + rxfer->release(master, msg, res); + + /* insert replaced transfers back into the message */ + list_splice(&rxfer->replaced_transfers, rxfer->replaced_after); + + /* remove the formerly inserted entries */ + for (i = 0; i < rxfer->inserted; i++) + list_del(&rxfer->inserted_transfers[i].transfer_list); +} + +/** + * spi_replace_transfers - replace transfers with several transfers + * and register change with spi_message.resources + * @msg: the spi_message we work upon + * @xfer_first: the first spi_transfer we want to replace + * @remove: number of transfers to remove + * @insert: the number of transfers we want to insert instead + * @release: extra release code necessary in some circumstances + * @extradatasize: extra data to allocate (with alignment guarantees + * of struct @spi_transfer) + * @gfp: gfp flags to use for the allocation + * + * Returns: pointer to @spi_replaced_transfers, + * PTR_ERR(...) in case of errors. + */ +struct spi_replaced_transfers *spi_replace_transfers( + struct spi_message *msg, + struct spi_transfer *xfer_first, + size_t remove, + size_t insert, + spi_replaced_release_t release, + size_t extradatasize, + gfp_t gfp) +{ + struct spi_replaced_transfers *rxfer; + struct spi_transfer *xfer; + size_t i; + + /* allocate the structure using spi_res */ + rxfer = spi_res_alloc(msg->spi, __spi_replace_transfers_release, + insert * sizeof(struct spi_transfer) + + sizeof(struct spi_replaced_transfers) + + extradatasize, + gfp); + if (!rxfer) + return ERR_PTR(-ENOMEM); + + /* the release code to invoke before running the generic release */ + rxfer->release = release; + + /* assign extradata */ + if (extradatasize) + rxfer->extradata = + &rxfer->inserted_transfers[insert]; + + /* init the replaced_transfers list */ + INIT_LIST_HEAD(&rxfer->replaced_transfers); + + /* assign the list_entry after which we should reinsert + * the @replaced_transfers - it may be spi_message.transfers!
+ */ + rxfer->replaced_after = xfer_first->transfer_list.prev; + + /* remove the requested number of transfers */ + for (i = 0; i < remove; i++) { + /* if the entry after replaced_after it is msg->transfers + * then we have been requested to remove more transfers + * than are in the list + */ + if (rxfer->replaced_after->next == &msg->transfers) { + dev_err(&msg->spi->dev, + "requested to remove more spi_transfers than are available\n"); + /* insert replaced transfers back into the message */ + list_splice(&rxfer->replaced_transfers, + rxfer->replaced_after); + + /* free the spi_replace_transfer structure */ + spi_res_free(rxfer); + + /* and return with an error */ + return ERR_PTR(-EINVAL); + } + + /* remove the entry after replaced_after from list of + * transfers and add it to list of replaced_transfers + */ + list_move_tail(rxfer->replaced_after->next, + &rxfer->replaced_transfers); + } + + /* create copy of the given xfer with identical settings + * based on the first transfer to get removed + */ + for (i = 0; i < insert; i++) { + /* we need to run in reverse order */ + xfer = &rxfer->inserted_transfers[insert - 1 - i]; + + /* copy all spi_transfer data */ + memcpy(xfer, xfer_first, sizeof(*xfer)); + + /* add to list */ + list_add(&xfer->transfer_list, rxfer->replaced_after); + + /* clear cs_change and delay_usecs for all but the last */ + if (i) { + xfer->cs_change = false; + xfer->delay_usecs = 0; + } + } + + /* set up inserted */ + rxfer->inserted = insert; + + /* and register it with spi_res/spi_message */ + spi_res_add(msg, rxfer); + + return rxfer; +} +EXPORT_SYMBOL_GPL(spi_replace_transfers); + +/*-------------------------------------------------------------------------*/ + /* Core methods for SPI master protocol drivers. Some of the * other core methods are currently defined as inline functions. 
*/ diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index 38204b584dc5..d71385756fee 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -890,6 +890,51 @@ spi_max_transfer_size(struct spi_device *spi) /*---------------------------------------------------------------------------*/ +/* SPI transfer replacement methods which make use of spi_res */ + +/** + * struct spi_replaced_transfers - structure describing the spi_transfer + * replacements that have occurred + * so that they can get reverted + * @release: some extra release code to get executed prior to + * releasing this structure + * @extradata: pointer to some extra data if requested or NULL + * @replaced_transfers: transfers that have been replaced and which need + * to get restored + * @replaced_after: the transfer after which the @replaced_transfers + * are to get re-inserted + * @inserted: number of transfers inserted + * @inserted_transfers: array of spi_transfers of array-size @inserted, + * that have replaced the @replaced_transfers + * + * note that @extradata will point to @inserted_transfers[@inserted] + * if some extra allocation is requested, so alignment will be the same + * as for spi_transfers + */ +struct spi_replaced_transfers; +typedef void (*spi_replaced_release_t)(struct spi_master *master, + struct spi_message *msg, + struct spi_replaced_transfers *res); +struct spi_replaced_transfers { + spi_replaced_release_t release; + void *extradata; + struct list_head replaced_transfers; + struct list_head *replaced_after; + size_t inserted; + struct spi_transfer inserted_transfers[]; +}; + +extern struct spi_replaced_transfers *spi_replace_transfers( + struct spi_message *msg, + struct spi_transfer *xfer_first, + size_t remove, + size_t insert, + spi_replaced_release_t release, + size_t extradatasize, + gfp_t gfp); + +/*---------------------------------------------------------------------------*/ + /* All these synchronous SPI transfer routines are utilities layered * over the core async transfer primitive. Here, "synchronous" means * they will sleep uninterruptibly until the async transfer completes. -- cgit v1.2.3 From d9f1212272818420fcde611a940c1ad611a8b785 Mon Sep 17 00:00:00 2001 From: Martin Sperl Date: Mon, 14 Dec 2015 15:20:20 +0000 Subject: spi: core: add spi_split_transfers_maxsize Add spi_split_transfers_maxsize method that splits spi_transfers transparently into multiple transfers that are below the given max-size. This makes use of the spi_res framework via spi_replace_transfers to allocate/free the extra transfers as well as to revert the changes applied while processing the spi_message.
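A minimal usage sketch (the 64 KiB limit and the example_* name are illustrative, not part of the patch): a controller driver with a hardware transfer-length limit could call the helper from its prepare_message hook; the spi_res framework then reverts the split automatically when the message completes:

static int example_prepare_message(struct spi_master *master,
				   struct spi_message *msg)
{
	/* split any transfer larger than 65535 bytes */
	return spi_split_transfers_maxsize(master, msg, 65535, GFP_KERNEL);
}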
Signed-off-by: Martin Sperl Signed-off-by: Mark Brown --- drivers/spi/spi.c | 111 ++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/spi/spi.h | 15 +++++++ 2 files changed, 126 insertions(+) (limited to 'include/linux') diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c index 2ec8e66a8098..34e3741504f9 100644 --- a/drivers/spi/spi.c +++ b/drivers/spi/spi.c @@ -144,6 +144,8 @@ SPI_STATISTICS_TRANSFER_BYTES_HISTO(14, "16384-32767"); SPI_STATISTICS_TRANSFER_BYTES_HISTO(15, "32768-65535"); SPI_STATISTICS_TRANSFER_BYTES_HISTO(16, "65536+"); +SPI_STATISTICS_SHOW(transfers_split_maxsize, "%lu"); + static struct attribute *spi_dev_attrs[] = { &dev_attr_modalias.attr, NULL, @@ -181,6 +183,7 @@ static struct attribute *spi_device_statistics_attrs[] = { &dev_attr_spi_device_transfer_bytes_histo14.attr, &dev_attr_spi_device_transfer_bytes_histo15.attr, &dev_attr_spi_device_transfer_bytes_histo16.attr, + &dev_attr_spi_device_transfers_split_maxsize.attr, NULL, }; @@ -223,6 +226,7 @@ static struct attribute *spi_master_statistics_attrs[] = { &dev_attr_spi_master_transfer_bytes_histo14.attr, &dev_attr_spi_master_transfer_bytes_histo15.attr, &dev_attr_spi_master_transfer_bytes_histo16.attr, + &dev_attr_spi_master_transfers_split_maxsize.attr, NULL, }; @@ -2237,6 +2241,113 @@ struct spi_replaced_transfers *spi_replace_transfers( } EXPORT_SYMBOL_GPL(spi_replace_transfers); +int __spi_split_transfer_maxsize(struct spi_master *master, + struct spi_message *msg, + struct spi_transfer **xferp, + size_t maxsize, + gfp_t gfp) +{ + struct spi_transfer *xfer = *xferp, *xfers; + struct spi_replaced_transfers *srt; + size_t offset; + size_t count, i; + + /* warn once about the fact that we are splitting a transfer */ + dev_warn_once(&msg->spi->dev, + "spi_transfer of length %i exceeds max length of %i - needed to split transfers\n", + xfer->len, maxsize); + + /* calculate how many we have to replace */ + count = DIV_ROUND_UP(xfer->len, maxsize); + + /* create replacement */ + srt = spi_replace_transfers(msg, xfer, 1, count, NULL, 0, gfp); + if (IS_ERR(srt)) + return PTR_ERR(srt); + xfers = srt->inserted_transfers; + + /* now handle each of those newly inserted spi_transfers + * note that the replacement spi_transfers are all preset + * to the same values as *xferp, so tx_buf, rx_buf and len + * are all identical (as well as most others) + * so we just have to fix up len and the pointers.
+ * + * this also includes support for the deprecated + * spi_message.is_dma_mapped interface + */ + + /* the first transfer just needs the length modified, so we + * run it outside the loop + */ + xfers[0].len = min(maxsize, xfer[0].len); + + /* all the others need rx_buf/tx_buf also set */ + for (i = 1, offset = maxsize; i < count; offset += maxsize, i++) { + /* update rx_buf, tx_buf and dma */ + if (xfers[i].rx_buf) + xfers[i].rx_buf += offset; + if (xfers[i].rx_dma) + xfers[i].rx_dma += offset; + if (xfers[i].tx_buf) + xfers[i].tx_buf += offset; + if (xfers[i].tx_dma) + xfers[i].tx_dma += offset; + + /* update length */ + xfers[i].len = min(maxsize, xfers[i].len - offset); + } + + /* we set up xferp to the last entry we have inserted, + * so that we skip those already split transfers + */ + *xferp = &xfers[count - 1]; + + /* increment statistics counters */ + SPI_STATISTICS_INCREMENT_FIELD(&master->statistics, + transfers_split_maxsize); + SPI_STATISTICS_INCREMENT_FIELD(&msg->spi->statistics, + transfers_split_maxsize); + + return 0; +} + +/** + * spi_split_transfers_maxsize - split spi transfers into multiple transfers + * when an individual transfer exceeds a + * certain size + * @master: the @spi_master for this transfer + * @msg: the @spi_message to transform + * @maxsize: the maximum size above which an individual transfer gets split + * @gfp: GFP allocation flags + * + * Return: status of transformation + */ +int spi_split_transfers_maxsize(struct spi_master *master, + struct spi_message *msg, + size_t maxsize, + gfp_t gfp) +{ + struct spi_transfer *xfer; + int ret; + + /* iterate over the transfer_list, + * but note that xfer is advanced to the last transfer inserted + * to avoid checking sizes again unnecessarily (also xfer may + * potentially belong to a different list by the time the + * replacement has happened) + */ + list_for_each_entry(xfer, &msg->transfers, transfer_list) { + if (xfer->len > maxsize) { + ret = __spi_split_transfer_maxsize( + master, msg, &xfer, maxsize, gfp); + if (ret) + return ret; + } + } + + return 0; +} +EXPORT_SYMBOL_GPL(spi_split_transfers_maxsize); + /*-------------------------------------------------------------------------*/ /* Core methods for SPI master protocol drivers. Some of the diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index d71385756fee..3c02b4d06268 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -53,6 +53,10 @@ extern struct bus_type spi_bus_type; * * @transfer_bytes_histo: * transfer bytes histogramm + * + * @transfers_split_maxsize: + * number of transfers that have been split because of + * maxsize limit */ struct spi_statistics { spinlock_t lock; /* lock for the whole structure */ @@ -72,6 +76,8 @@ struct spi_statistics { #define SPI_STATISTICS_HISTO_SIZE 17 unsigned long transfer_bytes_histo[SPI_STATISTICS_HISTO_SIZE]; + + unsigned long transfers_split_maxsize; }; void spi_statistics_add_transfer_stats(struct spi_statistics *stats, @@ -935,6 +941,15 @@ extern struct spi_replaced_transfers *spi_replace_transfers( /*---------------------------------------------------------------------------*/ +/* SPI transfer transformation methods */ + +extern int spi_split_transfers_maxsize(struct spi_master *master, + struct spi_message *msg, + size_t maxsize, + gfp_t gfp); + +/*---------------------------------------------------------------------------*/ + /* All these synchronous SPI transfer routines are utilities layered * over the core async transfer primitive. Here, "synchronous" means * they will sleep uninterruptibly until the async transfer completes.
-- cgit v1.2.3 From 556351f14e74db4cd3ddde386457edce7bf0b27f Mon Sep 17 00:00:00 2001 From: Vignesh R Date: Fri, 11 Dec 2015 09:39:56 +0530 Subject: spi: introduce accelerated read support for spi flash devices In addition to providing direct access to the SPI bus, some SPI controller hardware (like ti-qspi) provides a special port (such as a memory-mapped port) that is optimized to improve SPI flash read performance. This means the controller can automatically send the SPI signals required to read data from the SPI flash device. For this, the SPI controller needs to know flash specific information like the read command to use, dummy bytes and address width. Introduce the spi_flash_read() interface to support accelerated reads over SPI flash devices. SPI master drivers can implement this callback to support interfaces such as memory mapped read etc. The m25p80 flash driver and other flash drivers can call this to make use of such interfaces. The interface should only be used with SPI flashes and cannot be used with other SPI devices. Signed-off-by: Vignesh R Signed-off-by: Mark Brown --- drivers/spi/spi.c | 45 +++++++++++++++++++++++++++++++++++++++++++++ include/linux/spi/spi.h | 41 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 86 insertions(+) (limited to 'include/linux') diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c index 47eff8012a77..dcc6f6e92668 100644 --- a/drivers/spi/spi.c +++ b/drivers/spi/spi.c @@ -1152,6 +1152,7 @@ static void __spi_pump_messages(struct spi_master *master, bool in_kthread) } } + mutex_lock(&master->bus_lock_mutex); trace_spi_message_start(master->cur_msg); if (master->prepare_message) { @@ -1161,6 +1162,7 @@ static void __spi_pump_messages(struct spi_master *master, bool in_kthread) "failed to prepare message: %d\n", ret); master->cur_msg->status = ret; spi_finalize_current_message(master); + mutex_unlock(&master->bus_lock_mutex); return; } master->cur_msg_prepared = true; @@ -1170,6 +1172,7 @@ static void __spi_pump_messages(struct spi_master *master, bool in_kthread) if (ret) { master->cur_msg->status = ret; spi_finalize_current_message(master); + mutex_unlock(&master->bus_lock_mutex); return; } @@ -1177,8 +1180,10 @@ static void __spi_pump_messages(struct spi_master *master, bool in_kthread) if (ret) { dev_err(&master->dev, "failed to transfer one message from queue\n"); + mutex_unlock(&master->bus_lock_mutex); return; } + mutex_unlock(&master->bus_lock_mutex); } /** @@ -2351,6 +2356,46 @@ int spi_async_locked(struct spi_device *spi, struct spi_message *message) EXPORT_SYMBOL_GPL(spi_async_locked); +int spi_flash_read(struct spi_device *spi, + struct spi_flash_read_message *msg) + +{ + struct spi_master *master = spi->master; + int ret; + + if ((msg->opcode_nbits == SPI_NBITS_DUAL || + msg->addr_nbits == SPI_NBITS_DUAL) && + !(spi->mode & (SPI_TX_DUAL | SPI_TX_QUAD))) + return -EINVAL; + if ((msg->opcode_nbits == SPI_NBITS_QUAD || + msg->addr_nbits == SPI_NBITS_QUAD) && + !(spi->mode & SPI_TX_QUAD)) + return -EINVAL; + if (msg->data_nbits == SPI_NBITS_DUAL && + !(spi->mode & (SPI_RX_DUAL | SPI_RX_QUAD))) + return -EINVAL; + if (msg->data_nbits == SPI_NBITS_QUAD && + !(spi->mode & SPI_RX_QUAD)) + return -EINVAL; + + if (master->auto_runtime_pm) { + ret = pm_runtime_get_sync(master->dev.parent); + if (ret < 0) { + dev_err(&master->dev, "Failed to power device: %d\n", + ret); + return ret; + } + } + mutex_lock(&master->bus_lock_mutex); + ret = master->spi_flash_read(spi, msg); + mutex_unlock(&master->bus_lock_mutex); + if (master->auto_runtime_pm) +
pm_runtime_put(master->dev.parent); + + return ret; +} +EXPORT_SYMBOL_GPL(spi_flash_read); + /*-------------------------------------------------------------------------*/ /* Utility methods for SPI master protocol drivers, layered on diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index 53be3a4c60cb..ba0dee9d05a3 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -25,6 +25,7 @@ struct dma_chan; struct spi_master; struct spi_transfer; +struct spi_flash_read_message; /* * INTERFACES between SPI master-side drivers and SPI infrastructure. @@ -361,6 +362,8 @@ static inline void spi_unregister_driver(struct spi_driver *sdrv) * @handle_err: the subsystem calls the driver to handle an error that occurs * in the generic implementation of transfer_one_message(). * @unprepare_message: undo any work done by prepare_message(). + * @spi_flash_read: to support SPI controller hardware that provides an + * accelerated interface to read from flash devices. * @cs_gpios: Array of GPIOs to use as chip select lines; one per CS * number. Any individual value may be -ENOENT for CS lines that * are not GPIOs (driven by the SPI controller itself). @@ -513,6 +516,8 @@ struct spi_master { struct spi_message *message); int (*unprepare_message)(struct spi_master *master, struct spi_message *message); + int (*spi_flash_read)(struct spi_device *spi, + struct spi_flash_read_message *msg); /* * These hooks are for drivers that use a generic implementation @@ -1019,6 +1024,42 @@ static inline ssize_t spi_w8r16be(struct spi_device *spi, u8 cmd) return be16_to_cpu(result); } +/** + * struct spi_flash_read_message - flash specific information for + * spi-masters that provide accelerated flash read interfaces + * @buf: buffer to read data + * @from: offset within the flash from where data is to be read + * @len: length of data to be read + * @retlen: actual length of data read + * @read_opcode: read_opcode to be used to communicate with flash + * @addr_width: number of address bytes + * @dummy_bytes: number of dummy bytes + * @opcode_nbits: number of lines to send opcode + * @addr_nbits: number of lines to send address + * @data_nbits: number of lines for data + */ +struct spi_flash_read_message { + void *buf; + loff_t from; + size_t len; + size_t retlen; + u8 read_opcode; + u8 addr_width; + u8 dummy_bytes; + u8 opcode_nbits; + u8 addr_nbits; + u8 data_nbits; +}; + +/* SPI core interface for flash read support */ +static inline bool spi_flash_read_supported(struct spi_device *spi) +{ + return spi->master->spi_flash_read ? true : false; +} + +int spi_flash_read(struct spi_device *spi, + struct spi_flash_read_message *msg); + /*---------------------------------------------------------------------------*/ /* -- cgit v1.2.3 From 868f2f0b72068a097508b6e8870a8950fd8eb7ef Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Thu, 17 Dec 2015 17:08:14 -0700 Subject: blk-mq: dynamic h/w context count The hardware's provided queue count may change at runtime with resource provisioning. This patch allows a block driver to alter the number of h/w queues available when its resource count changes. The main part is a new blk-mq API to request a new number of h/w queues for a given live tag set. The new API freezes all queues using that set, then adjusts the allocated count prior to remapping these to CPUs. The bulk of the rest just shifts where h/w contexts and all their artifacts are allocated and freed.
The number of max h/w contexts is capped to the number of possible cpus since there is no use for more than that. As such, all pre-allocated memory for pointers needs to account for the max possible rather than the initial number of queues. A side effect of this is that blk-mq will proceed successfully as long as it can allocate at least one h/w context. Previously it would fail request queue initialization if fewer than the requested number were allocated. Signed-off-by: Keith Busch Reviewed-by: Christoph Hellwig Tested-by: Jon Derrick Signed-off-by: Jens Axboe --- block/blk-mq-sysfs.c | 9 +-- block/blk-mq.c | 173 +++++++++++++++++++++++++++++-------------------- block/blk-mq.h | 1 + include/linux/blk-mq.h | 2 + 4 files changed, 112 insertions(+), 73 deletions(-) (limited to 'include/linux') diff --git a/block/blk-mq-sysfs.c b/block/blk-mq-sysfs.c index 1cf18784c5cf..431fdda21737 100644 --- a/block/blk-mq-sysfs.c +++ b/block/blk-mq-sysfs.c @@ -408,17 +408,18 @@ void blk_mq_unregister_disk(struct gendisk *disk) blk_mq_enable_hotplug(); } +void blk_mq_hctx_kobj_init(struct blk_mq_hw_ctx *hctx) +{ + kobject_init(&hctx->kobj, &blk_mq_hw_ktype); +} + static void blk_mq_sysfs_init(struct request_queue *q) { - struct blk_mq_hw_ctx *hctx; struct blk_mq_ctx *ctx; int i; kobject_init(&q->mq_kobj, &blk_mq_ktype); - queue_for_each_hw_ctx(q, hctx, i) - kobject_init(&hctx->kobj, &blk_mq_hw_ktype); - queue_for_each_ctx(q, ctx, i) kobject_init(&ctx->kobj, &blk_mq_ctx_ktype); } diff --git a/block/blk-mq.c b/block/blk-mq.c index 4c0622fae413..645eb9e716d0 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -1742,31 +1742,6 @@ static int blk_mq_init_hctx(struct request_queue *q, return -1; } -static int blk_mq_init_hw_queues(struct request_queue *q, - struct blk_mq_tag_set *set) -{ - struct blk_mq_hw_ctx *hctx; - unsigned int i; - - /* - * Initialize hardware queues - */ - queue_for_each_hw_ctx(q, hctx, i) { - if (blk_mq_init_hctx(q, set, hctx, i)) - break; - } - - if (i == q->nr_hw_queues) - return 0; - - /* - * Init failed - */ - blk_mq_exit_hw_queues(q, set, i); - - return 1; -} - static void blk_mq_init_cpu_queues(struct request_queue *q, unsigned int nr_hw_queues) { @@ -1824,6 +1799,7 @@ static void blk_mq_map_swqueue(struct request_queue *q, continue; hctx = q->mq_ops->map_queue(q, i); + cpumask_set_cpu(i, hctx->cpumask); ctx->index_hw = hctx->nr_ctx; hctx->ctxs[hctx->nr_ctx++] = ctx; @@ -1972,54 +1948,89 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set) } EXPORT_SYMBOL(blk_mq_init_queue); -struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set, - struct request_queue *q) +static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set, + struct request_queue *q) { - struct blk_mq_hw_ctx **hctxs; - struct blk_mq_ctx __percpu *ctx; - unsigned int *map; - int i; - - ctx = alloc_percpu(struct blk_mq_ctx); - if (!ctx) - return ERR_PTR(-ENOMEM); - - hctxs = kmalloc_node(set->nr_hw_queues * sizeof(*hctxs), GFP_KERNEL, - set->numa_node); - - if (!hctxs) - goto err_percpu; - - map = blk_mq_make_queue_map(set); - if (!map) - goto err_map; + int i, j; + struct blk_mq_hw_ctx **hctxs = q->queue_hw_ctx; + blk_mq_sysfs_unregister(q); for (i = 0; i < set->nr_hw_queues; i++) { - int node = blk_mq_hw_queue_to_node(map, i); + int node; + if (hctxs[i]) + continue; + + node = blk_mq_hw_queue_to_node(q->mq_map, i); hctxs[i] = kzalloc_node(sizeof(struct blk_mq_hw_ctx), GFP_KERNEL, node); if (!hctxs[i]) - goto err_hctxs; + break; if (!zalloc_cpumask_var_node(&hctxs[i]->cpumask,
GFP_KERNEL, - node)) - goto err_hctxs; + node)) { + kfree(hctxs[i]); + hctxs[i] = NULL; + break; + } atomic_set(&hctxs[i]->nr_active, 0); hctxs[i]->numa_node = node; hctxs[i]->queue_num = i; + + if (blk_mq_init_hctx(q, set, hctxs[i], i)) { + free_cpumask_var(hctxs[i]->cpumask); + kfree(hctxs[i]); + hctxs[i] = NULL; + break; + } + blk_mq_hctx_kobj_init(hctxs[i]); } + for (j = i; j < q->nr_hw_queues; j++) { + struct blk_mq_hw_ctx *hctx = hctxs[j]; + + if (hctx) { + if (hctx->tags) { + blk_mq_free_rq_map(set, hctx->tags, j); + set->tags[j] = NULL; + } + blk_mq_exit_hctx(q, set, hctx, j); + free_cpumask_var(hctx->cpumask); + kobject_put(&hctx->kobj); + kfree(hctx->ctxs); + kfree(hctx); + hctxs[j] = NULL; + + } + } + q->nr_hw_queues = i; + blk_mq_sysfs_register(q); +} + +struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set, + struct request_queue *q) +{ + q->queue_ctx = alloc_percpu(struct blk_mq_ctx); + if (!q->queue_ctx) + return ERR_PTR(-ENOMEM); + + q->queue_hw_ctx = kzalloc_node(nr_cpu_ids * sizeof(*(q->queue_hw_ctx)), + GFP_KERNEL, set->numa_node); + if (!q->queue_hw_ctx) + goto err_percpu; + + q->mq_map = blk_mq_make_queue_map(set); + if (!q->mq_map) + goto err_map; + + blk_mq_realloc_hw_ctxs(set, q); + if (!q->nr_hw_queues) + goto err_hctxs; INIT_WORK(&q->timeout_work, blk_mq_timeout_work); blk_queue_rq_timeout(q, set->timeout ? set->timeout : 30 * HZ); q->nr_queues = nr_cpu_ids; - q->nr_hw_queues = set->nr_hw_queues; - q->mq_map = map; - - q->queue_ctx = ctx; - q->queue_hw_ctx = hctxs; q->mq_ops = set->ops; q->queue_flags |= QUEUE_FLAG_MQ_DEFAULT; @@ -2048,9 +2059,6 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set, blk_mq_init_cpu_queues(q, set->nr_hw_queues); - if (blk_mq_init_hw_queues(q, set)) - goto err_hctxs; - get_online_cpus(); mutex_lock(&all_q_mutex); @@ -2064,17 +2072,11 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set, return q; err_hctxs: - kfree(map); - for (i = 0; i < set->nr_hw_queues; i++) { - if (!hctxs[i]) - break; - free_cpumask_var(hctxs[i]->cpumask); - kfree(hctxs[i]); - } + kfree(q->mq_map); err_map: - kfree(hctxs); + kfree(q->queue_hw_ctx); err_percpu: - free_percpu(ctx); + free_percpu(q->queue_ctx); return ERR_PTR(-ENOMEM); } EXPORT_SYMBOL(blk_mq_init_allocated_queue); @@ -2282,9 +2284,13 @@ int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set) set->nr_hw_queues = 1; set->queue_depth = min(64U, set->queue_depth); } + /* + * There is no use for more h/w queues than cpus. 
+ */ + if (set->nr_hw_queues > nr_cpu_ids) + set->nr_hw_queues = nr_cpu_ids; - set->tags = kmalloc_node(set->nr_hw_queues * - sizeof(struct blk_mq_tags *), + set->tags = kzalloc_node(nr_cpu_ids * sizeof(struct blk_mq_tags *), GFP_KERNEL, set->numa_node); if (!set->tags) return -ENOMEM; @@ -2307,7 +2313,7 @@ void blk_mq_free_tag_set(struct blk_mq_tag_set *set) { int i; - for (i = 0; i < set->nr_hw_queues; i++) { + for (i = 0; i < nr_cpu_ids; i++) { if (set->tags[i]) blk_mq_free_rq_map(set, set->tags[i], i); } @@ -2339,6 +2345,35 @@ int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr) return ret; } +void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues) +{ + struct request_queue *q; + + if (nr_hw_queues > nr_cpu_ids) + nr_hw_queues = nr_cpu_ids; + if (nr_hw_queues < 1 || nr_hw_queues == set->nr_hw_queues) + return; + + list_for_each_entry(q, &set->tag_list, tag_set_list) + blk_mq_freeze_queue(q); + + set->nr_hw_queues = nr_hw_queues; + list_for_each_entry(q, &set->tag_list, tag_set_list) { + blk_mq_realloc_hw_ctxs(set, q); + + if (q->nr_hw_queues > 1) + blk_queue_make_request(q, blk_mq_make_request); + else + blk_queue_make_request(q, blk_sq_make_request); + + blk_mq_queue_reinit(q, cpu_online_mask); + } + + list_for_each_entry(q, &set->tag_list, tag_set_list) + blk_mq_unfreeze_queue(q); +} +EXPORT_SYMBOL_GPL(blk_mq_update_nr_hw_queues); + void blk_mq_disable_hotplug(void) { mutex_lock(&all_q_mutex); diff --git a/block/blk-mq.h b/block/blk-mq.h index eaede8e45c9c..9087b11037b7 100644 --- a/block/blk-mq.h +++ b/block/blk-mq.h @@ -57,6 +57,7 @@ extern int blk_mq_hw_queue_to_node(unsigned int *map, unsigned int); */ extern int blk_mq_sysfs_register(struct request_queue *q); extern void blk_mq_sysfs_unregister(struct request_queue *q); +extern void blk_mq_hctx_kobj_init(struct blk_mq_hw_ctx *hctx); extern void blk_mq_rq_timed_out(struct request *req, bool reserved); diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 7fc9296b5742..15a73d49fd1d 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -244,6 +244,8 @@ void blk_mq_freeze_queue(struct request_queue *q); void blk_mq_unfreeze_queue(struct request_queue *q); void blk_mq_freeze_queue_start(struct request_queue *q); +void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues); + /* * Driver command data is immediately after the request. So subtract request * size to get back to the original request, add request size to get the PDU. -- cgit v1.2.3 From 9f8ea969d5cfdd4353d2adb004e8e2286b984369 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 9 Feb 2016 10:30:33 +0530 Subject: PM / OPP: get/put regulators from OPP core This allows the OPP core to request/free the regulator resource attached to a device OPP. The regulator device is fetched using the name provided by the driver when calling dev_pm_opp_set_regulator(). This will work for both OPP-v1 and v2 bindings. This is a preliminary step for moving the OPP switching logic into the OPP core. Signed-off-by: Viresh Kumar Reviewed-by: Stephen Boyd Signed-off-by: Rafael J.
Wysocki --- drivers/base/power/opp/core.c | 111 ++++++++++++++++++++++++++++++++++++++++++ drivers/base/power/opp/opp.h | 4 ++ include/linux/pm_opp.h | 9 ++++ 3 files changed, 124 insertions(+) (limited to 'include/linux') diff --git a/drivers/base/power/opp/core.c b/drivers/base/power/opp/core.c index cf351d3dab1c..1e22b71abf1e 100644 --- a/drivers/base/power/opp/core.c +++ b/drivers/base/power/opp/core.c @@ -19,6 +19,7 @@ #include #include #include +#include #include "opp.h" @@ -565,6 +566,9 @@ static void _remove_device_opp(struct device_opp *dev_opp) if (dev_opp->prop_name) return; + if (!IS_ERR_OR_NULL(dev_opp->regulator)) + return; + list_dev = list_first_entry(&dev_opp->dev_list, struct device_list_opp, node); @@ -1085,6 +1089,113 @@ unlock: } EXPORT_SYMBOL_GPL(dev_pm_opp_put_prop_name); +/** + * dev_pm_opp_set_regulator() - Set regulator name for the device + * @dev: Device for which regulator name is being set. + * @name: Name of the regulator. + * + * In order to support OPP switching, OPP layer needs to know the name of the + * device's regulator, as the core would be required to switch voltages as well. + * + * This must be called before any OPPs are initialized for the device. + * + * Locking: The internal device_opp and opp structures are RCU protected. + * Hence this function internally uses RCU updater strategy with mutex locks + * to keep the integrity of the internal data structures. Callers should ensure + * that this function is *NOT* called under RCU protection or in contexts where + * mutex cannot be locked. + */ +int dev_pm_opp_set_regulator(struct device *dev, const char *name) +{ + struct device_opp *dev_opp; + struct regulator *reg; + int ret; + + mutex_lock(&dev_opp_list_lock); + + dev_opp = _add_device_opp(dev); + if (!dev_opp) { + ret = -ENOMEM; + goto unlock; + } + + /* This should be called before OPPs are initialized */ + if (WARN_ON(!list_empty(&dev_opp->opp_list))) { + ret = -EBUSY; + goto err; + } + + /* Already have a regulator set */ + if (WARN_ON(!IS_ERR_OR_NULL(dev_opp->regulator))) { + ret = -EBUSY; + goto err; + } + /* Allocate the regulator */ + reg = regulator_get_optional(dev, name); + if (IS_ERR(reg)) { + ret = PTR_ERR(reg); + if (ret != -EPROBE_DEFER) + dev_err(dev, "%s: no regulator (%s) found: %d\n", + __func__, name, ret); + goto err; + } + + dev_opp->regulator = reg; + + mutex_unlock(&dev_opp_list_lock); + return 0; + +err: + _remove_device_opp(dev_opp); +unlock: + mutex_unlock(&dev_opp_list_lock); + + return ret; +} +EXPORT_SYMBOL_GPL(dev_pm_opp_set_regulator); + +/** + * dev_pm_opp_put_regulator() - Releases resources blocked for regulator + * @dev: Device for which regulator was set. + * + * Locking: The internal device_opp and opp structures are RCU protected. + * Hence this function internally uses RCU updater strategy with mutex locks + * to keep the integrity of the internal data structures. Callers should ensure + * that this function is *NOT* called under RCU protection or in contexts where + * mutex cannot be locked. 
+ */ +void dev_pm_opp_put_regulator(struct device *dev) +{ + struct device_opp *dev_opp; + + mutex_lock(&dev_opp_list_lock); + + /* Check for existing list for 'dev' first */ + dev_opp = _find_device_opp(dev); + if (IS_ERR(dev_opp)) { + dev_err(dev, "Failed to find dev_opp: %ld\n", PTR_ERR(dev_opp)); + goto unlock; + } + + if (IS_ERR_OR_NULL(dev_opp->regulator)) { + dev_err(dev, "%s: Doesn't have regulator set\n", __func__); + goto unlock; + } + + /* Make sure there are no concurrent readers while updating dev_opp */ + WARN_ON(!list_empty(&dev_opp->opp_list)); + + regulator_put(dev_opp->regulator); + dev_opp->regulator = ERR_PTR(-EINVAL); + + /* Try freeing device_opp if this was the last blocking resource */ + _remove_device_opp(dev_opp); + +unlock: + mutex_unlock(&dev_opp_list_lock); +} +EXPORT_SYMBOL_GPL(dev_pm_opp_put_regulator); + static bool _opp_is_supported(struct device *dev, struct device_opp *dev_opp, struct device_node *np) { diff --git a/drivers/base/power/opp/opp.h b/drivers/base/power/opp/opp.h index 690638ef36ee..416293b7da23 100644 --- a/drivers/base/power/opp/opp.h +++ b/drivers/base/power/opp/opp.h @@ -22,6 +22,8 @@ #include #include +struct regulator; + /* Lock to allow exclusive modification to the device and opp lists */ extern struct mutex dev_opp_list_lock; @@ -132,6 +134,7 @@ struct device_list_opp { * @supported_hw: Array of version number to support. * @supported_hw_count: Number of elements in supported_hw array. * @prop_name: A name to postfix to many DT properties, while parsing them. + * @regulator: Supply regulator * @dentry: debugfs dentry pointer of the real device directory (not links). * @dentry_name: Name of the real dentry. * * @@ -159,6 +162,7 @@ struct device_opp { unsigned int *supported_hw; unsigned int supported_hw_count; const char *prop_name; + struct regulator *regulator; #ifdef CONFIG_DEBUG_FS struct dentry *dentry; diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h index 95403d2ccaf5..c70a18ac9c8a 100644 --- a/include/linux/pm_opp.h +++ b/include/linux/pm_opp.h @@ -60,6 +60,8 @@ int dev_pm_opp_set_supported_hw(struct device *dev, const u32 *versions, void dev_pm_opp_put_supported_hw(struct device *dev); int dev_pm_opp_set_prop_name(struct device *dev, const char *name); void dev_pm_opp_put_prop_name(struct device *dev); +int dev_pm_opp_set_regulator(struct device *dev, const char *name); +void dev_pm_opp_put_regulator(struct device *dev); #else static inline unsigned long dev_pm_opp_get_voltage(struct dev_pm_opp *opp) { @@ -151,6 +153,13 @@ static inline int dev_pm_opp_set_prop_name(struct device *dev, const char *name) static inline void dev_pm_opp_put_prop_name(struct device *dev) {} +static inline int dev_pm_opp_set_regulator(struct device *dev, const char *name) +{ + return -EINVAL; +} + +static inline void dev_pm_opp_put_regulator(struct device *dev) {} + #endif /* CONFIG_PM_OPP */ #if defined(CONFIG_PM_OPP) && defined(CONFIG_OF) -- cgit v1.2.3 From 655c9df961751ce21466f6e97e8033932c27a675 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 9 Feb 2016 10:30:35 +0530 Subject: PM / OPP: Introduce dev_pm_opp_get_max_volt_latency() In a few use cases (like cpufreq), it is desirable to get the maximum voltage latency for changing OPPs. Add support for that. Signed-off-by: Viresh Kumar Reviewed-by: Stephen Boyd Signed-off-by: Rafael J.
Wysocki --- drivers/base/power/opp/core.c | 59 +++++++++++++++++++++++++++++++++++++++++++ include/linux/pm_opp.h | 6 +++++ 2 files changed, 65 insertions(+) (limited to 'include/linux') diff --git a/drivers/base/power/opp/core.c b/drivers/base/power/opp/core.c index 71545becfca1..ffe2406af882 100644 --- a/drivers/base/power/opp/core.c +++ b/drivers/base/power/opp/core.c @@ -230,6 +230,65 @@ unsigned long dev_pm_opp_get_max_clock_latency(struct device *dev) } EXPORT_SYMBOL_GPL(dev_pm_opp_get_max_clock_latency); +/** + * dev_pm_opp_get_max_volt_latency() - Get max voltage latency in nanoseconds + * @dev: device for which we do this operation + * + * Return: This function returns the max voltage latency in nanoseconds. + * + * Locking: This function takes rcu_read_lock(). + */ +unsigned long dev_pm_opp_get_max_volt_latency(struct device *dev) +{ + struct device_opp *dev_opp; + struct dev_pm_opp *opp; + struct regulator *reg; + unsigned long latency_ns = 0; + unsigned long min_uV = ~0, max_uV = 0; + int ret; + + rcu_read_lock(); + + dev_opp = _find_device_opp(dev); + if (IS_ERR(dev_opp)) { + rcu_read_unlock(); + return 0; + } + + reg = dev_opp->regulator; + if (IS_ERR_OR_NULL(reg)) { + /* Regulator may not be required for device */ + if (reg) + dev_err(dev, "%s: Invalid regulator (%ld)\n", __func__, + PTR_ERR(reg)); + rcu_read_unlock(); + return 0; + } + + list_for_each_entry_rcu(opp, &dev_opp->opp_list, node) { + if (!opp->available) + continue; + + if (opp->u_volt_min < min_uV) + min_uV = opp->u_volt_min; + if (opp->u_volt_max > max_uV) + max_uV = opp->u_volt_max; + } + + rcu_read_unlock(); + + /* + * The caller needs to ensure that dev_opp (and hence the regulator) + * isn't freed, while we are executing this routine. + */ + ret = regulator_set_voltage_time(reg, min_uV, max_uV); + if (ret > 0) + latency_ns = ret * 1000; + + return latency_ns; +} +EXPORT_SYMBOL_GPL(dev_pm_opp_get_max_volt_latency); + /** * dev_pm_opp_get_suspend_opp() - Get suspend opp * @dev: device for which we do this operation diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h index c70a18ac9c8a..5daa43058ac1 100644 --- a/include/linux/pm_opp.h +++ b/include/linux/pm_opp.h @@ -34,6 +34,7 @@ bool dev_pm_opp_is_turbo(struct dev_pm_opp *opp); int dev_pm_opp_get_opp_count(struct device *dev); unsigned long dev_pm_opp_get_max_clock_latency(struct device *dev); +unsigned long dev_pm_opp_get_max_volt_latency(struct device *dev); struct dev_pm_opp *dev_pm_opp_get_suspend_opp(struct device *dev); struct dev_pm_opp *dev_pm_opp_find_freq_exact(struct device *dev, @@ -88,6 +89,11 @@ static inline unsigned long dev_pm_opp_get_max_clock_latency(struct device *dev) return 0; } +static inline unsigned long dev_pm_opp_get_max_volt_latency(struct device *dev) +{ + return 0; +} + static inline struct dev_pm_opp *dev_pm_opp_get_suspend_opp(struct device *dev) { return NULL; -- cgit v1.2.3 From 2174344765f472895c076d703c9cdc58215e1393 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 9 Feb 2016 10:30:36 +0530 Subject: PM / OPP: Introduce dev_pm_opp_get_max_transition_latency() In a few use cases (like cpufreq), it is desirable to get the maximum latency for changing OPPs. Add support for that. Signed-off-by: Viresh Kumar Reviewed-by: Stephen Boyd Signed-off-by: Rafael J.
Wysocki --- drivers/base/power/opp/core.c | 17 +++++++++++++++++ include/linux/pm_opp.h | 6 ++++++ 2 files changed, 23 insertions(+) (limited to 'include/linux') diff --git a/drivers/base/power/opp/core.c b/drivers/base/power/opp/core.c index ffe2406af882..b0f5c72f0fc3 100644 --- a/drivers/base/power/opp/core.c +++ b/drivers/base/power/opp/core.c @@ -289,6 +289,23 @@ unsigned long dev_pm_opp_get_max_volt_latency(struct device *dev) } EXPORT_SYMBOL_GPL(dev_pm_opp_get_max_volt_latency); +/** + * dev_pm_opp_get_max_transition_latency() - Get max transition latency in + * nanoseconds + * @dev: device for which we do this operation + * + * Return: This function returns the max transition latency, in nanoseconds, to + * switch from one OPP to other. + * + * Locking: This function takes rcu_read_lock(). + */ +unsigned long dev_pm_opp_get_max_transition_latency(struct device *dev) +{ + return dev_pm_opp_get_max_volt_latency(dev) + + dev_pm_opp_get_max_clock_latency(dev); +} +EXPORT_SYMBOL_GPL(dev_pm_opp_get_max_transition_latency); + /** * dev_pm_opp_get_suspend_opp() - Get suspend opp * @dev: device for which we do this operation diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h index 5daa43058ac1..59da3d9e11ea 100644 --- a/include/linux/pm_opp.h +++ b/include/linux/pm_opp.h @@ -35,6 +35,7 @@ bool dev_pm_opp_is_turbo(struct dev_pm_opp *opp); int dev_pm_opp_get_opp_count(struct device *dev); unsigned long dev_pm_opp_get_max_clock_latency(struct device *dev); unsigned long dev_pm_opp_get_max_volt_latency(struct device *dev); +unsigned long dev_pm_opp_get_max_transition_latency(struct device *dev); struct dev_pm_opp *dev_pm_opp_get_suspend_opp(struct device *dev); struct dev_pm_opp *dev_pm_opp_find_freq_exact(struct device *dev, @@ -94,6 +95,11 @@ static inline unsigned long dev_pm_opp_get_max_volt_latency(struct device *dev) return 0; } +static inline unsigned long dev_pm_opp_get_max_transition_latency(struct device *dev) +{ + return 0; +} + static inline struct dev_pm_opp *dev_pm_opp_get_suspend_opp(struct device *dev) { return NULL; -- cgit v1.2.3 From 6a0712f6f199e737aa5913d28ec4bd3a25de9660 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 9 Feb 2016 10:30:39 +0530 Subject: PM / OPP: Add dev_pm_opp_set_rate() This adds a routine, dev_pm_opp_set_rate(), responsible for configuring power-supply and clock source for an OPP. The OPP is found by matching against the target_freq passed to the routine. This shall replace similar code present in most of the OPP users and help simplify them a lot. Signed-off-by: Viresh Kumar Reviewed-by: Stephen Boyd Signed-off-by: Rafael J. Wysocki --- drivers/base/power/opp/core.c | 176 ++++++++++++++++++++++++++++++++++++++++++ include/linux/pm_opp.h | 6 ++ 2 files changed, 182 insertions(+) (limited to 'include/linux') diff --git a/drivers/base/power/opp/core.c b/drivers/base/power/opp/core.c index 7d7749ce1ce4..ab711c2c3e00 100644 --- a/drivers/base/power/opp/core.c +++ b/drivers/base/power/opp/core.c @@ -529,6 +529,182 @@ struct dev_pm_opp *dev_pm_opp_find_freq_floor(struct device *dev, } EXPORT_SYMBOL_GPL(dev_pm_opp_find_freq_floor); +/* + * The caller needs to ensure that device_opp (and hence the clk) isn't freed, + * while clk returned here is used. 
+ */ +static struct clk *_get_opp_clk(struct device *dev) +{ + struct device_opp *dev_opp; + struct clk *clk; + + rcu_read_lock(); + + dev_opp = _find_device_opp(dev); + if (IS_ERR(dev_opp)) { + dev_err(dev, "%s: device opp doesn't exist\n", __func__); + clk = ERR_CAST(dev_opp); + goto unlock; + } + + clk = dev_opp->clk; + if (IS_ERR(clk)) + dev_err(dev, "%s: No clock available for the device\n", + __func__); + +unlock: + rcu_read_unlock(); + return clk; +} + +static int _set_opp_voltage(struct device *dev, struct regulator *reg, + unsigned long u_volt, unsigned long u_volt_min, + unsigned long u_volt_max) +{ + int ret; + + /* Regulator not available for device */ + if (IS_ERR(reg)) { + dev_dbg(dev, "%s: regulator not available: %ld\n", __func__, + PTR_ERR(reg)); + return 0; + } + + dev_dbg(dev, "%s: voltages (mV): %lu %lu %lu\n", __func__, u_volt_min, + u_volt, u_volt_max); + + ret = regulator_set_voltage_triplet(reg, u_volt_min, u_volt, + u_volt_max); + if (ret) + dev_err(dev, "%s: failed to set voltage (%lu %lu %lu mV): %d\n", + __func__, u_volt_min, u_volt, u_volt_max, ret); + + return ret; +} + +/** + * dev_pm_opp_set_rate() - Configure new OPP based on frequency + * @dev: device for which we do this operation + * @target_freq: frequency to achieve + * + * This configures the power-supplies and clock source to the levels specified + * by the OPP corresponding to the target_freq. + * + * Locking: This function takes rcu_read_lock(). + */ +int dev_pm_opp_set_rate(struct device *dev, unsigned long target_freq) +{ + struct device_opp *dev_opp; + struct dev_pm_opp *old_opp, *opp; + struct regulator *reg; + struct clk *clk; + unsigned long freq, old_freq; + unsigned long u_volt, u_volt_min, u_volt_max; + unsigned long ou_volt, ou_volt_min, ou_volt_max; + int ret; + + if (unlikely(!target_freq)) { + dev_err(dev, "%s: Invalid target frequency %lu\n", __func__, + target_freq); + return -EINVAL; + } + + clk = _get_opp_clk(dev); + if (IS_ERR(clk)) + return PTR_ERR(clk); + + freq = clk_round_rate(clk, target_freq); + if ((long)freq <= 0) + freq = target_freq; + + old_freq = clk_get_rate(clk); + + /* Return early if nothing to do */ + if (old_freq == freq) { + dev_dbg(dev, "%s: old/new frequencies (%lu Hz) are same, nothing to do\n", + __func__, freq); + return 0; + } + + rcu_read_lock(); + + dev_opp = _find_device_opp(dev); + if (IS_ERR(dev_opp)) { + dev_err(dev, "%s: device opp doesn't exist\n", __func__); + rcu_read_unlock(); + return PTR_ERR(dev_opp); + } + + old_opp = dev_pm_opp_find_freq_ceil(dev, &old_freq); + if (!IS_ERR(old_opp)) { + ou_volt = old_opp->u_volt; + ou_volt_min = old_opp->u_volt_min; + ou_volt_max = old_opp->u_volt_max; + } else { + dev_err(dev, "%s: failed to find current OPP for freq %lu (%ld)\n", + __func__, old_freq, PTR_ERR(old_opp)); + } + + opp = dev_pm_opp_find_freq_ceil(dev, &freq); + if (IS_ERR(opp)) { + ret = PTR_ERR(opp); + dev_err(dev, "%s: failed to find OPP for freq %lu (%d)\n", + __func__, freq, ret); + rcu_read_unlock(); + return ret; + } + + u_volt = opp->u_volt; + u_volt_min = opp->u_volt_min; + u_volt_max = opp->u_volt_max; + + reg = dev_opp->regulator; + + rcu_read_unlock(); + + /* Scaling up? 
Scale voltage before frequency */ + if (freq > old_freq) { + ret = _set_opp_voltage(dev, reg, u_volt, u_volt_min, + u_volt_max); + if (ret) + goto restore_voltage; + } + + /* Change frequency */ + + dev_dbg(dev, "%s: switching OPP: %lu Hz --> %lu Hz\n", + __func__, old_freq, freq); + + ret = clk_set_rate(clk, freq); + if (ret) { + dev_err(dev, "%s: failed to set clock rate: %d\n", __func__, + ret); + goto restore_voltage; + } + + /* Scaling down? Scale voltage after frequency */ + if (freq < old_freq) { + ret = _set_opp_voltage(dev, reg, u_volt, u_volt_min, + u_volt_max); + if (ret) + goto restore_freq; + } + + return 0; + +restore_freq: + if (clk_set_rate(clk, old_freq)) + dev_err(dev, "%s: failed to restore old-freq (%lu Hz)\n", + __func__, old_freq); +restore_voltage: + /* This shouldn't harm even if the voltages weren't updated earlier */ + if (!IS_ERR(old_opp)) + _set_opp_voltage(dev, reg, ou_volt, ou_volt_min, ou_volt_max); + + return ret; +} +EXPORT_SYMBOL_GPL(dev_pm_opp_set_rate); + /* List-dev Helpers */ static void _kfree_list_dev_rcu(struct rcu_head *head) { diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h index 59da3d9e11ea..cccaf4a29e9f 100644 --- a/include/linux/pm_opp.h +++ b/include/linux/pm_opp.h @@ -64,6 +64,7 @@ int dev_pm_opp_set_prop_name(struct device *dev, const char *name); void dev_pm_opp_put_prop_name(struct device *dev); int dev_pm_opp_set_regulator(struct device *dev, const char *name); void dev_pm_opp_put_regulator(struct device *dev); +int dev_pm_opp_set_rate(struct device *dev, unsigned long target_freq); #else static inline unsigned long dev_pm_opp_get_voltage(struct dev_pm_opp *opp) { @@ -172,6 +173,11 @@ static inline int dev_pm_opp_set_regulator(struct device *dev, const char *name) static inline void dev_pm_opp_put_regulator(struct device *dev) {} +static inline int dev_pm_opp_set_rate(struct device *dev, unsigned long target_freq) +{ + return -EINVAL; +} + #endif /* CONFIG_PM_OPP */ #if defined(CONFIG_PM_OPP) && defined(CONFIG_OF) -- cgit v1.2.3 From b7172119714781d15128cd1ae029bd028b0bf37a Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Wed, 10 Feb 2016 14:17:41 +0100 Subject: apple-gmux: Fix build breakage if !CONFIG_ACPI The DRM drivers i915, nouveau and radeon may be compiled with CONFIG_ACPI not set, in which case acpi_dev_present() is undefined. Add a no-op stub for apple_gmux_present() which is used if CONFIG_APPLE_GMUX is not enabled to avoid build breakage. (CONFIG_APPLE_GMUX depends on CONFIG_ACPI.) 
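A rough usage sketch (hypothetical caller, not code from this patch or from any of the affected drivers): with the stub in place, a client can test for the gmux unconditionally, and the dead branch is discarded at compile time when CONFIG_APPLE_GMUX is off.

#include <linux/apple-gmux.h>

/* Hypothetical consumer: defer probing on dual-GPU Macs until gmux is up */
static int example_gpu_probe(void)
{
	/*
	 * Safe even on !CONFIG_ACPI kernels: the stub variant of
	 * apple_gmux_present() simply returns false, so acpi_dev_present()
	 * is never referenced in that configuration.
	 */
	if (apple_gmux_present())
		return -EPROBE_DEFER;

	return 0;
}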
Reported-by: Stephen Rothwell Signed-off-by: Lukas Wunner Signed-off-by: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/20160210131741.GA15492@wunner.de --- include/linux/apple-gmux.h | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/apple-gmux.h b/include/linux/apple-gmux.h index feebc2840462..b2d32e01dfe4 100644 --- a/include/linux/apple-gmux.h +++ b/include/linux/apple-gmux.h @@ -22,6 +22,8 @@ #define GMUX_ACPI_HID "APP000B" +#if IS_ENABLED(CONFIG_APPLE_GMUX) + /** * apple_gmux_present() - detect if gmux is built into the machine * @@ -33,7 +35,16 @@ */ static inline bool apple_gmux_present(void) { - return IS_ENABLED(CONFIG_APPLE_GMUX) && acpi_dev_present(GMUX_ACPI_HID); + return acpi_dev_present(GMUX_ACPI_HID); } +#else /* !CONFIG_APPLE_GMUX */ + +static inline bool apple_gmux_present(void) +{ + return false; +} + +#endif /* !CONFIG_APPLE_GMUX */ + #endif /* LINUX_APPLE_GMUX_H */ -- cgit v1.2.3 From d9b3fca27385eafe61c3ca6feab6cb1e7dc77482 Mon Sep 17 00:00:00 2001 From: Craig Gallek Date: Wed, 10 Feb 2016 11:50:37 -0500 Subject: tcp: __tcp_hdrlen() helper tcp_hdrlen is wasteful if you already have a pointer to struct tcphdr. This splits the size calculation into a helper function that can be used if a struct tcphdr is already available. Signed-off-by: Craig Gallek Signed-off-by: David S. Miller --- include/linux/tcp.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index d909feeeaea2..bcbf51da4e1e 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -29,9 +29,14 @@ static inline struct tcphdr *tcp_hdr(const struct sk_buff *skb) return (struct tcphdr *)skb_transport_header(skb); } +static inline unsigned int __tcp_hdrlen(const struct tcphdr *th) +{ + return th->doff * 4; +} + static inline unsigned int tcp_hdrlen(const struct sk_buff *skb) { - return tcp_hdr(skb)->doff * 4; + return __tcp_hdrlen(tcp_hdr(skb)); } static inline struct tcphdr *inner_tcp_hdr(const struct sk_buff *skb) -- cgit v1.2.3 From abbc30436d39dfed8ebfca338d253f211ac7b094 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 4 Feb 2016 13:31:19 +0100 Subject: ipv6: add option to drop unicast encapsulated in L2 multicast In order to solve a problem with 802.11, the so-called hole-196 attack, add an option (sysctl) called "drop_unicast_in_l2_multicast" which, if enabled, causes the stack to drop IPv6 unicast packets encapsulated in link-layer multi- or broadcast frames. Such frames can (as an attack) be created by any member of the same wireless network and transmitted as valid encrypted frames since the symmetric key for broadcast frames is shared between all stations. Reviewed-by: Julian Anastasov Signed-off-by: Johannes Berg Signed-off-by: David S. Miller --- Documentation/networking/ip-sysctl.txt | 6 ++++++ include/linux/ipv6.h | 1 + include/uapi/linux/ipv6.h | 1 + net/ipv6/addrconf.c | 8 ++++++++ net/ipv6/ip6_input.c | 10 ++++++++++ 5 files changed, 26 insertions(+) (limited to 'include/linux') diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index a53bbfaff1c7..e0e7350a4e6a 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -1674,6 +1674,12 @@ stable_secret - IPv6 address By default the stable secret is unset. 
+drop_unicast_in_l2_multicast - BOOLEAN + Drop any unicast IPv6 packets that are received in link-layer + multicast (or broadcast) frames. + + By default this is turned off. + icmp/*: ratelimit - INTEGER Limit the maximal rates for sending ICMPv6 packets. diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 402753bccafa..4a4c1ae826cb 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -50,6 +50,7 @@ struct ipv6_devconf { __s32 mc_forwarding; #endif __s32 disable_ipv6; + __s32 drop_unicast_in_l2_multicast; __s32 accept_dad; __s32 force_tllao; __s32 ndisc_notify; diff --git a/include/uapi/linux/ipv6.h b/include/uapi/linux/ipv6.h index 38b4fef20219..4c413570efe8 100644 --- a/include/uapi/linux/ipv6.h +++ b/include/uapi/linux/ipv6.h @@ -174,6 +174,7 @@ enum { DEVCONF_USE_OIF_ADDRS_ONLY, DEVCONF_ACCEPT_RA_MIN_HOP_LIMIT, DEVCONF_IGNORE_ROUTES_WITH_LINKDOWN, + DEVCONF_DROP_UNICAST_IN_L2_MULTICAST, DEVCONF_MAX }; diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 38eeddedfc21..23e325f39f8e 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -4711,6 +4711,7 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf, array[DEVCONF_IGNORE_ROUTES_WITH_LINKDOWN] = cnf->ignore_routes_with_linkdown; /* we omit DEVCONF_STABLE_SECRET for now */ array[DEVCONF_USE_OIF_ADDRS_ONLY] = cnf->use_oif_addrs_only; + array[DEVCONF_DROP_UNICAST_IN_L2_MULTICAST] = cnf->drop_unicast_in_l2_multicast; } static inline size_t inet6_ifla6_size(void) @@ -5784,6 +5785,13 @@ static struct addrconf_sysctl_table .mode = 0644, .proc_handler = addrconf_sysctl_ignore_routes_with_linkdown, }, + { + .procname = "drop_unicast_in_l2_multicast", + .data = &ipv6_devconf.drop_unicast_in_l2_multicast, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, { /* sentinel */ } diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index 9075acf081dd..31ac3c56da4b 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -134,6 +134,16 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt IPV6_ADDR_MC_SCOPE(&hdr->daddr) == 1) goto err; + /* If enabled, drop unicast packets that were encapsulated in link-layer + * multicast or broadcast to protect against the so-called "hole-196" + * attack in 802.11 wireless. + */ + if (!ipv6_addr_is_multicast(&hdr->daddr) && + (skb->pkt_type == PACKET_BROADCAST || + skb->pkt_type == PACKET_MULTICAST) && + idev->cnf.drop_unicast_in_l2_multicast) + goto err; + /* RFC4291 2.7 * Nodes must not originate a packet to a multicast address whose scope * field contains the reserved value 0; if such a packet is received, it -- cgit v1.2.3 From 7a02bf892d8f1e5298af1676f001bee410509d80 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 4 Feb 2016 13:31:20 +0100 Subject: ipv6: add option to drop unsolicited neighbor advertisements In certain 802.11 wireless deployments, there will be NA proxies that use knowledge of the network to correctly answer requests. To prevent unsolicited advertisements on the shared medium from being a problem, on such deployments wireless needs to drop them. Enable this by providing an option called "drop_unsolicited_na". Signed-off-by: Johannes Berg Signed-off-by: David S.
Miller --- Documentation/networking/ip-sysctl.txt | 7 +++++++ include/linux/ipv6.h | 1 + include/uapi/linux/ipv6.h | 1 + net/ipv6/addrconf.c | 8 ++++++++ net/ipv6/ndisc.c | 9 +++++++++ 5 files changed, 26 insertions(+) (limited to 'include/linux') diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index e0e7350a4e6a..24ce97f42d35 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -1680,6 +1680,13 @@ drop_unicast_in_l2_multicast - BOOLEAN By default this is turned off. +drop_unsolicited_na - BOOLEAN + Drop all unsolicited neighbor advertisements, for example if there's + a known good NA proxy on the network and such frames need not be used + (or in the case of 802.11, must not be used to prevent attacks.) + + By default this is turned off. + icmp/*: ratelimit - INTEGER Limit the maximal rates for sending ICMPv6 packets. diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 4a4c1ae826cb..4b2267e1b7c3 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -56,6 +56,7 @@ struct ipv6_devconf { __s32 ndisc_notify; __s32 suppress_frag_ndisc; __s32 accept_ra_mtu; + __s32 drop_unsolicited_na; struct ipv6_stable_secret { bool initialized; struct in6_addr secret; diff --git a/include/uapi/linux/ipv6.h b/include/uapi/linux/ipv6.h index 4c413570efe8..ec117b65d5a5 100644 --- a/include/uapi/linux/ipv6.h +++ b/include/uapi/linux/ipv6.h @@ -175,6 +175,7 @@ enum { DEVCONF_ACCEPT_RA_MIN_HOP_LIMIT, DEVCONF_IGNORE_ROUTES_WITH_LINKDOWN, DEVCONF_DROP_UNICAST_IN_L2_MULTICAST, + DEVCONF_DROP_UNSOLICITED_NA, DEVCONF_MAX }; diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 23e325f39f8e..ac0ba9e4e06b 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -4712,6 +4712,7 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf, /* we omit DEVCONF_STABLE_SECRET for now */ array[DEVCONF_USE_OIF_ADDRS_ONLY] = cnf->use_oif_addrs_only; array[DEVCONF_DROP_UNICAST_IN_L2_MULTICAST] = cnf->drop_unicast_in_l2_multicast; + array[DEVCONF_DROP_UNSOLICITED_NA] = cnf->drop_unsolicited_na; } static inline size_t inet6_ifla6_size(void) @@ -5792,6 +5793,13 @@ static struct addrconf_sysctl_table .mode = 0644, .proc_handler = proc_dointvec, }, + { + .procname = "drop_unsolicited_na", + .data = &ipv6_devconf.drop_unsolicited_na, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, { /* sentinel */ } diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 84afb9a77278..c245895a3d41 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -883,6 +883,7 @@ static void ndisc_recv_na(struct sk_buff *skb) offsetof(struct nd_msg, opt)); struct ndisc_options ndopts; struct net_device *dev = skb->dev; + struct inet6_dev *idev = __in6_dev_get(dev); struct inet6_ifaddr *ifp; struct neighbour *neigh; @@ -902,6 +903,14 @@ static void ndisc_recv_na(struct sk_buff *skb) return; } + /* For some 802.11 wireless deployments (and possibly other networks), + * there will be an NA proxy and unsolicited packets are attacks + * and thus should not be accepted.
+ */ + if (!msg->icmph.icmp6_solicited && idev && + idev->cnf.drop_unsolicited_na) + return; + if (!ndisc_parse_options(msg->opt, ndoptlen, &ndopts)) { ND_PRINTK(2, warn, "NS: invalid ND option\n"); return; -- cgit v1.2.3 From 76443456227097179c14826425f88a95d81a892e Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Fri, 5 Feb 2016 15:27:37 -0800 Subject: net: Move GSO csum into SKB_GSO_CB This patch moves the checksum maintained by GSO out of skb->csum and into the GSO context block in order to allow us to work on outer checksums while maintaining the inner checksum offsets in the case of the inner checksum being offloaded, while the outer checksums will be computed. While updating the code I also did a minor clean-up on gso_make_checksum. The change is mostly to make it so that we store the values and compute the checksum instead of computing the checksum and then storing the values we needed to update. Signed-off-by: Alexander Duyck Acked-by: Tom Herbert Signed-off-by: David S. Miller --- include/linux/skbuff.h | 14 +++++++------- net/core/skbuff.c | 16 +++++++++------- 2 files changed, 16 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 11f935c1a090..acece7ce376f 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -3549,6 +3549,7 @@ static inline struct sec_path *skb_sec_path(struct sk_buff *skb) struct skb_gso_cb { int mac_offset; int encap_level; + __wsum csum; __u16 csum_start; }; #define SKB_SGO_CB_OFFSET 32 @@ -3585,15 +3586,14 @@ static inline int gso_pskb_expand_head(struct sk_buff *skb, int extra) */ static inline __sum16 gso_make_checksum(struct sk_buff *skb, __wsum res) { - int plen = SKB_GSO_CB(skb)->csum_start - skb_headroom(skb) - - skb_transport_offset(skb); - __wsum partial; + unsigned char *csum_start = skb_transport_header(skb); + int plen = (skb->head + SKB_GSO_CB(skb)->csum_start) - csum_start; + __wsum partial = SKB_GSO_CB(skb)->csum; - partial = csum_partial(skb_transport_header(skb), plen, skb->csum); - skb->csum = res; - SKB_GSO_CB(skb)->csum_start -= plen; + SKB_GSO_CB(skb)->csum = res; + SKB_GSO_CB(skb)->csum_start = csum_start - skb->head; - return csum_fold(partial); + return csum_fold(csum_partial(csum_start, plen, partial)); } static inline bool skb_is_gso(const struct sk_buff *skb) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index b2df375ec9c2..02c638a643ea 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -3100,11 +3100,12 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb, if (!sg && !nskb->remcsum_offload) { nskb->ip_summed = CHECKSUM_NONE; - nskb->csum = skb_copy_and_csum_bits(head_skb, offset, - skb_put(nskb, len), - len, 0); + SKB_GSO_CB(nskb)->csum = + skb_copy_and_csum_bits(head_skb, offset, + skb_put(nskb, len), + len, 0); SKB_GSO_CB(nskb)->csum_start = - skb_headroom(nskb) + doffset; + skb_headroom(nskb) + doffset; continue; } @@ -3171,11 +3172,12 @@ skip_fraglist: perform_csum_check: if (!csum && !nskb->remcsum_offload) { - nskb->csum = skb_checksum(nskb, doffset, - nskb->len - doffset, 0); nskb->ip_summed = CHECKSUM_NONE; + SKB_GSO_CB(nskb)->csum = + skb_checksum(nskb, doffset, + nskb->len - doffset, 0); SKB_GSO_CB(nskb)->csum_start = - skb_headroom(nskb) + doffset; + skb_headroom(nskb) + doffset; } } while ((offset += len) < head_skb->len); -- cgit v1.2.3 From 08b64fcca942733413bc5ac2321d57021d3e8578 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Fri, 5 Feb 2016 15:27:49 -0800 Subject: net: Store checksum result for
offloaded GSO checksums This patch makes it so that we can offload the checksums for a packet up to a certain point and then begin computing the checksums via software. Setting this up is fairly straight forward as all we need to do is reset the values stored in csum and csum_start for the GSO context block. One complication for this is remote checksum offload. In order to allow the inner checksums to be offloaded while computing the outer checksum manually we needed to have some way of indicating that the offload wasn't real. In order to do that I replaced CHECKSUM_PARTIAL with CHECKSUM_UNNECESSARY in the case of us computing checksums for the outer header while skipping computing checksums for the inner headers. We clean up the ip_summed flag and set it to either CHECKSUM_PARTIAL or CHECKSUM_NONE once we hand the packet off to the next lower level. Signed-off-by: Alexander Duyck Signed-off-by: David S. Miller --- include/linux/skbuff.h | 15 +++++++++++++++ net/ipv4/tcp_offload.c | 8 ++++++-- 2 files changed, 21 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index acece7ce376f..a8fc2220e8ce 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2161,6 +2161,11 @@ static inline int skb_checksum_start_offset(const struct sk_buff *skb) return skb->csum_start - skb_headroom(skb); } +static inline unsigned char *skb_checksum_start(const struct sk_buff *skb) +{ + return skb->head + skb->csum_start; +} + static inline int skb_transport_offset(const struct sk_buff *skb) { return skb_transport_header(skb) - skb->data; @@ -3576,6 +3581,16 @@ static inline int gso_pskb_expand_head(struct sk_buff *skb, int extra) return 0; } +static inline void gso_reset_checksum(struct sk_buff *skb, __wsum res) +{ + /* Do not update partial checksums if remote checksum is enabled. */ + if (skb->remcsum_offload) + return; + + SKB_GSO_CB(skb)->csum = res; + SKB_GSO_CB(skb)->csum_start = skb_checksum_start(skb) - skb->head; +} + /* Compute the checksum for a gso segment. First compute the checksum value * from the start of transport header to SKB_GSO_CB(skb)->csum_start, and * then add in skb->csum (checksum from csum_start to end of packet). diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c index 9864a2dbadce..773083b7f1e9 100644 --- a/net/ipv4/tcp_offload.c +++ b/net/ipv4/tcp_offload.c @@ -135,7 +135,9 @@ struct sk_buff *tcp_gso_segment(struct sk_buff *skb, th->fin = th->psh = 0; th->check = newcheck; - if (skb->ip_summed != CHECKSUM_PARTIAL) + if (skb->ip_summed == CHECKSUM_PARTIAL) + gso_reset_checksum(skb, ~th->check); + else th->check = gso_make_checksum(skb, ~th->check); seq += mss; @@ -169,7 +171,9 @@ struct sk_buff *tcp_gso_segment(struct sk_buff *skb, skb->data_len); th->check = ~csum_fold((__force __wsum)((__force u32)th->check + (__force u32)delta)); - if (skb->ip_summed != CHECKSUM_PARTIAL) + if (skb->ip_summed == CHECKSUM_PARTIAL) + gso_reset_checksum(skb, ~th->check); + else th->check = gso_make_checksum(skb, ~th->check); out: return segs; -- cgit v1.2.3 From 815c52700746cdcc0874a33390bac334a4b90107 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Mon, 8 Feb 2016 23:29:21 +0200 Subject: igmp: Namespaceify igmp_max_memberships sysctl knob Signed-off-by: Nikolay Borisov Signed-off-by: David S. 
Miller --- include/linux/igmp.h | 1 - include/net/netns/ipv4.h | 2 ++ net/ipv4/igmp.c | 4 +--- net/ipv4/sysctl_net_ipv4.c | 14 +++++++------- net/ipv4/tcp_ipv4.c | 2 ++ 5 files changed, 12 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/igmp.h b/include/linux/igmp.h index 9c9de11549a7..57d6d06ce0b3 100644 --- a/include/linux/igmp.h +++ b/include/linux/igmp.h @@ -38,7 +38,6 @@ static inline struct igmpv3_query * } extern int sysctl_igmp_llm_reports; -extern int sysctl_igmp_max_memberships; extern int sysctl_igmp_max_msf; extern int sysctl_igmp_qrv; diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 4d6ec3f6fafe..759cf624eec2 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -108,6 +108,8 @@ struct netns_ipv4 { int sysctl_tcp_fin_timeout; unsigned int sysctl_tcp_notsent_lowat; + int sysctl_igmp_max_memberships; + struct ping_group_range ping_group_range; atomic_t dev_addr_genid; diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 05e4cba14162..5b86257c9d6b 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -107,7 +107,6 @@ #include #endif -#define IP_MAX_MEMBERSHIPS 20 #define IP_MAX_MSF 10 /* IGMP reports for link-local multicast groups are enabled by default */ @@ -1727,7 +1726,6 @@ static struct in_device *ip_mc_find_dev(struct net *net, struct ip_mreqn *imr) /* * Join a socket to a group */ -int sysctl_igmp_max_memberships __read_mostly = IP_MAX_MEMBERSHIPS; int sysctl_igmp_max_msf __read_mostly = IP_MAX_MSF; #ifdef CONFIG_IP_MULTICAST int sysctl_igmp_qrv __read_mostly = IGMP_QUERY_ROBUSTNESS_VARIABLE; @@ -2074,7 +2072,7 @@ int ip_mc_join_group(struct sock *sk, struct ip_mreqn *imr) count++; } err = -ENOBUFS; - if (count >= sysctl_igmp_max_memberships) + if (count >= net->ipv4.sysctl_igmp_max_memberships) goto done; iml = sock_kmalloc(sk, sizeof(*iml), GFP_KERNEL); if (!iml) diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 44bb59824267..6ea3dbb96db4 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -367,13 +367,6 @@ static struct ctl_table ipv4_table[] = { .mode = 0644, .proc_handler = proc_dointvec }, - { - .procname = "igmp_max_memberships", - .data = &sysctl_igmp_max_memberships, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec - }, { .procname = "igmp_max_msf", .data = &sysctl_igmp_max_msf, @@ -871,6 +864,13 @@ static struct ctl_table ipv4_net_table[] = { .mode = 0644, .proc_handler = proc_dointvec }, + { + .procname = "igmp_max_memberships", + .data = &init_net.ipv4.sysctl_igmp_max_memberships, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, { .procname = "tcp_keepalive_time", .data = &init_net.ipv4.sysctl_tcp_keepalive_time, diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 3f872a6bc274..4b203789900b 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2399,6 +2399,8 @@ static int __net_init tcp_sk_init(struct net *net) net->ipv4.sysctl_tcp_fin_timeout = TCP_FIN_TIMEOUT; net->ipv4.sysctl_tcp_notsent_lowat = UINT_MAX; + net->ipv4.sysctl_igmp_max_memberships = 20; + return 0; fail: tcp_sk_exit(net); -- cgit v1.2.3 From 166b6b2d6f01be67a83b87ab5c91350a68b17115 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Mon, 8 Feb 2016 23:29:22 +0200 Subject: igmp: Namespaceify igmp_max_msf sysctl knob Signed-off-by: Nikolay Borisov Signed-off-by: David S. 
Miller --- include/linux/igmp.h | 1 - include/net/netns/ipv4.h | 1 + net/ipv4/igmp.c | 5 +---- net/ipv4/ip_sockglue.c | 5 +++-- net/ipv4/sysctl_net_ipv4.c | 14 +++++++------- net/ipv4/tcp_ipv4.c | 1 + 6 files changed, 13 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/igmp.h b/include/linux/igmp.h index 57d6d06ce0b3..a91ec9f575e7 100644 --- a/include/linux/igmp.h +++ b/include/linux/igmp.h @@ -38,7 +38,6 @@ static inline struct igmpv3_query * } extern int sysctl_igmp_llm_reports; -extern int sysctl_igmp_max_msf; extern int sysctl_igmp_qrv; struct ip_sf_socklist { diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 759cf624eec2..522a2cfe1ad9 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -109,6 +109,7 @@ struct netns_ipv4 { unsigned int sysctl_tcp_notsent_lowat; int sysctl_igmp_max_memberships; + int sysctl_igmp_max_msf; struct ping_group_range ping_group_range; diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 5b86257c9d6b..6da2e467b63c 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -107,8 +107,6 @@ #include #endif -#define IP_MAX_MSF 10 - /* IGMP reports for link-local multicast groups are enabled by default */ int sysctl_igmp_llm_reports __read_mostly = 1; @@ -1726,7 +1724,6 @@ static struct in_device *ip_mc_find_dev(struct net *net, struct ip_mreqn *imr) /* * Join a socket to a group */ -int sysctl_igmp_max_msf __read_mostly = IP_MAX_MSF; #ifdef CONFIG_IP_MULTICAST int sysctl_igmp_qrv __read_mostly = IGMP_QUERY_ROBUSTNESS_VARIABLE; #endif @@ -2244,7 +2241,7 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct } /* else, add a new source to the filter */ - if (psl && psl->sl_count >= sysctl_igmp_max_msf) { + if (psl && psl->sl_count >= net->ipv4.sysctl_igmp_max_msf) { err = -ENOBUFS; goto done; } diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 5f73a7c03e27..92808f147ef5 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -571,6 +571,7 @@ static int do_ip_setsockopt(struct sock *sk, int level, int optname, char __user *optval, unsigned int optlen) { struct inet_sock *inet = inet_sk(sk); + struct net *net = sock_net(sk); int val = 0, err; bool needs_rtnl = setsockopt_needs_rtnl(optname); @@ -910,7 +911,7 @@ static int do_ip_setsockopt(struct sock *sk, int level, } /* numsrc >= (1G-4) overflow in 32 bits */ if (msf->imsf_numsrc >= 0x3ffffffcU || - msf->imsf_numsrc > sysctl_igmp_max_msf) { + msf->imsf_numsrc > net->ipv4.sysctl_igmp_max_msf) { kfree(msf); err = -ENOBUFS; break; @@ -1065,7 +1066,7 @@ static int do_ip_setsockopt(struct sock *sk, int level, /* numsrc >= (4G-140)/128 overflow in 32 bits */ if (gsf->gf_numsrc >= 0x1ffffff || - gsf->gf_numsrc > sysctl_igmp_max_msf) { + gsf->gf_numsrc > net->ipv4.sysctl_igmp_max_msf) { err = -ENOBUFS; goto mc_msf_out; } diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 6ea3dbb96db4..225659a02cf2 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -367,13 +367,6 @@ static struct ctl_table ipv4_table[] = { .mode = 0644, .proc_handler = proc_dointvec }, - { - .procname = "igmp_max_msf", - .data = &sysctl_igmp_max_msf, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec - }, #ifdef CONFIG_IP_MULTICAST { .procname = "igmp_qrv", @@ -871,6 +864,13 @@ static struct ctl_table ipv4_net_table[] = { .mode = 0644, .proc_handler = proc_dointvec }, + { + .procname = "igmp_max_msf", + .data = &init_net.ipv4.sysctl_igmp_max_msf, + .maxlen = sizeof(int), + .mode 
= 0644, + .proc_handler = proc_dointvec + }, { .procname = "tcp_keepalive_time", .data = &init_net.ipv4.sysctl_tcp_keepalive_time, diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 4b203789900b..055d8a9a0c61 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2400,6 +2400,7 @@ static int __net_init tcp_sk_init(struct net *net) net->ipv4.sysctl_tcp_notsent_lowat = UINT_MAX; net->ipv4.sysctl_igmp_max_memberships = 20; + net->ipv4.sysctl_igmp_max_msf = 10; return 0; fail: -- cgit v1.2.3 From 87a8a2ae65b7721893c7922f963502be8fa01c94 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Tue, 9 Feb 2016 00:13:50 +0200 Subject: igmp: Namespaceify igmp_llm_reports sysctl knob This was initially introduced in df2cf4a78e488d26 ("IGMP: Inhibit reports for local multicast groups") by defining the sysctl in the ipv4_net_table array, however it was never implemented to be namespace aware. Fix this by changing the code accordingly. Signed-off-by: David S. Miller --- include/linux/igmp.h | 1 - include/net/netns/ipv4.h | 1 + net/ipv4/igmp.c | 26 +++++++++++++++----------- net/ipv4/sysctl_net_ipv4.c | 2 +- net/ipv4/tcp_ipv4.c | 2 ++ 5 files changed, 19 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/igmp.h b/include/linux/igmp.h index a91ec9f575e7..c683f4bf642b 100644 --- a/include/linux/igmp.h +++ b/include/linux/igmp.h @@ -37,7 +37,6 @@ static inline struct igmpv3_query * return (struct igmpv3_query *)skb_transport_header(skb); } -extern int sysctl_igmp_llm_reports; extern int sysctl_igmp_qrv; struct ip_sf_socklist { diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 522a2cfe1ad9..cbbf8115e8a7 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -110,6 +110,7 @@ struct netns_ipv4 { int sysctl_igmp_max_memberships; int sysctl_igmp_max_msf; + int sysctl_igmp_llm_reports; struct ping_group_range ping_group_range; diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 6da2e467b63c..2e22ee0efc98 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -107,9 +107,6 @@ #include #endif -/* IGMP reports for link-local multicast groups are enabled by default */ -int sysctl_igmp_llm_reports __read_mostly = 1; - #ifdef CONFIG_IP_MULTICAST /* Parameter names and values are taken from igmp-v2-06 draft */ @@ -430,6 +427,7 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ip_mc_list *pmc, int type, int gdeleted, int sdeleted) { struct net_device *dev = pmc->interface->dev; + struct net *net = dev_net(dev); struct igmpv3_report *pih; struct igmpv3_grec *pgr = NULL; struct ip_sf_list *psf, *psf_next, *psf_prev, **psf_list; @@ -437,7 +435,7 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ip_mc_list *pmc, if (pmc->multiaddr == IGMP_ALL_HOSTS) return skb; - if (ipv4_is_local_multicast(pmc->multiaddr) && !sysctl_igmp_llm_reports) + if (ipv4_is_local_multicast(pmc->multiaddr) && !net->ipv4.sysctl_igmp_llm_reports) return skb; isquery = type == IGMPV3_MODE_IS_INCLUDE || @@ -540,6 +538,7 @@ empty_source: static int igmpv3_send_report(struct in_device *in_dev, struct ip_mc_list *pmc) { struct sk_buff *skb = NULL; + struct net *net = dev_net(in_dev->dev); int type; if (!pmc) { @@ -548,7 +547,7 @@ static int igmpv3_send_report(struct in_device *in_dev, struct ip_mc_list *pmc) if (pmc->multiaddr == IGMP_ALL_HOSTS) continue; if (ipv4_is_local_multicast(pmc->multiaddr) && - !sysctl_igmp_llm_reports) + !net->ipv4.sysctl_igmp_llm_reports) continue; spin_lock_bh(&pmc->lock); if (pmc->sfcount[MCAST_EXCLUDE]) @@ -684,7 
+683,7 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc, if (type == IGMPV3_HOST_MEMBERSHIP_REPORT) return igmpv3_send_report(in_dev, pmc); - if (ipv4_is_local_multicast(group) && !sysctl_igmp_llm_reports) + if (ipv4_is_local_multicast(group) && !net->ipv4.sysctl_igmp_llm_reports) return 0; if (type == IGMP_HOST_LEAVE_MESSAGE) @@ -855,12 +854,13 @@ static int igmp_marksources(struct ip_mc_list *pmc, int nsrcs, __be32 *srcs) static bool igmp_heard_report(struct in_device *in_dev, __be32 group) { struct ip_mc_list *im; + struct net *net = dev_net(in_dev->dev); /* Timers are only set for non-local groups */ if (group == IGMP_ALL_HOSTS) return false; - if (ipv4_is_local_multicast(group) && !sysctl_igmp_llm_reports) + if (ipv4_is_local_multicast(group) && !net->ipv4.sysctl_igmp_llm_reports) return false; rcu_read_lock(); @@ -884,6 +884,7 @@ static bool igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb, __be32 group = ih->group; int max_delay; int mark = 0; + struct net *net = dev_net(in_dev->dev); if (len == 8) { @@ -969,7 +970,7 @@ static bool igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb, if (im->multiaddr == IGMP_ALL_HOSTS) continue; if (ipv4_is_local_multicast(im->multiaddr) && - !sysctl_igmp_llm_reports) + !net->ipv4.sysctl_igmp_llm_reports) continue; spin_lock_bh(&im->lock); if (im->tm_running) @@ -1184,6 +1185,7 @@ static void igmp_group_dropped(struct ip_mc_list *im) { struct in_device *in_dev = im->interface; #ifdef CONFIG_IP_MULTICAST + struct net *net = dev_net(in_dev->dev); int reporter; #endif @@ -1195,7 +1197,7 @@ static void igmp_group_dropped(struct ip_mc_list *im) #ifdef CONFIG_IP_MULTICAST if (im->multiaddr == IGMP_ALL_HOSTS) return; - if (ipv4_is_local_multicast(im->multiaddr) && !sysctl_igmp_llm_reports) + if (ipv4_is_local_multicast(im->multiaddr) && !net->ipv4.sysctl_igmp_llm_reports) return; reporter = im->reporter; @@ -1220,6 +1222,7 @@ static void igmp_group_dropped(struct ip_mc_list *im) static void igmp_group_added(struct ip_mc_list *im) { struct in_device *in_dev = im->interface; + struct net *net = dev_net(in_dev->dev); if (im->loaded == 0) { im->loaded = 1; @@ -1229,7 +1232,7 @@ static void igmp_group_added(struct ip_mc_list *im) #ifdef CONFIG_IP_MULTICAST if (im->multiaddr == IGMP_ALL_HOSTS) return; - if (ipv4_is_local_multicast(im->multiaddr) && !sysctl_igmp_llm_reports) + if (ipv4_is_local_multicast(im->multiaddr) && !net->ipv4.sysctl_igmp_llm_reports) return; if (in_dev->dead) @@ -1530,6 +1533,7 @@ static void ip_mc_rejoin_groups(struct in_device *in_dev) #ifdef CONFIG_IP_MULTICAST struct ip_mc_list *im; int type; + struct net *net = dev_net(in_dev->dev); ASSERT_RTNL(); @@ -1537,7 +1541,7 @@ static void ip_mc_rejoin_groups(struct in_device *in_dev) if (im->multiaddr == IGMP_ALL_HOSTS) continue; if (ipv4_is_local_multicast(im->multiaddr) && - !sysctl_igmp_llm_reports) + !net->ipv4.sysctl_igmp_llm_reports) continue; /* a failover is happening and switches diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 225659a02cf2..fc40fa1303d3 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -852,7 +852,7 @@ static struct ctl_table ipv4_net_table[] = { }, { .procname = "igmp_link_local_mcast_reports", - .data = &sysctl_igmp_llm_reports, + .data = &init_net.ipv4.sysctl_igmp_llm_reports, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 055d8a9a0c61..6c3c1d5232c6 100644 --- 
a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2401,6 +2401,8 @@ static int __net_init tcp_sk_init(struct net *net) net->ipv4.sysctl_igmp_max_memberships = 20; net->ipv4.sysctl_igmp_max_msf = 10; + /* IGMP reports for link-local multicast groups are enabled by default */ + net->ipv4.sysctl_igmp_llm_reports = 1; return 0; fail: -- cgit v1.2.3 From 165094afcee79e4d5b6e94032a5d3be157460b4a Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Mon, 8 Feb 2016 23:29:24 +0200 Subject: igmp: Namespacify igmp_qrv sysctl knob Signed-off-by: Nikolay Borisov Signed-off-by: David S. Miller --- include/linux/igmp.h | 2 -- include/net/netns/ipv4.h | 1 + net/ipv4/igmp.c | 29 +++++++++++++++++------------ net/ipv4/sysctl_net_ipv4.c | 20 ++++++++++---------- net/ipv4/tcp_ipv4.c | 1 + 5 files changed, 29 insertions(+), 24 deletions(-) (limited to 'include/linux') diff --git a/include/linux/igmp.h b/include/linux/igmp.h index c683f4bf642b..12f6fba6d21a 100644 --- a/include/linux/igmp.h +++ b/include/linux/igmp.h @@ -37,8 +37,6 @@ static inline struct igmpv3_query * return (struct igmpv3_query *)skb_transport_header(skb); } -extern int sysctl_igmp_qrv; - struct ip_sf_socklist { unsigned int sl_max; unsigned int sl_count; diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index cbbf8115e8a7..848fe8056534 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -111,6 +111,7 @@ struct netns_ipv4 { int sysctl_igmp_max_memberships; int sysctl_igmp_max_msf; int sysctl_igmp_llm_reports; + int sysctl_igmp_qrv; struct ping_group_range ping_group_range; diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 2e22ee0efc98..7c95335bf85e 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -762,9 +762,10 @@ static void igmp_ifc_timer_expire(unsigned long data) static void igmp_ifc_event(struct in_device *in_dev) { + struct net *net = dev_net(in_dev->dev); if (IGMP_V1_SEEN(in_dev) || IGMP_V2_SEEN(in_dev)) return; - in_dev->mr_ifc_count = in_dev->mr_qrv ?: sysctl_igmp_qrv; + in_dev->mr_ifc_count = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv; igmp_ifc_start_timer(in_dev, 1); } @@ -1086,6 +1087,7 @@ static void ip_mc_filter_del(struct in_device *in_dev, __be32 addr) static void igmpv3_add_delrec(struct in_device *in_dev, struct ip_mc_list *im) { struct ip_mc_list *pmc; + struct net *net = dev_net(in_dev->dev); /* this is an "ip_mc_list" for convenience; only the fields below * are actually used. 
In particular, the refcnt and users are not @@ -1100,7 +1102,7 @@ static void igmpv3_add_delrec(struct in_device *in_dev, struct ip_mc_list *im) pmc->interface = im->interface; in_dev_hold(in_dev); pmc->multiaddr = im->multiaddr; - pmc->crcount = in_dev->mr_qrv ?: sysctl_igmp_qrv; + pmc->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv; pmc->sfmode = im->sfmode; if (pmc->sfmode == MCAST_INCLUDE) { struct ip_sf_list *psf; @@ -1245,7 +1247,7 @@ static void igmp_group_added(struct ip_mc_list *im) } /* else, v3 */ - im->crcount = in_dev->mr_qrv ?: sysctl_igmp_qrv; + im->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv; igmp_ifc_event(in_dev); #endif } @@ -1314,6 +1316,7 @@ static void ip_mc_hash_remove(struct in_device *in_dev, void ip_mc_inc_group(struct in_device *in_dev, __be32 addr) { struct ip_mc_list *im; + struct net *net = dev_net(in_dev->dev); ASSERT_RTNL(); @@ -1340,7 +1343,7 @@ void ip_mc_inc_group(struct in_device *in_dev, __be32 addr) spin_lock_init(&im->lock); #ifdef CONFIG_IP_MULTICAST setup_timer(&im->timer, igmp_timer_expire, (unsigned long)im); - im->unsolicit_count = sysctl_igmp_qrv; + im->unsolicit_count = net->ipv4.sysctl_igmp_qrv; #endif im->next_rcu = in_dev->mc_list; @@ -1640,6 +1643,7 @@ void ip_mc_down(struct in_device *in_dev) void ip_mc_init_dev(struct in_device *in_dev) { + struct net *net = dev_net(in_dev->dev); ASSERT_RTNL(); #ifdef CONFIG_IP_MULTICAST @@ -1647,7 +1651,7 @@ void ip_mc_init_dev(struct in_device *in_dev) (unsigned long)in_dev); setup_timer(&in_dev->mr_ifc_timer, igmp_ifc_timer_expire, (unsigned long)in_dev); - in_dev->mr_qrv = sysctl_igmp_qrv; + in_dev->mr_qrv = net->ipv4.sysctl_igmp_qrv; #endif spin_lock_init(&in_dev->mc_tomb_lock); @@ -1658,11 +1662,12 @@ void ip_mc_init_dev(struct in_device *in_dev) void ip_mc_up(struct in_device *in_dev) { struct ip_mc_list *pmc; + struct net *net = dev_net(in_dev->dev); ASSERT_RTNL(); #ifdef CONFIG_IP_MULTICAST - in_dev->mr_qrv = sysctl_igmp_qrv; + in_dev->mr_qrv = net->ipv4.sysctl_igmp_qrv; #endif ip_mc_inc_group(in_dev, IGMP_ALL_HOSTS); @@ -1728,9 +1733,6 @@ static struct in_device *ip_mc_find_dev(struct net *net, struct ip_mreqn *imr) /* * Join a socket to a group */ -#ifdef CONFIG_IP_MULTICAST -int sysctl_igmp_qrv __read_mostly = IGMP_QUERY_ROBUSTNESS_VARIABLE; -#endif static int ip_mc_del1_src(struct ip_mc_list *pmc, int sfmode, __be32 *psfsrc) @@ -1755,6 +1757,7 @@ static int ip_mc_del1_src(struct ip_mc_list *pmc, int sfmode, if (!psf->sf_count[MCAST_INCLUDE] && !psf->sf_count[MCAST_EXCLUDE]) { #ifdef CONFIG_IP_MULTICAST struct in_device *in_dev = pmc->interface; + struct net *net = dev_net(in_dev->dev); #endif /* no more filters for this source */ @@ -1765,7 +1768,7 @@ static int ip_mc_del1_src(struct ip_mc_list *pmc, int sfmode, #ifdef CONFIG_IP_MULTICAST if (psf->sf_oldin && !IGMP_V1_SEEN(in_dev) && !IGMP_V2_SEEN(in_dev)) { - psf->sf_crcount = in_dev->mr_qrv ?: sysctl_igmp_qrv; + psf->sf_crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv; psf->sf_next = pmc->tomb; pmc->tomb = psf; rv = 1; @@ -1823,12 +1826,13 @@ static int ip_mc_del_src(struct in_device *in_dev, __be32 *pmca, int sfmode, pmc->sfcount[MCAST_INCLUDE]) { #ifdef CONFIG_IP_MULTICAST struct ip_sf_list *psf; + struct net *net = dev_net(in_dev->dev); #endif /* filter mode change */ pmc->sfmode = MCAST_INCLUDE; #ifdef CONFIG_IP_MULTICAST - pmc->crcount = in_dev->mr_qrv ?: sysctl_igmp_qrv; + pmc->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv; in_dev->mr_ifc_count = pmc->crcount; for (psf = pmc->sources; psf; psf = 
psf->sf_next) psf->sf_crcount = 0; @@ -1995,6 +1999,7 @@ static int ip_mc_add_src(struct in_device *in_dev, __be32 *pmca, int sfmode, } else if (isexclude != (pmc->sfcount[MCAST_EXCLUDE] != 0)) { #ifdef CONFIG_IP_MULTICAST struct ip_sf_list *psf; + struct net *net = dev_net(pmc->interface->dev); in_dev = pmc->interface; #endif @@ -2006,7 +2011,7 @@ static int ip_mc_add_src(struct in_device *in_dev, __be32 *pmca, int sfmode, #ifdef CONFIG_IP_MULTICAST /* else no filters; keep old mode for reports */ - pmc->crcount = in_dev->mr_qrv ?: sysctl_igmp_qrv; + pmc->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv; in_dev->mr_ifc_count = pmc->crcount; for (psf = pmc->sources; psf; psf = psf->sf_next) psf->sf_crcount = 0; diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index fc40fa1303d3..b537338f5c97 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -367,16 +367,6 @@ static struct ctl_table ipv4_table[] = { .mode = 0644, .proc_handler = proc_dointvec }, -#ifdef CONFIG_IP_MULTICAST - { - .procname = "igmp_qrv", - .data = &sysctl_igmp_qrv, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &one - }, -#endif { .procname = "inet_peer_threshold", .data = &inet_peer_threshold, @@ -871,6 +861,16 @@ static struct ctl_table ipv4_net_table[] = { .mode = 0644, .proc_handler = proc_dointvec }, +#ifdef CONFIG_IP_MULTICAST + { + .procname = "igmp_qrv", + .data = &init_net.ipv4.sysctl_igmp_qrv, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &one + }, +#endif { .procname = "tcp_keepalive_time", .data = &init_net.ipv4.sysctl_tcp_keepalive_time, diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 6c3c1d5232c6..ba5d0146e3f0 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2403,6 +2403,7 @@ static int __net_init tcp_sk_init(struct net *net) net->ipv4.sysctl_igmp_max_msf = 10; /* IGMP reports for link-local multicast groups are enabled by default */ net->ipv4.sysctl_igmp_llm_reports = 1; + net->ipv4.sysctl_igmp_qrv = 2; return 0; fail: -- cgit v1.2.3 From df05c6f6e0bb41380b31ef99b49796ac43a2a2df Mon Sep 17 00:00:00 2001 From: Doug Anderson Date: Fri, 29 Jan 2016 23:07:26 +0100 Subject: ARM: 8506/1: common: DMA-mapping: add DMA_ATTR_ALLOC_SINGLE_PAGES attribute This patch adds the DMA_ATTR_ALLOC_SINGLE_PAGES attribute to the DMA-mapping subsystem. This attribute can be used as a hint to the DMA-mapping subsystem that it's likely not worth it to try to allocate large pages behind the scenes. Large pages are likely to make an IOMMU TLB work more efficiently but may not be worth it. See the Documentation contained in this patch for more details about this attribute and when to use it. Note that the name of the hint (DMA_ATTR_ALLOC_SINGLE_PAGES) is loosely based on the name MADV_NOHUGEPAGE. Just as there is MADV_NOHUGEPAGE vs. MADV_HUGEPAGE we could also add an "opposite" attribute to DMA_ATTR_ALLOC_SINGLE_PAGES. Without having the "opposite" attribute the lack of DMA_ATTR_ALLOC_SINGLE_PAGES means "use your best judgement about whether to use small pages or large pages". 
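A minimal sketch of how a driver might pass the new hint through the dma_attrs interface of this era (the function name and usage context are assumptions for illustration, not taken from the patch):

#include <linux/dma-attrs.h>
#include <linux/dma-mapping.h>

/* Hypothetical: a short-lived, sequentially accessed streaming buffer */
static void *example_alloc_stream_buf(struct device *dev, size_t size,
				      dma_addr_t *dma_handle)
{
	DEFINE_DMA_ATTRS(attrs);

	/* Hint that building large-page IOMMU mappings isn't worth it here */
	dma_set_attr(DMA_ATTR_ALLOC_SINGLE_PAGES, &attrs);

	return dma_alloc_attrs(dev, size, dma_handle, GFP_KERNEL, &attrs);
}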
Signed-off-by: Douglas Anderson Acked-by: Marek Szyprowski Reviewed-by: Robin Murphy Tested-by: Javier Martinez Canillas Signed-off-by: Russell King --- Documentation/DMA-attributes.txt | 26 ++++++++++++++++++++++++++ include/linux/dma-attrs.h | 1 + 2 files changed, 27 insertions(+) (limited to 'include/linux') diff --git a/Documentation/DMA-attributes.txt b/Documentation/DMA-attributes.txt index 18dc52c4f2a0..e8cf9cf873b3 100644 --- a/Documentation/DMA-attributes.txt +++ b/Documentation/DMA-attributes.txt @@ -100,3 +100,29 @@ allocated by dma_alloc_attrs() function from individual pages if it can be mapped as contiguous chunk into device dma address space. By specifying this attribute the allocated buffer is forced to be contiguous also in physical memory. + +DMA_ATTR_ALLOC_SINGLE_PAGES +--------------------------- + +This is a hint to the DMA-mapping subsystem that it's probably not worth +the time to try to allocate memory in a way that gives better TLB +efficiency (AKA it's not worth trying to build the mapping out of larger +pages). You might want to specify this if: +- You know that the accesses to this memory won't thrash the TLB. + You might know that the accesses are likely to be sequential or + that they aren't sequential but it's unlikely you'll ping-pong + between many addresses that are likely to be in different physical + pages. +- You know that the penalty of TLB misses while accessing the + memory will be small enough to be inconsequential. If you are + doing a heavy operation like decryption or decompression this + might be the case. +- You know that the DMA mapping is fairly transitory. If you expect + the mapping to have a short lifetime then it may be worth it to + optimize allocation (avoid coming up with large pages) instead of + getting the slight performance win of larger pages. +Setting this hint doesn't guarantee that you won't get huge pages, but it +means that we won't try quite as hard to get them. + +NOTE: At the moment DMA_ATTR_ALLOC_SINGLE_PAGES is only implemented on ARM, +though ARM64 patches will likely be posted soon. diff --git a/include/linux/dma-attrs.h b/include/linux/dma-attrs.h index 99c0be00b47c..5246239a4953 100644 --- a/include/linux/dma-attrs.h +++ b/include/linux/dma-attrs.h @@ -18,6 +18,7 @@ enum dma_attr { DMA_ATTR_NO_KERNEL_MAPPING, DMA_ATTR_SKIP_CPU_SYNC, DMA_ATTR_FORCE_CONTIGUOUS, + DMA_ATTR_ALLOC_SINGLE_PAGES, DMA_ATTR_MAX, }; -- cgit v1.2.3 From 8b6f2499ac45d5a0ab2e4b6f9613ab3f60416be1 Mon Sep 17 00:00:00 2001 From: Lorenzo Pieralisi Date: Mon, 1 Feb 2016 18:01:30 +0100 Subject: ARM: 8511/1: ARM64: kernel: PSCI: move PSCI idle management code to drivers/firmware ARM64 PSCI kernel interfaces that initialize idle states and implement the suspend API to enter them are generic and can be shared with the ARM architecture. To achieve that goal, this patch moves ARM64 PSCI idle management code to drivers/firmware, so that the interface to initialize and enter idle states can actually be shared by ARM and ARM64 arch back-ends. The ARM generic CPUidle implementation also requires the definition of a cpuidle_ops section entry for the kernel to initialize the CPUidle operations at boot based on the enable-method (i.e. ARM64 has the statically initialized cpu_ops counterparts for that purpose); therefore this patch also adds the required section entry on CONFIG_ARM for PSCI so that the kernel can initialize the PSCI CPUidle back-end when PSCI is the probed enable-method. On ARM64 this patch provides no functional change.
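For readers unfamiliar with the mechanism, here is a rough sketch (not from this patch) of how an ARM CPUidle back-end hooks itself up through such a section entry. The "myfw" names and the "vendor,myfw" enable-method string are invented for illustration; the real instance added by this patch is psci_cpuidle_ops further below, whose signatures these follow.

#include <linux/of.h>
#include <asm/cpuidle.h>

/* Parse this CPU's "cpu-idle-states" phandles from the device tree
 * and cache whatever firmware parameters are needed to enter them. */
static int myfw_cpu_init_idle(struct device_node *cpu_node, int cpu)
{
	return 0;	/* parsing elided in this sketch */
}

/* Enter the idle state selected by the CPUidle governor. */
static int myfw_cpu_suspend(unsigned long index)
{
	return 0;	/* firmware call elided in this sketch */
}

static struct cpuidle_ops myfw_cpuidle_ops __initdata = {
	.init = myfw_cpu_init_idle,
	.suspend = myfw_cpu_suspend,
};

/*
 * Emits a section entry that the kernel scans at boot: when a CPU
 * node's "enable-method" property matches the string, these ops are
 * copied in and used by the generic ARM CPUidle driver. Marking the
 * struct __initdata is fine because the ops are copied, not referenced.
 */
CPUIDLE_METHOD_OF_DECLARE(myfw, "vendor,myfw", &myfw_cpuidle_ops);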
Signed-off-by: Lorenzo Pieralisi Acked-by: Daniel Lezcano Acked-by: Catalin Marinas [arch/arm64] Acked-by: Mark Rutland Tested-by: Jisheng Zhang Cc: Will Deacon Cc: Sudeep Holla Cc: Daniel Lezcano Cc: Catalin Marinas Cc: Mark Rutland Cc: Jisheng Zhang Signed-off-by: Russell King --- arch/arm/Kconfig | 2 +- arch/arm64/kernel/psci.c | 99 +------------------------------------- drivers/firmware/psci.c | 120 +++++++++++++++++++++++++++++++++++++++++++++++ include/linux/psci.h | 3 ++ 4 files changed, 126 insertions(+), 98 deletions(-) (limited to 'include/linux') diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 8456f69bc77d..cc95ff8f07cb 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -2110,7 +2110,7 @@ config ARCH_SUSPEND_POSSIBLE def_bool y config ARM_CPU_SUSPEND - def_bool PM_SLEEP || BL_SWITCHER + def_bool PM_SLEEP || BL_SWITCHER || ARM_PSCI_FW depends on ARCH_SUSPEND_POSSIBLE config ARCH_HIBERNATION_POSSIBLE diff --git a/arch/arm64/kernel/psci.c b/arch/arm64/kernel/psci.c index f67f35b6edb1..42816bebb1e0 100644 --- a/arch/arm64/kernel/psci.c +++ b/arch/arm64/kernel/psci.c @@ -20,7 +20,6 @@ #include #include #include -#include #include @@ -28,73 +27,6 @@ #include #include #include -#include - -static DEFINE_PER_CPU_READ_MOSTLY(u32 *, psci_power_state); - -static int __maybe_unused cpu_psci_cpu_init_idle(unsigned int cpu) -{ - int i, ret, count = 0; - u32 *psci_states; - struct device_node *state_node, *cpu_node; - - cpu_node = of_get_cpu_node(cpu, NULL); - if (!cpu_node) - return -ENODEV; - - /* - * If the PSCI cpu_suspend function hook has not been initialized - * idle states must not be enabled, so bail out - */ - if (!psci_ops.cpu_suspend) - return -EOPNOTSUPP; - - /* Count idle states */ - while ((state_node = of_parse_phandle(cpu_node, "cpu-idle-states", - count))) { - count++; - of_node_put(state_node); - } - - if (!count) - return -ENODEV; - - psci_states = kcalloc(count, sizeof(*psci_states), GFP_KERNEL); - if (!psci_states) - return -ENOMEM; - - for (i = 0; i < count; i++) { - u32 state; - - state_node = of_parse_phandle(cpu_node, "cpu-idle-states", i); - - ret = of_property_read_u32(state_node, - "arm,psci-suspend-param", - &state); - if (ret) { - pr_warn(" * %s missing arm,psci-suspend-param property\n", - state_node->full_name); - of_node_put(state_node); - goto free_mem; - } - - of_node_put(state_node); - pr_debug("psci-power-state %#x index %d\n", state, i); - if (!psci_power_state_is_valid(state)) { - pr_warn("Invalid PSCI power state %#x\n", state); - ret = -EINVAL; - goto free_mem; - } - psci_states[i] = state; - } - /* Idle states parsed correctly, initialize per-cpu pointer */ - per_cpu(psci_power_state, cpu) = psci_states; - return 0; - -free_mem: - kfree(psci_states); - return ret; -} static int __init cpu_psci_cpu_init(unsigned int cpu) { @@ -178,38 +110,11 @@ static int cpu_psci_cpu_kill(unsigned int cpu) } #endif -static int psci_suspend_finisher(unsigned long index) -{ - u32 *state = __this_cpu_read(psci_power_state); - - return psci_ops.cpu_suspend(state[index - 1], - virt_to_phys(cpu_resume)); -} - -static int __maybe_unused cpu_psci_cpu_suspend(unsigned long index) -{ - int ret; - u32 *state = __this_cpu_read(psci_power_state); - /* - * idle state index 0 corresponds to wfi, should never be called - * from the cpu_suspend operations - */ - if (WARN_ON_ONCE(!index)) - return -EINVAL; - - if (!psci_power_state_loses_context(state[index - 1])) - ret = psci_ops.cpu_suspend(state[index - 1], 0); - else - ret = cpu_suspend(index, psci_suspend_finisher); - 
- return ret; -} - const struct cpu_operations cpu_psci_ops = { .name = "psci", #ifdef CONFIG_CPU_IDLE - .cpu_init_idle = cpu_psci_cpu_init_idle, - .cpu_suspend = cpu_psci_cpu_suspend, + .cpu_init_idle = psci_cpu_init_idle, + .cpu_suspend = psci_cpu_suspend_enter, #endif .cpu_init = cpu_psci_cpu_init, .cpu_prepare = cpu_psci_cpu_prepare, diff --git a/drivers/firmware/psci.c b/drivers/firmware/psci.c index f25cd79c8a79..11bfee8b79a9 100644 --- a/drivers/firmware/psci.c +++ b/drivers/firmware/psci.c @@ -14,6 +14,7 @@ #define pr_fmt(fmt) "psci: " fmt #include +#include #include #include #include @@ -21,10 +22,12 @@ #include #include #include +#include #include #include +#include #include #include #include @@ -244,6 +247,123 @@ static int __init psci_features(u32 psci_func_id) psci_func_id, 0, 0); } +#ifdef CONFIG_CPU_IDLE +static DEFINE_PER_CPU_READ_MOSTLY(u32 *, psci_power_state); + +static int psci_dt_cpu_init_idle(struct device_node *cpu_node, int cpu) +{ + int i, ret, count = 0; + u32 *psci_states; + struct device_node *state_node; + + /* + * If the PSCI cpu_suspend function hook has not been initialized + * idle states must not be enabled, so bail out + */ + if (!psci_ops.cpu_suspend) + return -EOPNOTSUPP; + + /* Count idle states */ + while ((state_node = of_parse_phandle(cpu_node, "cpu-idle-states", + count))) { + count++; + of_node_put(state_node); + } + + if (!count) + return -ENODEV; + + psci_states = kcalloc(count, sizeof(*psci_states), GFP_KERNEL); + if (!psci_states) + return -ENOMEM; + + for (i = 0; i < count; i++) { + u32 state; + + state_node = of_parse_phandle(cpu_node, "cpu-idle-states", i); + + ret = of_property_read_u32(state_node, + "arm,psci-suspend-param", + &state); + if (ret) { + pr_warn(" * %s missing arm,psci-suspend-param property\n", + state_node->full_name); + of_node_put(state_node); + goto free_mem; + } + + of_node_put(state_node); + pr_debug("psci-power-state %#x index %d\n", state, i); + if (!psci_power_state_is_valid(state)) { + pr_warn("Invalid PSCI power state %#x\n", state); + ret = -EINVAL; + goto free_mem; + } + psci_states[i] = state; + } + /* Idle states parsed correctly, initialize per-cpu pointer */ + per_cpu(psci_power_state, cpu) = psci_states; + return 0; + +free_mem: + kfree(psci_states); + return ret; +} + +int psci_cpu_init_idle(unsigned int cpu) +{ + struct device_node *cpu_node; + int ret; + + cpu_node = of_get_cpu_node(cpu, NULL); + if (!cpu_node) + return -ENODEV; + + ret = psci_dt_cpu_init_idle(cpu_node, cpu); + + of_node_put(cpu_node); + + return ret; +} + +static int psci_suspend_finisher(unsigned long index) +{ + u32 *state = __this_cpu_read(psci_power_state); + + return psci_ops.cpu_suspend(state[index - 1], + virt_to_phys(cpu_resume)); +} + +int psci_cpu_suspend_enter(unsigned long index) +{ + int ret; + u32 *state = __this_cpu_read(psci_power_state); + /* + * idle state index 0 corresponds to wfi, should never be called + * from the cpu_suspend operations + */ + if (WARN_ON_ONCE(!index)) + return -EINVAL; + + if (!psci_power_state_loses_context(state[index - 1])) + ret = psci_ops.cpu_suspend(state[index - 1], 0); + else + ret = cpu_suspend(index, psci_suspend_finisher); + + return ret; +} + +/* ARM specific CPU idle operations */ +#ifdef CONFIG_ARM +static struct cpuidle_ops psci_cpuidle_ops __initdata = { + .suspend = psci_cpu_suspend_enter, + .init = psci_dt_cpu_init_idle, +}; + +CPUIDLE_METHOD_OF_DECLARE(psci, "arm,psci", &psci_cpuidle_ops); +#endif +#endif + static int psci_system_suspend(unsigned long unused) { return 
invoke_psci_fn(PSCI_FN_NATIVE(1_0, SYSTEM_SUSPEND), diff --git a/include/linux/psci.h b/include/linux/psci.h index 12c4865457ad..393efe2edf9a 100644 --- a/include/linux/psci.h +++ b/include/linux/psci.h @@ -24,6 +24,9 @@ bool psci_tos_resident_on(int cpu); bool psci_power_state_loses_context(u32 state); bool psci_power_state_is_valid(u32 state); +int psci_cpu_init_idle(unsigned int cpu); +int psci_cpu_suspend_enter(unsigned long index); + struct psci_operations { int (*cpu_suspend)(u32 state, unsigned long entry_point); int (*cpu_off)(u32 state); -- cgit v1.2.3 From e2fccf5c15157218f0b6488168f98183f82c1eda Mon Sep 17 00:00:00 2001 From: Markus Pargmann Date: Mon, 14 Dec 2015 14:53:50 +0100 Subject: mfd: fsl-imx25-tsadc: Register touchscreen ADC driver This is the core driver for imx25 touchscreen/adc driver. The module has one shared ADC and two different conversion queues which use the ADC. The two queues are identical. Both can be used for general purpose ADC but one is meant to be used for touchscreens. This driver is the core which manages the central components and registers of the TSC/ADC unit. It manages the IRQs and forwards them to the correct components. Signed-off-by: Markus Pargmann Signed-off-by: Denis Carikli [ensure correct ADC clock depending on the IPG clock] Signed-off-by: Juergen Borleis Acked-by: Jonathan Cameron Signed-off-by: Lee Jones --- drivers/mfd/Kconfig | 9 ++ drivers/mfd/Makefile | 2 + drivers/mfd/fsl-imx25-tsadc.c | 203 ++++++++++++++++++++++++++++++++++++++++ include/linux/mfd/imx25-tsadc.h | 140 +++++++++++++++++++++++++++ 4 files changed, 354 insertions(+) create mode 100644 drivers/mfd/fsl-imx25-tsadc.c create mode 100644 include/linux/mfd/imx25-tsadc.h (limited to 'include/linux') diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig index 9ca66de0c1c1..6980e0463c8c 100644 --- a/drivers/mfd/Kconfig +++ b/drivers/mfd/Kconfig @@ -271,6 +271,15 @@ config MFD_MC13XXX_I2C help Select this if your MC13xxx is connected via an I2C bus. +config MFD_MX25_TSADC + tristate "Freescale i.MX25 integrated Touchscreen and ADC unit" + select REGMAP_MMIO + depends on (SOC_IMX25 && OF) || COMPILE_TEST + help + Enable support for the integrated Touchscreen and ADC unit of the + i.MX25 processors. They consist of a conversion queue for general + purpose ADC and a queue for Touchscreens. + config MFD_HI6421_PMIC tristate "HiSilicon Hi6421 PMU/Codec IC" depends on OF diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile index 0f230a6103f8..8706bf9cf51b 100644 --- a/drivers/mfd/Makefile +++ b/drivers/mfd/Makefile @@ -85,6 +85,8 @@ obj-$(CONFIG_TWL4030_POWER) += twl4030-power.o obj-$(CONFIG_MFD_TWL4030_AUDIO) += twl4030-audio.o obj-$(CONFIG_TWL6040_CORE) += twl6040.o +obj-$(CONFIG_MFD_MX25_TSADC) += fsl-imx25-tsadc.o + obj-$(CONFIG_MFD_MC13XXX) += mc13xxx-core.o obj-$(CONFIG_MFD_MC13XXX_SPI) += mc13xxx-spi.o obj-$(CONFIG_MFD_MC13XXX_I2C) += mc13xxx-i2c.o diff --git a/drivers/mfd/fsl-imx25-tsadc.c b/drivers/mfd/fsl-imx25-tsadc.c new file mode 100644 index 000000000000..77b2675cf8f5 --- /dev/null +++ b/drivers/mfd/fsl-imx25-tsadc.c @@ -0,0 +1,203 @@ +/* + * Copyright (C) 2014-2015 Pengutronix, Markus Pargmann + * + * This program is free software; you can redistribute it and/or modify it under + * the terms of the GNU General Public License version 2 as published by the + * Free Software Foundation. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static struct regmap_config mx25_tsadc_regmap_config = { + .fast_io = true, + .max_register = 8, + .reg_bits = 32, + .val_bits = 32, + .reg_stride = 4, +}; + +static void mx25_tsadc_irq_handler(struct irq_desc *desc) +{ + struct mx25_tsadc *tsadc = irq_desc_get_handler_data(desc); + struct irq_chip *chip = irq_desc_get_chip(desc); + u32 status; + + chained_irq_enter(chip, desc); + + regmap_read(tsadc->regs, MX25_TSC_TGSR, &status); + + if (status & MX25_TGSR_GCQ_INT) + generic_handle_irq(irq_find_mapping(tsadc->domain, 1)); + + if (status & MX25_TGSR_TCQ_INT) + generic_handle_irq(irq_find_mapping(tsadc->domain, 0)); + + chained_irq_exit(chip, desc); +} + +static int mx25_tsadc_domain_map(struct irq_domain *d, unsigned int irq, + irq_hw_number_t hwirq) +{ + struct mx25_tsadc *tsadc = d->host_data; + + irq_set_chip_data(irq, tsadc); + irq_set_chip_and_handler(irq, &dummy_irq_chip, + handle_level_irq); + irq_modify_status(irq, IRQ_NOREQUEST, IRQ_NOPROBE); + + return 0; +} + +static struct irq_domain_ops mx25_tsadc_domain_ops = { + .map = mx25_tsadc_domain_map, + .xlate = irq_domain_xlate_onecell, +}; + +static int mx25_tsadc_setup_irq(struct platform_device *pdev, + struct mx25_tsadc *tsadc) +{ + struct device *dev = &pdev->dev; + struct device_node *np = dev->of_node; + int irq; + + irq = platform_get_irq(pdev, 0); + if (irq <= 0) { + dev_err(dev, "Failed to get irq\n"); + return irq; + } + + tsadc->domain = irq_domain_add_simple(np, 2, 0, &mx25_tsadc_domain_ops, + tsadc); + if (!tsadc->domain) { + dev_err(dev, "Failed to add irq domain\n"); + return -ENOMEM; + } + + irq_set_chained_handler(irq, mx25_tsadc_irq_handler); + irq_set_handler_data(irq, tsadc); + + return 0; +} + +static void mx25_tsadc_setup_clk(struct platform_device *pdev, + struct mx25_tsadc *tsadc) +{ + unsigned clk_div; + + /* + * According to the datasheet the ADC clock should never + * exceed 1,75 MHz. Base clock is the IPG and the ADC unit uses + * a funny clock divider. To keep the ADC conversion time constant + * adapt the ADC internal clock divider to the IPG clock rate. 
+ */ + + dev_dbg(&pdev->dev, "Found master clock at %lu Hz\n", + clk_get_rate(tsadc->clk)); + + clk_div = DIV_ROUND_UP(clk_get_rate(tsadc->clk), 1750000); + dev_dbg(&pdev->dev, "Setting up ADC clock divider to %u\n", clk_div); + + /* adc clock = IPG clock / (2 * div + 2) */ + clk_div -= 2; + clk_div /= 2; + + /* + * the ADC clock divider changes its behaviour when values below 4 + * are used: it is fixed to "/ 10" in this case + */ + clk_div = max_t(unsigned, 4, clk_div); + + dev_dbg(&pdev->dev, "Resulting ADC conversion clock at %lu Hz\n", + clk_get_rate(tsadc->clk) / (2 * clk_div + 2)); + + regmap_update_bits(tsadc->regs, MX25_TSC_TGCR, + MX25_TGCR_ADCCLKCFG(0x1f), + MX25_TGCR_ADCCLKCFG(clk_div)); +} + +static int mx25_tsadc_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct device_node *np = dev->of_node; + struct mx25_tsadc *tsadc; + struct resource *res; + int ret; + void __iomem *iomem; + + tsadc = devm_kzalloc(dev, sizeof(*tsadc), GFP_KERNEL); + if (!tsadc) + return -ENOMEM; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + iomem = devm_ioremap_resource(dev, res); + if (IS_ERR(iomem)) + return PTR_ERR(iomem); + + tsadc->regs = devm_regmap_init_mmio(dev, iomem, + &mx25_tsadc_regmap_config); + if (IS_ERR(tsadc->regs)) { + dev_err(dev, "Failed to initialize regmap\n"); + return PTR_ERR(tsadc->regs); + } + + tsadc->clk = devm_clk_get(dev, "ipg"); + if (IS_ERR(tsadc->clk)) { + dev_err(dev, "Failed to get ipg clock\n"); + return PTR_ERR(tsadc->clk); + } + + /* setup clock according to the datasheet */ + mx25_tsadc_setup_clk(pdev, tsadc); + + /* Enable clock and reset the component */ + regmap_update_bits(tsadc->regs, MX25_TSC_TGCR, MX25_TGCR_CLK_EN, + MX25_TGCR_CLK_EN); + regmap_update_bits(tsadc->regs, MX25_TSC_TGCR, MX25_TGCR_TSC_RST, + MX25_TGCR_TSC_RST); + + /* Setup powersaving mode, but enable internal reference voltage */ + regmap_update_bits(tsadc->regs, MX25_TSC_TGCR, MX25_TGCR_POWERMODE_MASK, + MX25_TGCR_POWERMODE_SAVE); + regmap_update_bits(tsadc->regs, MX25_TSC_TGCR, MX25_TGCR_INTREFEN, + MX25_TGCR_INTREFEN); + + ret = mx25_tsadc_setup_irq(pdev, tsadc); + if (ret) + return ret; + + platform_set_drvdata(pdev, tsadc); + + of_platform_populate(np, NULL, NULL, dev); + + return 0; +} + +static const struct of_device_id mx25_tsadc_ids[] = { + { .compatible = "fsl,imx25-tsadc" }, + { /* Sentinel */ } +}; + +static struct platform_driver mx25_tsadc_driver = { + .driver = { + .name = "mx25-tsadc", + .of_match_table = of_match_ptr(mx25_tsadc_ids), + }, + .probe = mx25_tsadc_probe, +}; +module_platform_driver(mx25_tsadc_driver); + +MODULE_DESCRIPTION("MFD for ADC/TSC for Freescale mx25"); +MODULE_AUTHOR("Markus Pargmann "); +MODULE_LICENSE("GPL v2"); +MODULE_ALIAS("platform:mx25-tsadc"); diff --git a/include/linux/mfd/imx25-tsadc.h b/include/linux/mfd/imx25-tsadc.h new file mode 100644 index 000000000000..7fe4b8c3baac --- /dev/null +++ b/include/linux/mfd/imx25-tsadc.h @@ -0,0 +1,140 @@ +#ifndef _LINUX_INCLUDE_MFD_IMX25_TSADC_H_ +#define _LINUX_INCLUDE_MFD_IMX25_TSADC_H_ + +struct regmap; +struct clk; + +struct mx25_tsadc { + struct regmap *regs; + struct irq_domain *domain; + struct clk *clk; +}; + +#define MX25_TSC_TGCR 0x00 +#define MX25_TSC_TGSR 0x04 +#define MX25_TSC_TICR 0x08 + +/* The same register layout for TC and GC queue */ +#define MX25_ADCQ_FIFO 0x00 +#define MX25_ADCQ_CR 0x04 +#define MX25_ADCQ_SR 0x08 +#define MX25_ADCQ_MR 0x0c +#define MX25_ADCQ_ITEM_7_0 0x20 +#define MX25_ADCQ_ITEM_15_8 0x24 +#define MX25_ADCQ_CFG(n) (0x40 + ((n) 
* 0x4)) + +#define MX25_ADCQ_MR_MASK 0xffffffff + +/* TGCR */ +#define MX25_TGCR_PDBTIME(x) ((x) << 25) +#define MX25_TGCR_PDBTIME_MASK GENMASK(31, 25) +#define MX25_TGCR_PDBEN BIT(24) +#define MX25_TGCR_PDEN BIT(23) +#define MX25_TGCR_ADCCLKCFG(x) ((x) << 16) +#define MX25_TGCR_GET_ADCCLK(x) (((x) >> 16) & 0x1f) +#define MX25_TGCR_INTREFEN BIT(10) +#define MX25_TGCR_POWERMODE_MASK GENMASK(9, 8) +#define MX25_TGCR_POWERMODE_SAVE (1 << 8) +#define MX25_TGCR_POWERMODE_ON (2 << 8) +#define MX25_TGCR_STLC BIT(5) +#define MX25_TGCR_SLPC BIT(4) +#define MX25_TGCR_FUNC_RST BIT(2) +#define MX25_TGCR_TSC_RST BIT(1) +#define MX25_TGCR_CLK_EN BIT(0) + +/* TGSR */ +#define MX25_TGSR_SLP_INT BIT(2) +#define MX25_TGSR_GCQ_INT BIT(1) +#define MX25_TGSR_TCQ_INT BIT(0) + +/* ADCQ_ITEM_* */ +#define _MX25_ADCQ_ITEM(item, x) ((x) << ((item) * 4)) +#define MX25_ADCQ_ITEM(item, x) ((item) >= 8 ? \ + _MX25_ADCQ_ITEM((item) - 8, (x)) : _MX25_ADCQ_ITEM((item), (x))) + +/* ADCQ_FIFO (TCQFIFO and GCQFIFO) */ +#define MX25_ADCQ_FIFO_DATA(x) (((x) >> 4) & 0xfff) +#define MX25_ADCQ_FIFO_ID(x) ((x) & 0xf) + +/* ADCQ_CR (TCQR and GCQR) */ +#define MX25_ADCQ_CR_PDCFG_LEVEL BIT(19) +#define MX25_ADCQ_CR_PDMSK BIT(18) +#define MX25_ADCQ_CR_FRST BIT(17) +#define MX25_ADCQ_CR_QRST BIT(16) +#define MX25_ADCQ_CR_RWAIT_MASK GENMASK(15, 12) +#define MX25_ADCQ_CR_RWAIT(x) ((x) << 12) +#define MX25_ADCQ_CR_WMRK_MASK GENMASK(11, 8) +#define MX25_ADCQ_CR_WMRK(x) ((x) << 8) +#define MX25_ADCQ_CR_LITEMID_MASK (0xf << 4) +#define MX25_ADCQ_CR_LITEMID(x) ((x) << 4) +#define MX25_ADCQ_CR_RPT BIT(3) +#define MX25_ADCQ_CR_FQS BIT(2) +#define MX25_ADCQ_CR_QSM_MASK GENMASK(1, 0) +#define MX25_ADCQ_CR_QSM_PD 0x1 +#define MX25_ADCQ_CR_QSM_FQS 0x2 +#define MX25_ADCQ_CR_QSM_FQS_PD 0x3 + +/* ADCQ_SR (TCQSR and GCQSR) */ +#define MX25_ADCQ_SR_FDRY BIT(15) +#define MX25_ADCQ_SR_FULL BIT(14) +#define MX25_ADCQ_SR_EMPT BIT(13) +#define MX25_ADCQ_SR_FDN(x) (((x) >> 8) & 0x1f) +#define MX25_ADCQ_SR_FRR BIT(6) +#define MX25_ADCQ_SR_FUR BIT(5) +#define MX25_ADCQ_SR_FOR BIT(4) +#define MX25_ADCQ_SR_EOQ BIT(1) +#define MX25_ADCQ_SR_PD BIT(0) + +/* ADCQ_MR (TCQMR and GCQMR) */ +#define MX25_ADCQ_MR_FDRY_DMA BIT(31) +#define MX25_ADCQ_MR_FER_DMA BIT(22) +#define MX25_ADCQ_MR_FUR_DMA BIT(21) +#define MX25_ADCQ_MR_FOR_DMA BIT(20) +#define MX25_ADCQ_MR_EOQ_DMA BIT(17) +#define MX25_ADCQ_MR_PD_DMA BIT(16) +#define MX25_ADCQ_MR_FDRY_IRQ BIT(15) +#define MX25_ADCQ_MR_FER_IRQ BIT(6) +#define MX25_ADCQ_MR_FUR_IRQ BIT(5) +#define MX25_ADCQ_MR_FOR_IRQ BIT(4) +#define MX25_ADCQ_MR_EOQ_IRQ BIT(1) +#define MX25_ADCQ_MR_PD_IRQ BIT(0) + +/* ADCQ_CFG (TICR, TCC0-7,GCC0-7) */ +#define MX25_ADCQ_CFG_SETTLING_TIME(x) ((x) << 24) +#define MX25_ADCQ_CFG_IGS (1 << 20) +#define MX25_ADCQ_CFG_NOS_MASK GENMASK(19, 16) +#define MX25_ADCQ_CFG_NOS(x) (((x) - 1) << 16) +#define MX25_ADCQ_CFG_WIPER (1 << 15) +#define MX25_ADCQ_CFG_YNLR (1 << 14) +#define MX25_ADCQ_CFG_YPLL_HIGH (0 << 12) +#define MX25_ADCQ_CFG_YPLL_OFF (1 << 12) +#define MX25_ADCQ_CFG_YPLL_LOW (3 << 12) +#define MX25_ADCQ_CFG_XNUR_HIGH (0 << 10) +#define MX25_ADCQ_CFG_XNUR_OFF (1 << 10) +#define MX25_ADCQ_CFG_XNUR_LOW (3 << 10) +#define MX25_ADCQ_CFG_XPUL_HIGH (0 << 9) +#define MX25_ADCQ_CFG_XPUL_OFF (1 << 9) +#define MX25_ADCQ_CFG_REFP(sel) ((sel) << 7) +#define MX25_ADCQ_CFG_REFP_YP MX25_ADCQ_CFG_REFP(0) +#define MX25_ADCQ_CFG_REFP_XP MX25_ADCQ_CFG_REFP(1) +#define MX25_ADCQ_CFG_REFP_EXT MX25_ADCQ_CFG_REFP(2) +#define MX25_ADCQ_CFG_REFP_INT MX25_ADCQ_CFG_REFP(3) +#define MX25_ADCQ_CFG_REFP_MASK GENMASK(8, 7) +#define 
MX25_ADCQ_CFG_IN(sel) ((sel) << 4) +#define MX25_ADCQ_CFG_IN_XP MX25_ADCQ_CFG_IN(0) +#define MX25_ADCQ_CFG_IN_YP MX25_ADCQ_CFG_IN(1) +#define MX25_ADCQ_CFG_IN_XN MX25_ADCQ_CFG_IN(2) +#define MX25_ADCQ_CFG_IN_YN MX25_ADCQ_CFG_IN(3) +#define MX25_ADCQ_CFG_IN_WIPER MX25_ADCQ_CFG_IN(4) +#define MX25_ADCQ_CFG_IN_AUX0 MX25_ADCQ_CFG_IN(5) +#define MX25_ADCQ_CFG_IN_AUX1 MX25_ADCQ_CFG_IN(6) +#define MX25_ADCQ_CFG_IN_AUX2 MX25_ADCQ_CFG_IN(7) +#define MX25_ADCQ_CFG_REFN(sel) ((sel) << 2) +#define MX25_ADCQ_CFG_REFN_XN MX25_ADCQ_CFG_REFN(0) +#define MX25_ADCQ_CFG_REFN_YN MX25_ADCQ_CFG_REFN(1) +#define MX25_ADCQ_CFG_REFN_NGND MX25_ADCQ_CFG_REFN(2) +#define MX25_ADCQ_CFG_REFN_NGND2 MX25_ADCQ_CFG_REFN(3) +#define MX25_ADCQ_CFG_REFN_MASK GENMASK(3, 2) +#define MX25_ADCQ_CFG_PENIACK (1 << 1) + +#endif /* _LINUX_INCLUDE_MFD_IMX25_TSADC_H_ */ -- cgit v1.2.3 From 65b6555971d0498b18a0b2fdc6dd7c822f2d69b7 Mon Sep 17 00:00:00 2001 From: "Andrew F. Davis" Date: Mon, 25 Jan 2016 09:43:44 -0600 Subject: mfd: tps65912: Remove old driver in preparation for new driver The old tps65912 driver is being replaced, delete old driver. Signed-off-by: Andrew F. Davis Acked-by: Mark Brown Acked-by: Linus Walleij Signed-off-by: Lee Jones --- drivers/gpio/Kconfig | 6 - drivers/gpio/Makefile | 1 - drivers/gpio/gpio-tps65912.c | 151 --------- drivers/mfd/Kconfig | 26 -- drivers/mfd/Makefile | 4 - drivers/mfd/tps65912-core.c | 175 ----------- drivers/mfd/tps65912-i2c.c | 139 --------- drivers/mfd/tps65912-irq.c | 217 ------------- drivers/mfd/tps65912-spi.c | 140 --------- drivers/regulator/Kconfig | 6 - drivers/regulator/Makefile | 1 - drivers/regulator/tps65912-regulator.c | 541 --------------------------------- include/linux/mfd/tps65912.h | 328 -------------------- 13 files changed, 1735 deletions(-) delete mode 100644 drivers/gpio/gpio-tps65912.c delete mode 100644 drivers/mfd/tps65912-core.c delete mode 100644 drivers/mfd/tps65912-i2c.c delete mode 100644 drivers/mfd/tps65912-irq.c delete mode 100644 drivers/mfd/tps65912-spi.c delete mode 100644 drivers/regulator/tps65912-regulator.c delete mode 100644 include/linux/mfd/tps65912.h (limited to 'include/linux') diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig index c88dd24a4b1f..98eaeddc0ae3 100644 --- a/drivers/gpio/Kconfig +++ b/drivers/gpio/Kconfig @@ -858,12 +858,6 @@ config GPIO_TPS65910 Select this option to enable GPIO driver for the TPS65910 chip family. -config GPIO_TPS65912 - tristate "TI TPS65912 GPIO" - depends on (MFD_TPS65912_I2C || MFD_TPS65912_SPI) - help - This driver supports TPS65912 gpio chip - config GPIO_TWL4030 tristate "TWL4030, TWL5030, and TPS659x0 GPIOs" depends on TWL4030_CORE diff --git a/drivers/gpio/Makefile b/drivers/gpio/Makefile index ece7d7cbdc80..edc33f0a6788 100644 --- a/drivers/gpio/Makefile +++ b/drivers/gpio/Makefile @@ -97,7 +97,6 @@ obj-$(CONFIG_GPIO_TIMBERDALE) += gpio-timberdale.o obj-$(CONFIG_GPIO_PALMAS) += gpio-palmas.o obj-$(CONFIG_GPIO_TPS6586X) += gpio-tps6586x.o obj-$(CONFIG_GPIO_TPS65910) += gpio-tps65910.o -obj-$(CONFIG_GPIO_TPS65912) += gpio-tps65912.o obj-$(CONFIG_GPIO_TS5500) += gpio-ts5500.o obj-$(CONFIG_GPIO_TWL4030) += gpio-twl4030.o obj-$(CONFIG_GPIO_TWL6040) += gpio-twl6040.o diff --git a/drivers/gpio/gpio-tps65912.c b/drivers/gpio/gpio-tps65912.c deleted file mode 100644 index 4f2029c7da3a..000000000000 --- a/drivers/gpio/gpio-tps65912.c +++ /dev/null @@ -1,151 +0,0 @@ -/* - * Copyright 2011 Texas Instruments Inc. 
- * - * Author: Margarita Olaya - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - * - * This driver is based on wm8350 implementation. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -struct tps65912_gpio_data { - struct tps65912 *tps65912; - struct gpio_chip gpio_chip; -}; - -static int tps65912_gpio_get(struct gpio_chip *gc, unsigned offset) -{ - struct tps65912_gpio_data *tps65912_gpio = gpiochip_get_data(gc); - struct tps65912 *tps65912 = tps65912_gpio->tps65912; - int val; - - val = tps65912_reg_read(tps65912, TPS65912_GPIO1 + offset); - - if (val & GPIO_STS_MASK) - return 1; - - return 0; -} - -static void tps65912_gpio_set(struct gpio_chip *gc, unsigned offset, - int value) -{ - struct tps65912_gpio_data *tps65912_gpio = gpiochip_get_data(gc); - struct tps65912 *tps65912 = tps65912_gpio->tps65912; - - if (value) - tps65912_set_bits(tps65912, TPS65912_GPIO1 + offset, - GPIO_SET_MASK); - else - tps65912_clear_bits(tps65912, TPS65912_GPIO1 + offset, - GPIO_SET_MASK); -} - -static int tps65912_gpio_output(struct gpio_chip *gc, unsigned offset, - int value) -{ - struct tps65912_gpio_data *tps65912_gpio = gpiochip_get_data(gc); - struct tps65912 *tps65912 = tps65912_gpio->tps65912; - - /* Set the initial value */ - tps65912_gpio_set(gc, offset, value); - - return tps65912_set_bits(tps65912, TPS65912_GPIO1 + offset, - GPIO_CFG_MASK); -} - -static int tps65912_gpio_input(struct gpio_chip *gc, unsigned offset) -{ - struct tps65912_gpio_data *tps65912_gpio = gpiochip_get_data(gc); - struct tps65912 *tps65912 = tps65912_gpio->tps65912; - - return tps65912_clear_bits(tps65912, TPS65912_GPIO1 + offset, - GPIO_CFG_MASK); -} - -static struct gpio_chip template_chip = { - .label = "tps65912", - .owner = THIS_MODULE, - .direction_input = tps65912_gpio_input, - .direction_output = tps65912_gpio_output, - .get = tps65912_gpio_get, - .set = tps65912_gpio_set, - .can_sleep = true, - .ngpio = 5, - .base = -1, -}; - -static int tps65912_gpio_probe(struct platform_device *pdev) -{ - struct tps65912 *tps65912 = dev_get_drvdata(pdev->dev.parent); - struct tps65912_board *pdata = dev_get_platdata(tps65912->dev); - struct tps65912_gpio_data *tps65912_gpio; - int ret; - - tps65912_gpio = devm_kzalloc(&pdev->dev, sizeof(*tps65912_gpio), - GFP_KERNEL); - if (tps65912_gpio == NULL) - return -ENOMEM; - - tps65912_gpio->tps65912 = tps65912; - tps65912_gpio->gpio_chip = template_chip; - tps65912_gpio->gpio_chip.parent = &pdev->dev; - if (pdata && pdata->gpio_base) - tps65912_gpio->gpio_chip.base = pdata->gpio_base; - - ret = gpiochip_add_data(&tps65912_gpio->gpio_chip, tps65912_gpio); - if (ret < 0) { - dev_err(&pdev->dev, "Failed to register gpiochip, %d\n", ret); - return ret; - } - - platform_set_drvdata(pdev, tps65912_gpio); - - return ret; -} - -static int tps65912_gpio_remove(struct platform_device *pdev) -{ - struct tps65912_gpio_data *tps65912_gpio = platform_get_drvdata(pdev); - - gpiochip_remove(&tps65912_gpio->gpio_chip); - return 0; -} - -static struct platform_driver tps65912_gpio_driver = { - .driver = { - .name = "tps65912-gpio", - }, - .probe = tps65912_gpio_probe, - .remove = tps65912_gpio_remove, -}; - -static int __init tps65912_gpio_init(void) -{ - return platform_driver_register(&tps65912_gpio_driver); -} 
-subsys_initcall(tps65912_gpio_init); - -static void __exit tps65912_gpio_exit(void) -{ - platform_driver_unregister(&tps65912_gpio_driver); -} -module_exit(tps65912_gpio_exit); - -MODULE_AUTHOR("Margarita Olaya Cabrera "); -MODULE_DESCRIPTION("GPIO interface for TPS65912 PMICs"); -MODULE_LICENSE("GPL v2"); -MODULE_ALIAS("platform:tps65912-gpio"); diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig index 9ca66de0c1c1..dd2fc0158c7a 100644 --- a/drivers/mfd/Kconfig +++ b/drivers/mfd/Kconfig @@ -1180,32 +1180,6 @@ config MFD_TPS65910 if you say yes here you get support for the TPS65910 series of Power Management chips. -config MFD_TPS65912 - bool "TI TPS65912 Power Management chip" - depends on GPIOLIB - select MFD_CORE - help - If you say yes here you get support for the TPS65912 series of - PM chips. - -config MFD_TPS65912_I2C - bool "TI TPS65912 Power Management chip with I2C" - select MFD_CORE - select MFD_TPS65912 - depends on I2C=y && GPIOLIB - help - If you say yes here you get support for the TPS65912 series of - PM chips with I2C interface. - -config MFD_TPS65912_SPI - bool "TI TPS65912 Power Management chip with SPI" - select MFD_CORE - select MFD_TPS65912 - depends on SPI_MASTER && GPIOLIB - help - If you say yes here you get support for the TPS65912 series of - PM chips with SPI interface. - config MFD_TPS80031 bool "TI TPS80031/TPS80032 Power Management chips" depends on I2C=y diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile index 0f230a6103f8..e33e0b48f591 100644 --- a/drivers/mfd/Makefile +++ b/drivers/mfd/Makefile @@ -73,10 +73,6 @@ obj-$(CONFIG_TPS6507X) += tps6507x.o obj-$(CONFIG_MFD_TPS65217) += tps65217.o obj-$(CONFIG_MFD_TPS65218) += tps65218.o obj-$(CONFIG_MFD_TPS65910) += tps65910.o -tps65912-objs := tps65912-core.o tps65912-irq.o -obj-$(CONFIG_MFD_TPS65912) += tps65912.o -obj-$(CONFIG_MFD_TPS65912_I2C) += tps65912-i2c.o -obj-$(CONFIG_MFD_TPS65912_SPI) += tps65912-spi.o obj-$(CONFIG_MFD_TPS80031) += tps80031.o obj-$(CONFIG_MENELAUS) += menelaus.o diff --git a/drivers/mfd/tps65912-core.c b/drivers/mfd/tps65912-core.c deleted file mode 100644 index 1f82d60b1d0f..000000000000 --- a/drivers/mfd/tps65912-core.c +++ /dev/null @@ -1,175 +0,0 @@ -/* - * tps65912-core.c -- TI TPS65912x - * - * Copyright 2011 Texas Instruments Inc. - * - * Author: Margarita Olaya Cabrera - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - * - * This driver is based on wm8350 implementation. 
- */ - -#include -#include -#include -#include -#include -#include - -static const struct mfd_cell tps65912s[] = { - { - .name = "tps65912-pmic", - }, -}; - -int tps65912_set_bits(struct tps65912 *tps65912, u8 reg, u8 mask) -{ - u8 data; - int err; - - mutex_lock(&tps65912->io_mutex); - - err = tps65912->read(tps65912, reg, 1, &data); - if (err) { - dev_err(tps65912->dev, "Read from reg 0x%x failed\n", reg); - goto out; - } - - data |= mask; - err = tps65912->write(tps65912, reg, 1, &data); - if (err) - dev_err(tps65912->dev, "Write to reg 0x%x failed\n", reg); - -out: - mutex_unlock(&tps65912->io_mutex); - return err; -} -EXPORT_SYMBOL_GPL(tps65912_set_bits); - -int tps65912_clear_bits(struct tps65912 *tps65912, u8 reg, u8 mask) -{ - u8 data; - int err; - - mutex_lock(&tps65912->io_mutex); - err = tps65912->read(tps65912, reg, 1, &data); - if (err) { - dev_err(tps65912->dev, "Read from reg 0x%x failed\n", reg); - goto out; - } - - data &= ~mask; - err = tps65912->write(tps65912, reg, 1, &data); - if (err) - dev_err(tps65912->dev, "Write to reg 0x%x failed\n", reg); - -out: - mutex_unlock(&tps65912->io_mutex); - return err; -} -EXPORT_SYMBOL_GPL(tps65912_clear_bits); - -static inline int tps65912_read(struct tps65912 *tps65912, u8 reg) -{ - u8 val; - int err; - - err = tps65912->read(tps65912, reg, 1, &val); - if (err < 0) - return err; - - return val; -} - -static inline int tps65912_write(struct tps65912 *tps65912, u8 reg, u8 val) -{ - return tps65912->write(tps65912, reg, 1, &val); -} - -int tps65912_reg_read(struct tps65912 *tps65912, u8 reg) -{ - int data; - - mutex_lock(&tps65912->io_mutex); - - data = tps65912_read(tps65912, reg); - if (data < 0) - dev_err(tps65912->dev, "Read from reg 0x%x failed\n", reg); - - mutex_unlock(&tps65912->io_mutex); - return data; -} -EXPORT_SYMBOL_GPL(tps65912_reg_read); - -int tps65912_reg_write(struct tps65912 *tps65912, u8 reg, u8 val) -{ - int err; - - mutex_lock(&tps65912->io_mutex); - - err = tps65912_write(tps65912, reg, val); - if (err < 0) - dev_err(tps65912->dev, "Write for reg 0x%x failed\n", reg); - - mutex_unlock(&tps65912->io_mutex); - return err; -} -EXPORT_SYMBOL_GPL(tps65912_reg_write); - -int tps65912_device_init(struct tps65912 *tps65912) -{ - struct tps65912_board *pmic_plat_data = dev_get_platdata(tps65912->dev); - struct tps65912_platform_data *init_data; - int ret, dcdc_avs, value; - - init_data = kzalloc(sizeof(struct tps65912_platform_data), GFP_KERNEL); - if (init_data == NULL) - return -ENOMEM; - - mutex_init(&tps65912->io_mutex); - dev_set_drvdata(tps65912->dev, tps65912); - - dcdc_avs = (pmic_plat_data->is_dcdc1_avs << 0 | - pmic_plat_data->is_dcdc2_avs << 1 | - pmic_plat_data->is_dcdc3_avs << 2 | - pmic_plat_data->is_dcdc4_avs << 3); - if (dcdc_avs) { - tps65912->read(tps65912, TPS65912_I2C_SPI_CFG, 1, &value); - dcdc_avs |= value; - tps65912->write(tps65912, TPS65912_I2C_SPI_CFG, 1, &dcdc_avs); - } - - ret = mfd_add_devices(tps65912->dev, -1, - tps65912s, ARRAY_SIZE(tps65912s), - NULL, 0, NULL); - if (ret < 0) - goto err; - - init_data->irq = pmic_plat_data->irq; - init_data->irq_base = pmic_plat_data->irq_base; - ret = tps65912_irq_init(tps65912, init_data->irq, init_data); - if (ret < 0) - goto err; - - kfree(init_data); - return ret; - -err: - kfree(init_data); - mfd_remove_devices(tps65912->dev); - return ret; -} - -void tps65912_device_exit(struct tps65912 *tps65912) -{ - mfd_remove_devices(tps65912->dev); - tps65912_irq_exit(tps65912); -} - -MODULE_AUTHOR("Margarita Olaya "); -MODULE_DESCRIPTION("TPS65912x chip 
family multi-function driver"); -MODULE_LICENSE("GPL"); diff --git a/drivers/mfd/tps65912-i2c.c b/drivers/mfd/tps65912-i2c.c deleted file mode 100644 index 7e55640b3ed5..000000000000 --- a/drivers/mfd/tps65912-i2c.c +++ /dev/null @@ -1,139 +0,0 @@ -/* - * tps65912-i2c.c -- I2C access for TI TPS65912x PMIC - * - * Copyright 2011 Texas Instruments Inc. - * - * Author: Margarita Olaya Cabrera - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - * - * This driver is based on wm8350 implementation. - */ - -#include -#include -#include -#include -#include -#include -#include -#include - -static int tps65912_i2c_read(struct tps65912 *tps65912, u8 reg, - int bytes, void *dest) -{ - struct i2c_client *i2c = tps65912->control_data; - struct i2c_msg xfer[2]; - int ret; - - /* Write register */ - xfer[0].addr = i2c->addr; - xfer[0].flags = 0; - xfer[0].len = 1; - xfer[0].buf = ® - - /* Read data */ - xfer[1].addr = i2c->addr; - xfer[1].flags = I2C_M_RD; - xfer[1].len = bytes; - xfer[1].buf = dest; - - ret = i2c_transfer(i2c->adapter, xfer, 2); - if (ret == 2) - ret = 0; - else if (ret >= 0) - ret = -EIO; - return ret; -} - -static int tps65912_i2c_write(struct tps65912 *tps65912, u8 reg, - int bytes, void *src) -{ - struct i2c_client *i2c = tps65912->control_data; - /* we add 1 byte for device register */ - u8 msg[TPS6591X_MAX_REGISTER + 1]; - int ret; - - if (bytes > TPS6591X_MAX_REGISTER) - return -EINVAL; - - msg[0] = reg; - memcpy(&msg[1], src, bytes); - - ret = i2c_master_send(i2c, msg, bytes + 1); - if (ret < 0) - return ret; - if (ret != bytes + 1) - return -EIO; - - return 0; -} - -static int tps65912_i2c_probe(struct i2c_client *i2c, - const struct i2c_device_id *id) -{ - struct tps65912 *tps65912; - - tps65912 = devm_kzalloc(&i2c->dev, - sizeof(struct tps65912), GFP_KERNEL); - if (tps65912 == NULL) - return -ENOMEM; - - i2c_set_clientdata(i2c, tps65912); - tps65912->dev = &i2c->dev; - tps65912->control_data = i2c; - tps65912->read = tps65912_i2c_read; - tps65912->write = tps65912_i2c_write; - - return tps65912_device_init(tps65912); -} - -static int tps65912_i2c_remove(struct i2c_client *i2c) -{ - struct tps65912 *tps65912 = i2c_get_clientdata(i2c); - - tps65912_device_exit(tps65912); - - return 0; -} - -static const struct i2c_device_id tps65912_i2c_id[] = { - {"tps65912", 0 }, - { } -}; -MODULE_DEVICE_TABLE(i2c, tps65912_i2c_id); - -static struct i2c_driver tps65912_i2c_driver = { - .driver = { - .name = "tps65912", - }, - .probe = tps65912_i2c_probe, - .remove = tps65912_i2c_remove, - .id_table = tps65912_i2c_id, -}; - -static int __init tps65912_i2c_init(void) -{ - int ret; - - ret = i2c_add_driver(&tps65912_i2c_driver); - if (ret != 0) - pr_err("Failed to register TPS65912 I2C driver: %d\n", ret); - - return ret; -} -/* init early so consumer devices can complete system boot */ -subsys_initcall(tps65912_i2c_init); - -static void __exit tps65912_i2c_exit(void) -{ - i2c_del_driver(&tps65912_i2c_driver); -} -module_exit(tps65912_i2c_exit); - -MODULE_AUTHOR("Margarita Olaya "); -MODULE_DESCRIPTION("TPS6591x chip family multi-function driver"); -MODULE_LICENSE("GPL"); diff --git a/drivers/mfd/tps65912-irq.c b/drivers/mfd/tps65912-irq.c deleted file mode 100644 index db2c29cb709b..000000000000 --- a/drivers/mfd/tps65912-irq.c +++ /dev/null @@ -1,217 +0,0 @@ -/* - * tps65912-irq.c -- TI 
TPS6591x - * - * Copyright 2011 Texas Instruments Inc. - * - * Author: Margarita Olaya - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - * - * This driver is based on wm8350 implementation. - */ - -#include -#include -#include -#include -#include -#include -#include -#include - -static inline int irq_to_tps65912_irq(struct tps65912 *tps65912, - int irq) -{ - return irq - tps65912->irq_base; -} - -/* - * This is a threaded IRQ handler so can access I2C/SPI. Since the - * IRQ handler explicitly clears the IRQ it handles the IRQ line - * will be reasserted and the physical IRQ will be handled again if - * another interrupt is asserted while we run - in the normal course - * of events this is a rare occurrence so we save I2C/SPI reads. We're - * also assuming that it's rare to get lots of interrupts firing - * simultaneously so try to minimise I/O. - */ -static irqreturn_t tps65912_irq(int irq, void *irq_data) -{ - struct tps65912 *tps65912 = irq_data; - u32 irq_sts; - u32 irq_mask; - u8 reg; - int i; - - - tps65912->read(tps65912, TPS65912_INT_STS, 1, &reg); - irq_sts = reg; - tps65912->read(tps65912, TPS65912_INT_STS2, 1, &reg); - irq_sts |= reg << 8; - tps65912->read(tps65912, TPS65912_INT_STS3, 1, &reg); - irq_sts |= reg << 16; - tps65912->read(tps65912, TPS65912_INT_STS4, 1, &reg); - irq_sts |= reg << 24; - - tps65912->read(tps65912, TPS65912_INT_MSK, 1, &reg); - irq_mask = reg; - tps65912->read(tps65912, TPS65912_INT_MSK2, 1, &reg); - irq_mask |= reg << 8; - tps65912->read(tps65912, TPS65912_INT_MSK3, 1, &reg); - irq_mask |= reg << 16; - tps65912->read(tps65912, TPS65912_INT_MSK4, 1, &reg); - irq_mask |= reg << 24; - - irq_sts &= ~irq_mask; - if (!irq_sts) - return IRQ_NONE; - - for (i = 0; i < tps65912->irq_num; i++) { - if (!(irq_sts & (1 << i))) - continue; - - handle_nested_irq(tps65912->irq_base + i); - } - - /* Write the STS register back to clear IRQs we handled */ - reg = irq_sts & 0xFF; - irq_sts >>= 8; - if (reg) - tps65912->write(tps65912, TPS65912_INT_STS, 1, &reg); - reg = irq_sts & 0xFF; - irq_sts >>= 8; - if (reg) - tps65912->write(tps65912, TPS65912_INT_STS2, 1, &reg); - reg = irq_sts & 0xFF; - irq_sts >>= 8; - if (reg) - tps65912->write(tps65912, TPS65912_INT_STS3, 1, &reg); - reg = irq_sts & 0xFF; - if (reg) - tps65912->write(tps65912, TPS65912_INT_STS4, 1, &reg); - - return IRQ_HANDLED; -} - -static void tps65912_irq_lock(struct irq_data *data) -{ - struct tps65912 *tps65912 = irq_data_get_irq_chip_data(data); - - mutex_lock(&tps65912->irq_lock); -} - -static void tps65912_irq_sync_unlock(struct irq_data *data) -{ - struct tps65912 *tps65912 = irq_data_get_irq_chip_data(data); - u32 reg_mask; - u8 reg; - - tps65912->read(tps65912, TPS65912_INT_MSK, 1, &reg); - reg_mask = reg; - tps65912->read(tps65912, TPS65912_INT_MSK2, 1, &reg); - reg_mask |= reg << 8; - tps65912->read(tps65912, TPS65912_INT_MSK3, 1, &reg); - reg_mask |= reg << 16; - tps65912->read(tps65912, TPS65912_INT_MSK4, 1, &reg); - reg_mask |= reg << 24; - - if (tps65912->irq_mask != reg_mask) { - reg = tps65912->irq_mask & 0xFF; - tps65912->write(tps65912, TPS65912_INT_MSK, 1, &reg); - reg = tps65912->irq_mask >> 8 & 0xFF; - tps65912->write(tps65912, TPS65912_INT_MSK2, 1, &reg); - reg = tps65912->irq_mask >> 16 & 0xFF; - tps65912->write(tps65912, TPS65912_INT_MSK3, 1, &reg); - reg = tps65912->irq_mask >> 24 & 0xFF; - tps65912->write(tps65912,
TPS65912_INT_MSK4, 1, &reg); - } - - mutex_unlock(&tps65912->irq_lock); -} - -static void tps65912_irq_enable(struct irq_data *data) -{ - struct tps65912 *tps65912 = irq_data_get_irq_chip_data(data); - - tps65912->irq_mask &= ~(1 << irq_to_tps65912_irq(tps65912, data->irq)); -} - -static void tps65912_irq_disable(struct irq_data *data) -{ - struct tps65912 *tps65912 = irq_data_get_irq_chip_data(data); - - tps65912->irq_mask |= (1 << irq_to_tps65912_irq(tps65912, data->irq)); -} - -static struct irq_chip tps65912_irq_chip = { - .name = "tps65912", - .irq_bus_lock = tps65912_irq_lock, - .irq_bus_sync_unlock = tps65912_irq_sync_unlock, - .irq_disable = tps65912_irq_disable, - .irq_enable = tps65912_irq_enable, -}; - -int tps65912_irq_init(struct tps65912 *tps65912, int irq, - struct tps65912_platform_data *pdata) -{ - int ret, cur_irq; - int flags = IRQF_ONESHOT; - u8 reg; - - if (!irq) { - dev_warn(tps65912->dev, "No interrupt support, no core IRQ\n"); - return 0; - } - - if (!pdata || !pdata->irq_base) { - dev_warn(tps65912->dev, "No interrupt support, no IRQ base\n"); - return 0; - } - - /* Clear unattended interrupts */ - tps65912->read(tps65912, TPS65912_INT_STS, 1, &reg); - tps65912->write(tps65912, TPS65912_INT_STS, 1, &reg); - tps65912->read(tps65912, TPS65912_INT_STS2, 1, &reg); - tps65912->write(tps65912, TPS65912_INT_STS2, 1, &reg); - tps65912->read(tps65912, TPS65912_INT_STS3, 1, &reg); - tps65912->write(tps65912, TPS65912_INT_STS3, 1, &reg); - tps65912->read(tps65912, TPS65912_INT_STS4, 1, &reg); - tps65912->write(tps65912, TPS65912_INT_STS4, 1, &reg); - - /* Mask top level interrupts */ - tps65912->irq_mask = 0xFFFFFFFF; - - mutex_init(&tps65912->irq_lock); - tps65912->chip_irq = irq; - tps65912->irq_base = pdata->irq_base; - - tps65912->irq_num = TPS65912_NUM_IRQ; - - /* Register with genirq */ - for (cur_irq = tps65912->irq_base; - cur_irq < tps65912->irq_num + tps65912->irq_base; - cur_irq++) { - irq_set_chip_data(cur_irq, tps65912); - irq_set_chip_and_handler(cur_irq, &tps65912_irq_chip, - handle_edge_irq); - irq_set_nested_thread(cur_irq, 1); - irq_clear_status_flags(cur_irq, IRQ_NOREQUEST | IRQ_NOPROBE); - } - - ret = request_threaded_irq(irq, NULL, tps65912_irq, flags, - "tps65912", tps65912); - - irq_set_irq_type(irq, IRQ_TYPE_LEVEL_LOW); - if (ret != 0) - dev_err(tps65912->dev, "Failed to request IRQ: %d\n", ret); - - return ret; -} - -int tps65912_irq_exit(struct tps65912 *tps65912) -{ - free_irq(tps65912->chip_irq, tps65912); - return 0; -} diff --git a/drivers/mfd/tps65912-spi.c b/drivers/mfd/tps65912-spi.c deleted file mode 100644 index d59aa55b1495..000000000000 --- a/drivers/mfd/tps65912-spi.c +++ /dev/null @@ -1,140 +0,0 @@ -/* - * tps65912-spi.c -- SPI access for TI TPS65912x PMIC - * - * Copyright 2011 Texas Instruments Inc. - * - * Author: Margarita Olaya Cabrera - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - * - * This driver is based on wm8350 implementation.
- */ - -#include -#include -#include -#include -#include -#include -#include -#include - -static int tps65912_spi_write(struct tps65912 *tps65912, u8 addr, - int bytes, void *src) -{ - struct spi_device *spi = tps65912->control_data; - u8 *data = (u8 *) src; - int ret; - /* bit 23 is the read/write bit */ - unsigned long spi_data = 1 << 23 | addr << 15 | *data; - struct spi_transfer xfer; - struct spi_message msg; - u32 tx_buf; - - tx_buf = spi_data; - - xfer.tx_buf = &tx_buf; - xfer.rx_buf = NULL; - xfer.len = sizeof(unsigned long); - xfer.bits_per_word = 24; - - spi_message_init(&msg); - spi_message_add_tail(&xfer, &msg); - - ret = spi_sync(spi, &msg); - return ret; -} - -static int tps65912_spi_read(struct tps65912 *tps65912, u8 addr, - int bytes, void *dest) -{ - struct spi_device *spi = tps65912->control_data; - /* bit 23 is the read/write bit */ - unsigned long spi_data = 0 << 23 | addr << 15; - struct spi_transfer xfer; - struct spi_message msg; - int ret; - u8 *data = (u8 *) dest; - u32 tx_buf, rx_buf; - - tx_buf = spi_data; - rx_buf = 0; - - xfer.tx_buf = &tx_buf; - xfer.rx_buf = &rx_buf; - xfer.len = sizeof(unsigned long); - xfer.bits_per_word = 24; - - spi_message_init(&msg); - spi_message_add_tail(&xfer, &msg); - - if (spi == NULL) - return 0; - - ret = spi_sync(spi, &msg); - if (ret == 0) - *data = (u8) (rx_buf & 0xFF); - return ret; -} - -static int tps65912_spi_probe(struct spi_device *spi) -{ - struct tps65912 *tps65912; - - tps65912 = devm_kzalloc(&spi->dev, - sizeof(struct tps65912), GFP_KERNEL); - if (tps65912 == NULL) - return -ENOMEM; - - tps65912->dev = &spi->dev; - tps65912->control_data = spi; - tps65912->read = tps65912_spi_read; - tps65912->write = tps65912_spi_write; - - spi_set_drvdata(spi, tps65912); - - return tps65912_device_init(tps65912); -} - -static int tps65912_spi_remove(struct spi_device *spi) -{ - struct tps65912 *tps65912 = spi_get_drvdata(spi); - - tps65912_device_exit(tps65912); - - return 0; -} - -static struct spi_driver tps65912_spi_driver = { - .driver = { - .name = "tps65912", - }, - .probe = tps65912_spi_probe, - .remove = tps65912_spi_remove, -}; - -static int __init tps65912_spi_init(void) -{ - int ret; - - ret = spi_register_driver(&tps65912_spi_driver); - if (ret != 0) - pr_err("Failed to register TPS65912 SPI driver: %d\n", ret); - - return 0; -} -/* init early so consumer devices can complete system boot */ -subsys_initcall(tps65912_spi_init); - -static void __exit tps65912_spi_exit(void) -{ - spi_unregister_driver(&tps65912_spi_driver); -} -module_exit(tps65912_spi_exit); - -MODULE_AUTHOR("Margarita Olaya "); -MODULE_DESCRIPTION("SPI support for TPS65912 chip family mfd"); -MODULE_LICENSE("GPL"); diff --git a/drivers/regulator/Kconfig b/drivers/regulator/Kconfig index 8155e80dd3f8..eb1b4a26cb3e 100644 --- a/drivers/regulator/Kconfig +++ b/drivers/regulator/Kconfig @@ -760,12 +760,6 @@ config REGULATOR_TPS65910 help This driver supports TPS65910/TPS65911 voltage regulator chips. -config REGULATOR_TPS65912 - tristate "TI TPS65912 Power regulator" - depends on (MFD_TPS65912_I2C || MFD_TPS65912_SPI) - help - This driver supports TPS65912 voltage regulator chip. 
- config REGULATOR_TPS80031 tristate "TI TPS80031/TPS80032 power regualtor driver" depends on MFD_TPS80031 diff --git a/drivers/regulator/Makefile b/drivers/regulator/Makefile index 980b1943fa81..85da3192701c 100644 --- a/drivers/regulator/Makefile +++ b/drivers/regulator/Makefile @@ -95,7 +95,6 @@ obj-$(CONFIG_REGULATOR_TPS65218) += tps65218-regulator.o obj-$(CONFIG_REGULATOR_TPS6524X) += tps6524x-regulator.o obj-$(CONFIG_REGULATOR_TPS6586X) += tps6586x-regulator.o obj-$(CONFIG_REGULATOR_TPS65910) += tps65910-regulator.o -obj-$(CONFIG_REGULATOR_TPS65912) += tps65912-regulator.o obj-$(CONFIG_REGULATOR_TPS80031) += tps80031-regulator.o obj-$(CONFIG_REGULATOR_TWL4030) += twl-regulator.o obj-$(CONFIG_REGULATOR_VEXPRESS) += vexpress.o diff --git a/drivers/regulator/tps65912-regulator.c b/drivers/regulator/tps65912-regulator.c deleted file mode 100644 index 9503d5481a52..000000000000 --- a/drivers/regulator/tps65912-regulator.c +++ /dev/null @@ -1,541 +0,0 @@ -/* - * tps65912.c -- TI tps65912 - * - * Copyright 2011 Texas Instruments Inc. - * - * Author: Margarita Olaya Cabrera - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - * - * This driver is based on wm8350 implementation. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -/* DCDC's */ -#define TPS65912_REG_DCDC1 0 -#define TPS65912_REG_DCDC2 1 -#define TPS65912_REG_DCDC3 2 -#define TPS65912_REG_DCDC4 3 - -/* LDOs */ -#define TPS65912_REG_LDO1 4 -#define TPS65912_REG_LDO2 5 -#define TPS65912_REG_LDO3 6 -#define TPS65912_REG_LDO4 7 -#define TPS65912_REG_LDO5 8 -#define TPS65912_REG_LDO6 9 -#define TPS65912_REG_LDO7 10 -#define TPS65912_REG_LDO8 11 -#define TPS65912_REG_LDO9 12 -#define TPS65912_REG_LDO10 13 - -/* Number of step-down converters available */ -#define TPS65912_NUM_DCDC 4 - -/* Number of LDO voltage regulators available */ -#define TPS65912_NUM_LDO 10 - -/* Number of total regulators available */ -#define TPS65912_NUM_REGULATOR (TPS65912_NUM_DCDC + TPS65912_NUM_LDO) - -#define TPS65912_REG_ENABLED 0x80 -#define OP_SELREG_MASK 0x40 -#define OP_SELREG_SHIFT 6 - -struct tps_info { - const char *name; -}; - -static struct tps_info tps65912_regs[] = { - { - .name = "DCDC1", - }, - { - .name = "DCDC2", - }, - { - .name = "DCDC3", - }, - { - .name = "DCDC4", - }, - { - .name = "LDO1", - }, - { - .name = "LDO2", - }, - { - .name = "LDO3", - }, - { - .name = "LDO4", - }, - { - .name = "LDO5", - }, - { - .name = "LDO6", - }, - { - .name = "LDO7", - }, - { - .name = "LDO8", - }, - { - .name = "LDO9", - }, - { - .name = "LDO10", - }, -}; - -struct tps65912_reg { - struct regulator_desc desc[TPS65912_NUM_REGULATOR]; - struct tps65912 *mfd; - struct regulator_dev *rdev[TPS65912_NUM_REGULATOR]; - struct tps_info *info[TPS65912_NUM_REGULATOR]; - /* for read/write access */ - struct mutex io_lock; - int mode; - int (*get_ctrl_reg)(int); - int dcdc_range[TPS65912_NUM_DCDC]; - int pwm_mode_reg; - int eco_reg; -}; - -static const struct regulator_linear_range tps65912_ldo_ranges[] = { - REGULATOR_LINEAR_RANGE(800000, 0, 32, 25000), - REGULATOR_LINEAR_RANGE(1650000, 33, 60, 50000), - REGULATOR_LINEAR_RANGE(3100000, 61, 63, 100000), -}; - -static int tps65912_get_range(struct tps65912_reg *pmic, int id) -{ - struct tps65912 *mfd = pmic->mfd; - int range; - - switch (id) { - case 
TPS65912_REG_DCDC1: - range = tps65912_reg_read(mfd, TPS65912_DCDC1_LIMIT); - break; - case TPS65912_REG_DCDC2: - range = tps65912_reg_read(mfd, TPS65912_DCDC2_LIMIT); - break; - case TPS65912_REG_DCDC3: - range = tps65912_reg_read(mfd, TPS65912_DCDC3_LIMIT); - break; - case TPS65912_REG_DCDC4: - range = tps65912_reg_read(mfd, TPS65912_DCDC4_LIMIT); - break; - default: - return 0; - } - - if (range >= 0) - range = (range & DCDC_LIMIT_RANGE_MASK) - >> DCDC_LIMIT_RANGE_SHIFT; - - pmic->dcdc_range[id] = range; - return range; -} - -static unsigned long tps65912_vsel_to_uv_range0(u8 vsel) -{ - unsigned long uv; - - uv = ((vsel * 12500) + 500000); - return uv; -} - -static unsigned long tps65912_vsel_to_uv_range1(u8 vsel) -{ - unsigned long uv; - - uv = ((vsel * 12500) + 700000); - return uv; -} - -static unsigned long tps65912_vsel_to_uv_range2(u8 vsel) -{ - unsigned long uv; - - uv = ((vsel * 25000) + 500000); - return uv; -} - -static unsigned long tps65912_vsel_to_uv_range3(u8 vsel) -{ - unsigned long uv; - - if (vsel == 0x3f) - uv = 3800000; - else - uv = ((vsel * 50000) + 500000); - - return uv; -} - -static int tps65912_get_ctrl_register(int id) -{ - if (id >= TPS65912_REG_DCDC1 && id <= TPS65912_REG_LDO4) - return id * 3 + TPS65912_DCDC1_AVS; - else if (id >= TPS65912_REG_LDO5 && id <= TPS65912_REG_LDO10) - return id - TPS65912_REG_LDO5 + TPS65912_LDO5; - else - return -EINVAL; -} - -static int tps65912_get_sel_register(struct tps65912_reg *pmic, int id) -{ - struct tps65912 *mfd = pmic->mfd; - int opvsel; - u8 reg = 0; - - if (id >= TPS65912_REG_DCDC1 && id <= TPS65912_REG_LDO4) { - opvsel = tps65912_reg_read(mfd, id * 3 + TPS65912_DCDC1_OP); - if (opvsel & OP_SELREG_MASK) - reg = id * 3 + TPS65912_DCDC1_AVS; - else - reg = id * 3 + TPS65912_DCDC1_OP; - } else if (id >= TPS65912_REG_LDO5 && id <= TPS65912_REG_LDO10) { - reg = id - TPS65912_REG_LDO5 + TPS65912_LDO5; - } else { - return -EINVAL; - } - - return reg; -} - -static int tps65912_get_mode_regiters(struct tps65912_reg *pmic, int id) -{ - switch (id) { - case TPS65912_REG_DCDC1: - pmic->pwm_mode_reg = TPS65912_DCDC1_CTRL; - pmic->eco_reg = TPS65912_DCDC1_AVS; - break; - case TPS65912_REG_DCDC2: - pmic->pwm_mode_reg = TPS65912_DCDC2_CTRL; - pmic->eco_reg = TPS65912_DCDC2_AVS; - break; - case TPS65912_REG_DCDC3: - pmic->pwm_mode_reg = TPS65912_DCDC3_CTRL; - pmic->eco_reg = TPS65912_DCDC3_AVS; - break; - case TPS65912_REG_DCDC4: - pmic->pwm_mode_reg = TPS65912_DCDC4_CTRL; - pmic->eco_reg = TPS65912_DCDC4_AVS; - break; - default: - return -EINVAL; - } - - return 0; -} - -static int tps65912_reg_is_enabled(struct regulator_dev *dev) -{ - struct tps65912_reg *pmic = rdev_get_drvdata(dev); - struct tps65912 *mfd = pmic->mfd; - int reg, value, id = rdev_get_id(dev); - - if (id < TPS65912_REG_DCDC1 || id > TPS65912_REG_LDO10) - return -EINVAL; - - reg = pmic->get_ctrl_reg(id); - if (reg < 0) - return reg; - - value = tps65912_reg_read(mfd, reg); - if (value < 0) - return value; - - return value & TPS65912_REG_ENABLED; -} - -static int tps65912_reg_enable(struct regulator_dev *dev) -{ - struct tps65912_reg *pmic = rdev_get_drvdata(dev); - struct tps65912 *mfd = pmic->mfd; - int id = rdev_get_id(dev); - int reg; - - if (id < TPS65912_REG_DCDC1 || id > TPS65912_REG_LDO10) - return -EINVAL; - - reg = pmic->get_ctrl_reg(id); - if (reg < 0) - return reg; - - return tps65912_set_bits(mfd, reg, TPS65912_REG_ENABLED); -} - -static int tps65912_reg_disable(struct regulator_dev *dev) -{ - struct tps65912_reg *pmic = rdev_get_drvdata(dev); - struct 
tps65912 *mfd = pmic->mfd; - int id = rdev_get_id(dev), reg; - - reg = pmic->get_ctrl_reg(id); - if (reg < 0) - return reg; - - return tps65912_clear_bits(mfd, reg, TPS65912_REG_ENABLED); -} - -static int tps65912_set_mode(struct regulator_dev *dev, unsigned int mode) -{ - struct tps65912_reg *pmic = rdev_get_drvdata(dev); - struct tps65912 *mfd = pmic->mfd; - int pwm_mode, eco, id = rdev_get_id(dev); - - tps65912_get_mode_regiters(pmic, id); - - pwm_mode = tps65912_reg_read(mfd, pmic->pwm_mode_reg); - eco = tps65912_reg_read(mfd, pmic->eco_reg); - - pwm_mode &= DCDCCTRL_DCDC_MODE_MASK; - eco &= DCDC_AVS_ECO_MASK; - - switch (mode) { - case REGULATOR_MODE_FAST: - /* Verify if mode alredy set */ - if (pwm_mode && !eco) - break; - tps65912_set_bits(mfd, pmic->pwm_mode_reg, DCDCCTRL_DCDC_MODE_MASK); - tps65912_clear_bits(mfd, pmic->eco_reg, DCDC_AVS_ECO_MASK); - break; - case REGULATOR_MODE_NORMAL: - case REGULATOR_MODE_IDLE: - if (!pwm_mode && !eco) - break; - tps65912_clear_bits(mfd, pmic->pwm_mode_reg, DCDCCTRL_DCDC_MODE_MASK); - tps65912_clear_bits(mfd, pmic->eco_reg, DCDC_AVS_ECO_MASK); - break; - case REGULATOR_MODE_STANDBY: - if (!pwm_mode && eco) - break; - tps65912_clear_bits(mfd, pmic->pwm_mode_reg, DCDCCTRL_DCDC_MODE_MASK); - tps65912_set_bits(mfd, pmic->eco_reg, DCDC_AVS_ECO_MASK); - break; - default: - return -EINVAL; - } - - return 0; -} - -static unsigned int tps65912_get_mode(struct regulator_dev *dev) -{ - struct tps65912_reg *pmic = rdev_get_drvdata(dev); - struct tps65912 *mfd = pmic->mfd; - int pwm_mode, eco, mode = 0, id = rdev_get_id(dev); - - tps65912_get_mode_regiters(pmic, id); - - pwm_mode = tps65912_reg_read(mfd, pmic->pwm_mode_reg); - eco = tps65912_reg_read(mfd, pmic->eco_reg); - - pwm_mode &= DCDCCTRL_DCDC_MODE_MASK; - eco &= DCDC_AVS_ECO_MASK; - - if (pwm_mode && !eco) - mode = REGULATOR_MODE_FAST; - else if (!pwm_mode && !eco) - mode = REGULATOR_MODE_NORMAL; - else if (!pwm_mode && eco) - mode = REGULATOR_MODE_STANDBY; - - return mode; -} - -static int tps65912_list_voltage(struct regulator_dev *dev, unsigned selector) -{ - struct tps65912_reg *pmic = rdev_get_drvdata(dev); - int range, voltage = 0, id = rdev_get_id(dev); - - if (id > TPS65912_REG_DCDC4) - return -EINVAL; - - range = pmic->dcdc_range[id]; - - switch (range) { - case 0: - /* 0.5 - 1.2875V in 12.5mV steps */ - voltage = tps65912_vsel_to_uv_range0(selector); - break; - case 1: - /* 0.7 - 1.4875V in 12.5mV steps */ - voltage = tps65912_vsel_to_uv_range1(selector); - break; - case 2: - /* 0.5 - 2.075V in 25mV steps */ - voltage = tps65912_vsel_to_uv_range2(selector); - break; - case 3: - /* 0.5 - 3.8V in 50mV steps */ - voltage = tps65912_vsel_to_uv_range3(selector); - break; - } - return voltage; -} - -static int tps65912_get_voltage_sel(struct regulator_dev *dev) -{ - struct tps65912_reg *pmic = rdev_get_drvdata(dev); - struct tps65912 *mfd = pmic->mfd; - int id = rdev_get_id(dev); - int reg, vsel; - - reg = tps65912_get_sel_register(pmic, id); - if (reg < 0) - return reg; - - vsel = tps65912_reg_read(mfd, reg); - vsel &= 0x3F; - - return vsel; -} - -static int tps65912_set_voltage_sel(struct regulator_dev *dev, - unsigned selector) -{ - struct tps65912_reg *pmic = rdev_get_drvdata(dev); - struct tps65912 *mfd = pmic->mfd; - int id = rdev_get_id(dev); - int value; - u8 reg; - - reg = tps65912_get_sel_register(pmic, id); - value = tps65912_reg_read(mfd, reg); - value &= 0xC0; - return tps65912_reg_write(mfd, reg, selector | value); -} - -/* Operations permitted on DCDCx */ -static struct 
regulator_ops tps65912_ops_dcdc = { - .is_enabled = tps65912_reg_is_enabled, - .enable = tps65912_reg_enable, - .disable = tps65912_reg_disable, - .set_mode = tps65912_set_mode, - .get_mode = tps65912_get_mode, - .get_voltage_sel = tps65912_get_voltage_sel, - .set_voltage_sel = tps65912_set_voltage_sel, - .list_voltage = tps65912_list_voltage, -}; - -/* Operations permitted on LDOx */ -static struct regulator_ops tps65912_ops_ldo = { - .is_enabled = tps65912_reg_is_enabled, - .enable = tps65912_reg_enable, - .disable = tps65912_reg_disable, - .get_voltage_sel = tps65912_get_voltage_sel, - .set_voltage_sel = tps65912_set_voltage_sel, - .list_voltage = regulator_list_voltage_linear_range, - .map_voltage = regulator_map_voltage_linear_range, -}; - -static int tps65912_probe(struct platform_device *pdev) -{ - struct tps65912 *tps65912 = dev_get_drvdata(pdev->dev.parent); - struct regulator_config config = { }; - struct tps_info *info; - struct regulator_init_data *reg_data; - struct regulator_dev *rdev; - struct tps65912_reg *pmic; - struct tps65912_board *pmic_plat_data; - int i; - - pmic_plat_data = dev_get_platdata(tps65912->dev); - if (!pmic_plat_data) - return -EINVAL; - - reg_data = pmic_plat_data->tps65912_pmic_init_data; - - pmic = devm_kzalloc(&pdev->dev, sizeof(*pmic), GFP_KERNEL); - if (!pmic) - return -ENOMEM; - - mutex_init(&pmic->io_lock); - pmic->mfd = tps65912; - platform_set_drvdata(pdev, pmic); - - pmic->get_ctrl_reg = &tps65912_get_ctrl_register; - info = tps65912_regs; - - for (i = 0; i < TPS65912_NUM_REGULATOR; i++, info++, reg_data++) { - int range = 0; - /* Register the regulators */ - pmic->info[i] = info; - - pmic->desc[i].name = info->name; - pmic->desc[i].id = i; - pmic->desc[i].n_voltages = 64; - if (i > TPS65912_REG_DCDC4) { - pmic->desc[i].ops = &tps65912_ops_ldo; - pmic->desc[i].linear_ranges = tps65912_ldo_ranges; - pmic->desc[i].n_linear_ranges = - ARRAY_SIZE(tps65912_ldo_ranges); - } else { - pmic->desc[i].ops = &tps65912_ops_dcdc; - } - pmic->desc[i].type = REGULATOR_VOLTAGE; - pmic->desc[i].owner = THIS_MODULE; - range = tps65912_get_range(pmic, i); - - config.dev = tps65912->dev; - config.init_data = reg_data; - config.driver_data = pmic; - - rdev = devm_regulator_register(&pdev->dev, &pmic->desc[i], - &config); - if (IS_ERR(rdev)) { - dev_err(tps65912->dev, - "failed to register %s regulator\n", - pdev->name); - return PTR_ERR(rdev); - } - - /* Save regulator for cleanup */ - pmic->rdev[i] = rdev; - } - return 0; -} - -static struct platform_driver tps65912_driver = { - .driver = { - .name = "tps65912-pmic", - }, - .probe = tps65912_probe, -}; - -static int __init tps65912_init(void) -{ - return platform_driver_register(&tps65912_driver); -} -subsys_initcall(tps65912_init); - -static void __exit tps65912_cleanup(void) -{ - platform_driver_unregister(&tps65912_driver); -} -module_exit(tps65912_cleanup); - -MODULE_AUTHOR("Margarita Olaya Cabrera "); -MODULE_DESCRIPTION("TPS65912 voltage regulator driver"); -MODULE_LICENSE("GPL v2"); -MODULE_ALIAS("platform:tps65912-pmic"); diff --git a/include/linux/mfd/tps65912.h b/include/linux/mfd/tps65912.h deleted file mode 100644 index 6d309032dc0d..000000000000 --- a/include/linux/mfd/tps65912.h +++ /dev/null @@ -1,328 +0,0 @@ -/* - * tps65912.h -- TI TPS6591x - * - * Copyright 2011 Texas Instruments Inc. 
- * - * Author: Margarita Olaya - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - * - */ - -#ifndef __LINUX_MFD_TPS65912_H -#define __LINUX_MFD_TPS65912_H - -/* TPS regulator type list */ -#define REGULATOR_LDO 0 -#define REGULATOR_DCDC 1 - -/* - * List of registers for TPS65912 - */ - -#define TPS65912_DCDC1_CTRL 0x00 -#define TPS65912_DCDC2_CTRL 0x01 -#define TPS65912_DCDC3_CTRL 0x02 -#define TPS65912_DCDC4_CTRL 0x03 -#define TPS65912_DCDC1_OP 0x04 -#define TPS65912_DCDC1_AVS 0x05 -#define TPS65912_DCDC1_LIMIT 0x06 -#define TPS65912_DCDC2_OP 0x07 -#define TPS65912_DCDC2_AVS 0x08 -#define TPS65912_DCDC2_LIMIT 0x09 -#define TPS65912_DCDC3_OP 0x0A -#define TPS65912_DCDC3_AVS 0x0B -#define TPS65912_DCDC3_LIMIT 0x0C -#define TPS65912_DCDC4_OP 0x0D -#define TPS65912_DCDC4_AVS 0x0E -#define TPS65912_DCDC4_LIMIT 0x0F -#define TPS65912_LDO1_OP 0x10 -#define TPS65912_LDO1_AVS 0x11 -#define TPS65912_LDO1_LIMIT 0x12 -#define TPS65912_LDO2_OP 0x13 -#define TPS65912_LDO2_AVS 0x14 -#define TPS65912_LDO2_LIMIT 0x15 -#define TPS65912_LDO3_OP 0x16 -#define TPS65912_LDO3_AVS 0x17 -#define TPS65912_LDO3_LIMIT 0x18 -#define TPS65912_LDO4_OP 0x19 -#define TPS65912_LDO4_AVS 0x1A -#define TPS65912_LDO4_LIMIT 0x1B -#define TPS65912_LDO5 0x1C -#define TPS65912_LDO6 0x1D -#define TPS65912_LDO7 0x1E -#define TPS65912_LDO8 0x1F -#define TPS65912_LDO9 0x20 -#define TPS65912_LDO10 0x21 -#define TPS65912_THRM 0x22 -#define TPS65912_CLK32OUT 0x23 -#define TPS65912_DEVCTRL 0x24 -#define TPS65912_DEVCTRL2 0x25 -#define TPS65912_I2C_SPI_CFG 0x26 -#define TPS65912_KEEP_ON 0x27 -#define TPS65912_KEEP_ON2 0x28 -#define TPS65912_SET_OFF1 0x29 -#define TPS65912_SET_OFF2 0x2A -#define TPS65912_DEF_VOLT 0x2B -#define TPS65912_DEF_VOLT_MAPPING 0x2C -#define TPS65912_DISCHARGE 0x2D -#define TPS65912_DISCHARGE2 0x2E -#define TPS65912_EN1_SET1 0x2F -#define TPS65912_EN1_SET2 0x30 -#define TPS65912_EN2_SET1 0x31 -#define TPS65912_EN2_SET2 0x32 -#define TPS65912_EN3_SET1 0x33 -#define TPS65912_EN3_SET2 0x34 -#define TPS65912_EN4_SET1 0x35 -#define TPS65912_EN4_SET2 0x36 -#define TPS65912_PGOOD 0x37 -#define TPS65912_PGOOD2 0x38 -#define TPS65912_INT_STS 0x39 -#define TPS65912_INT_MSK 0x3A -#define TPS65912_INT_STS2 0x3B -#define TPS65912_INT_MSK2 0x3C -#define TPS65912_INT_STS3 0x3D -#define TPS65912_INT_MSK3 0x3E -#define TPS65912_INT_STS4 0x3F -#define TPS65912_INT_MSK4 0x40 -#define TPS65912_GPIO1 0x41 -#define TPS65912_GPIO2 0x42 -#define TPS65912_GPIO3 0x43 -#define TPS65912_GPIO4 0x44 -#define TPS65912_GPIO5 0x45 -#define TPS65912_VMON 0x46 -#define TPS65912_LEDA_CTRL1 0x47 -#define TPS65912_LEDA_CTRL2 0x48 -#define TPS65912_LEDA_CTRL3 0x49 -#define TPS65912_LEDA_CTRL4 0x4A -#define TPS65912_LEDA_CTRL5 0x4B -#define TPS65912_LEDA_CTRL6 0x4C -#define TPS65912_LEDA_CTRL7 0x4D -#define TPS65912_LEDA_CTRL8 0x4E -#define TPS65912_LEDB_CTRL1 0x4F -#define TPS65912_LEDB_CTRL2 0x50 -#define TPS65912_LEDB_CTRL3 0x51 -#define TPS65912_LEDB_CTRL4 0x52 -#define TPS65912_LEDB_CTRL5 0x53 -#define TPS65912_LEDB_CTRL6 0x54 -#define TPS65912_LEDB_CTRL7 0x55 -#define TPS65912_LEDB_CTRL8 0x56 -#define TPS65912_LEDC_CTRL1 0x57 -#define TPS65912_LEDC_CTRL2 0x58 -#define TPS65912_LEDC_CTRL3 0x59 -#define TPS65912_LEDC_CTRL4 0x5A -#define TPS65912_LEDC_CTRL5 0x5B -#define TPS65912_LEDC_CTRL6 0x5C -#define TPS65912_LEDC_CTRL7 0x5D -#define 
TPS65912_LEDC_CTRL8 0x5E -#define TPS65912_LED_RAMP_UP_TIME 0x5F -#define TPS65912_LED_RAMP_DOWN_TIME 0x60 -#define TPS65912_LED_SEQ_EN 0x61 -#define TPS65912_LOADSWITCH 0x62 -#define TPS65912_SPARE 0x63 -#define TPS65912_VERNUM 0x64 -#define TPS6591X_MAX_REGISTER 0x64 - -/* IRQ Definitions */ -#define TPS65912_IRQ_PWRHOLD_F 0 -#define TPS65912_IRQ_VMON 1 -#define TPS65912_IRQ_PWRON 2 -#define TPS65912_IRQ_PWRON_LP 3 -#define TPS65912_IRQ_PWRHOLD_R 4 -#define TPS65912_IRQ_HOTDIE 5 -#define TPS65912_IRQ_GPIO1_R 6 -#define TPS65912_IRQ_GPIO1_F 7 -#define TPS65912_IRQ_GPIO2_R 8 -#define TPS65912_IRQ_GPIO2_F 9 -#define TPS65912_IRQ_GPIO3_R 10 -#define TPS65912_IRQ_GPIO3_F 11 -#define TPS65912_IRQ_GPIO4_R 12 -#define TPS65912_IRQ_GPIO4_F 13 -#define TPS65912_IRQ_GPIO5_R 14 -#define TPS65912_IRQ_GPIO5_F 15 -#define TPS65912_IRQ_PGOOD_DCDC1 16 -#define TPS65912_IRQ_PGOOD_DCDC2 17 -#define TPS65912_IRQ_PGOOD_DCDC3 18 -#define TPS65912_IRQ_PGOOD_DCDC4 19 -#define TPS65912_IRQ_PGOOD_LDO1 20 -#define TPS65912_IRQ_PGOOD_LDO2 21 -#define TPS65912_IRQ_PGOOD_LDO3 22 -#define TPS65912_IRQ_PGOOD_LDO4 23 -#define TPS65912_IRQ_PGOOD_LDO5 24 -#define TPS65912_IRQ_PGOOD_LDO6 25 -#define TPS65912_IRQ_PGOOD_LDO7 26 -#define TPS65912_IRQ_PGOOD_LD08 27 -#define TPS65912_IRQ_PGOOD_LDO9 28 -#define TPS65912_IRQ_PGOOD_LDO10 29 - -#define TPS65912_NUM_IRQ 30 - -/* GPIO 1 and 2 Register Definitions */ -#define GPIO_SLEEP_MASK 0x80 -#define GPIO_SLEEP_SHIFT 7 -#define GPIO_DEB_MASK 0x10 -#define GPIO_DEB_SHIFT 4 -#define GPIO_CFG_MASK 0x04 -#define GPIO_CFG_SHIFT 2 -#define GPIO_STS_MASK 0x02 -#define GPIO_STS_SHIFT 1 -#define GPIO_SET_MASK 0x01 -#define GPIO_SET_SHIFT 0 - -/* GPIO 3 Register Definitions */ -#define GPIO3_SLEEP_MASK 0x80 -#define GPIO3_SLEEP_SHIFT 7 -#define GPIO3_SEL_MASK 0x40 -#define GPIO3_SEL_SHIFT 6 -#define GPIO3_ODEN_MASK 0x20 -#define GPIO3_ODEN_SHIFT 5 -#define GPIO3_DEB_MASK 0x10 -#define GPIO3_DEB_SHIFT 4 -#define GPIO3_PDEN_MASK 0x08 -#define GPIO3_PDEN_SHIFT 3 -#define GPIO3_CFG_MASK 0x04 -#define GPIO3_CFG_SHIFT 2 -#define GPIO3_STS_MASK 0x02 -#define GPIO3_STS_SHIFT 1 -#define GPIO3_SET_MASK 0x01 -#define GPIO3_SET_SHIFT 0 - -/* GPIO 4 Register Definitions */ -#define GPIO4_SLEEP_MASK 0x80 -#define GPIO4_SLEEP_SHIFT 7 -#define GPIO4_SEL_MASK 0x40 -#define GPIO4_SEL_SHIFT 6 -#define GPIO4_ODEN_MASK 0x20 -#define GPIO4_ODEN_SHIFT 5 -#define GPIO4_DEB_MASK 0x10 -#define GPIO4_DEB_SHIFT 4 -#define GPIO4_PDEN_MASK 0x08 -#define GPIO4_PDEN_SHIFT 3 -#define GPIO4_CFG_MASK 0x04 -#define GPIO4_CFG_SHIFT 2 -#define GPIO4_STS_MASK 0x02 -#define GPIO4_STS_SHIFT 1 -#define GPIO4_SET_MASK 0x01 -#define GPIO4_SET_SHIFT 0 - -/* Register THERM (0x80) register.RegisterDescription */ -#define THERM_THERM_HD_MASK 0x20 -#define THERM_THERM_HD_SHIFT 5 -#define THERM_THERM_TS_MASK 0x10 -#define THERM_THERM_TS_SHIFT 4 -#define THERM_THERM_HDSEL_MASK 0x0C -#define THERM_THERM_HDSEL_SHIFT 2 -#define THERM_RSVD1_MASK 0x02 -#define THERM_RSVD1_SHIFT 1 -#define THERM_THERM_STATE_MASK 0x01 -#define THERM_THERM_STATE_SHIFT 0 - -/* Register DCDCCTRL1 register.RegisterDescription */ -#define DCDCCTRL_VCON_ENABLE_MASK 0x80 -#define DCDCCTRL_VCON_ENABLE_SHIFT 7 -#define DCDCCTRL_VCON_RANGE1_MASK 0x40 -#define DCDCCTRL_VCON_RANGE1_SHIFT 6 -#define DCDCCTRL_VCON_RANGE0_MASK 0x20 -#define DCDCCTRL_VCON_RANGE0_SHIFT 5 -#define DCDCCTRL_TSTEP2_MASK 0x10 -#define DCDCCTRL_TSTEP2_SHIFT 4 -#define DCDCCTRL_TSTEP1_MASK 0x08 -#define DCDCCTRL_TSTEP1_SHIFT 3 -#define DCDCCTRL_TSTEP0_MASK 0x04 -#define DCDCCTRL_TSTEP0_SHIFT 2 -#define 
DCDCCTRL_DCDC1_MODE_MASK 0x02 -#define DCDCCTRL_DCDC1_MODE_SHIFT 1 - -/* Register DCDCCTRL2 and DCDCCTRL3 register.RegisterDescription */ -#define DCDCCTRL_TSTEP2_MASK 0x10 -#define DCDCCTRL_TSTEP2_SHIFT 4 -#define DCDCCTRL_TSTEP1_MASK 0x08 -#define DCDCCTRL_TSTEP1_SHIFT 3 -#define DCDCCTRL_TSTEP0_MASK 0x04 -#define DCDCCTRL_TSTEP0_SHIFT 2 -#define DCDCCTRL_DCDC_MODE_MASK 0x02 -#define DCDCCTRL_DCDC_MODE_SHIFT 1 -#define DCDCCTRL_RSVD0_MASK 0x01 -#define DCDCCTRL_RSVD0_SHIFT 0 - -/* Register DCDCCTRL4 register.RegisterDescription */ -#define DCDCCTRL_RAMP_TIME_MASK 0x01 -#define DCDCCTRL_RAMP_TIME_SHIFT 0 - -/* Register DCDCx_AVS */ -#define DCDC_AVS_ENABLE_MASK 0x80 -#define DCDC_AVS_ENABLE_SHIFT 7 -#define DCDC_AVS_ECO_MASK 0x40 -#define DCDC_AVS_ECO_SHIFT 6 - -/* Register DCDCx_LIMIT */ -#define DCDC_LIMIT_RANGE_MASK 0xC0 -#define DCDC_LIMIT_RANGE_SHIFT 6 -#define DCDC_LIMIT_MAX_SEL_MASK 0x3F -#define DCDC_LIMIT_MAX_SEL_SHIFT 0 - -/** - * struct tps65912_board - * Board platform dat may be used to initialize regulators. - */ -struct tps65912_board { - int is_dcdc1_avs; - int is_dcdc2_avs; - int is_dcdc3_avs; - int is_dcdc4_avs; - int irq; - int irq_base; - int gpio_base; - struct regulator_init_data *tps65912_pmic_init_data; -}; - -/** - * struct tps65912 - tps65912 sub-driver chip access routines - */ - -struct tps65912 { - struct device *dev; - /* for read/write acces */ - struct mutex io_mutex; - - /* For device IO interfaces: I2C or SPI */ - void *control_data; - - int (*read)(struct tps65912 *tps65912, u8 reg, int size, void *dest); - int (*write)(struct tps65912 *tps65912, u8 reg, int size, void *src); - - /* Client devices */ - struct tps65912_pmic *pmic; - - /* GPIO Handling */ - struct gpio_chip gpio; - - /* IRQ Handling */ - struct mutex irq_lock; - int chip_irq; - int irq_base; - int irq_num; - u32 irq_mask; -}; - -struct tps65912_platform_data { - int irq; - int irq_base; -}; - -unsigned int tps_chip(void); - -int tps65912_set_bits(struct tps65912 *tps65912, u8 reg, u8 mask); -int tps65912_clear_bits(struct tps65912 *tps65912, u8 reg, u8 mask); -int tps65912_reg_read(struct tps65912 *tps65912, u8 reg); -int tps65912_reg_write(struct tps65912 *tps65912, u8 reg, u8 val); -int tps65912_device_init(struct tps65912 *tps65912); -void tps65912_device_exit(struct tps65912 *tps65912); -int tps65912_irq_init(struct tps65912 *tps65912, int irq, - struct tps65912_platform_data *pdata); -int tps65912_irq_exit(struct tps65912 *tps65912); - -#endif /* __LINUX_MFD_TPS65912_H */ -- cgit v1.2.3 From 796f5692daac4ea47fa5252af742976cd1955f0b Mon Sep 17 00:00:00 2001 From: "Andrew F. Davis" Date: Mon, 25 Jan 2016 09:43:45 -0600 Subject: mfd: tps65912: Add driver for the TPS65912 PMIC This patch adds support for TPS65912 PMIC MFD core. It provides communication through the I2C and SPI interfaces. It contains the following components: - Regulators - GPIO controller Signed-off-by: Andrew F. 
Davis Signed-off-by: Lee Jones --- drivers/mfd/Kconfig | 24 +++ drivers/mfd/Makefile | 3 + drivers/mfd/tps65912-core.c | 111 ++++++++++++++ drivers/mfd/tps65912-i2c.c | 79 ++++++++++ drivers/mfd/tps65912-spi.c | 78 ++++++++++ include/linux/mfd/tps65912.h | 341 +++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 636 insertions(+) create mode 100644 drivers/mfd/tps65912-core.c create mode 100644 drivers/mfd/tps65912-i2c.c create mode 100644 drivers/mfd/tps65912-spi.c create mode 100644 include/linux/mfd/tps65912.h (limited to 'include/linux') diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig index dd2fc0158c7a..1bc97c2761e4 100644 --- a/drivers/mfd/Kconfig +++ b/drivers/mfd/Kconfig @@ -1180,6 +1180,30 @@ config MFD_TPS65910 if you say yes here you get support for the TPS65910 series of Power Management chips. +config MFD_TPS65912 + tristate + select MFD_CORE + select REGMAP + select REGMAP_IRQ + +config MFD_TPS65912_I2C + tristate "TI TPS65912 Power Management chip with I2C" + select MFD_TPS65912 + select REGMAP_I2C + depends on I2C + help + If you say yes here you get support for the TPS65912 series of + PM chips with I2C interface. + +config MFD_TPS65912_SPI + tristate "TI TPS65912 Power Management chip with SPI" + select MFD_TPS65912 + select REGMAP_SPI + depends on SPI_MASTER + help + If you say yes here you get support for the TPS65912 series of + PM chips with SPI interface. + config MFD_TPS80031 bool "TI TPS80031/TPS80032 Power Management chips" depends on I2C=y diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile index e33e0b48f591..1811202cee19 100644 --- a/drivers/mfd/Makefile +++ b/drivers/mfd/Makefile @@ -73,6 +73,9 @@ obj-$(CONFIG_TPS6507X) += tps6507x.o obj-$(CONFIG_MFD_TPS65217) += tps65217.o obj-$(CONFIG_MFD_TPS65218) += tps65218.o obj-$(CONFIG_MFD_TPS65910) += tps65910.o +obj-$(CONFIG_MFD_TPS65912) += tps65912-core.o +obj-$(CONFIG_MFD_TPS65912_I2C) += tps65912-i2c.o +obj-$(CONFIG_MFD_TPS65912_SPI) += tps65912-spi.o obj-$(CONFIG_MFD_TPS80031) += tps80031.o obj-$(CONFIG_MENELAUS) += menelaus.o diff --git a/drivers/mfd/tps65912-core.c b/drivers/mfd/tps65912-core.c new file mode 100644 index 000000000000..a88cfa80dbc4 --- /dev/null +++ b/drivers/mfd/tps65912-core.c @@ -0,0 +1,111 @@ +/* + * Core functions for TI TPS65912x PMICs + * + * Copyright (C) 2015 Texas Instruments Incorporated - http://www.ti.com/ + * Andrew F. Davis + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed "as is" WITHOUT ANY WARRANTY of any + * kind, whether expressed or implied; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License version 2 for more details. 
+ * + * Based on the TPS65218 driver and the previous TPS65912 driver by + * Margarita Olaya Cabrera + */ + +#include +#include +#include + +#include + +static const struct mfd_cell tps65912_cells[] = { + { .name = "tps65912-regulator", }, + { .name = "tps65912-gpio", }, +}; + +static const struct regmap_irq tps65912_irqs[] = { + /* INT_STS IRQs */ + REGMAP_IRQ_REG(TPS65912_IRQ_PWRHOLD_F, 0, TPS65912_INT_STS_PWRHOLD_F), + REGMAP_IRQ_REG(TPS65912_IRQ_VMON, 0, TPS65912_INT_STS_VMON), + REGMAP_IRQ_REG(TPS65912_IRQ_PWRON, 0, TPS65912_INT_STS_PWRON), + REGMAP_IRQ_REG(TPS65912_IRQ_PWRON_LP, 0, TPS65912_INT_STS_PWRON_LP), + REGMAP_IRQ_REG(TPS65912_IRQ_PWRHOLD_R, 0, TPS65912_INT_STS_PWRHOLD_R), + REGMAP_IRQ_REG(TPS65912_IRQ_HOTDIE, 0, TPS65912_INT_STS_HOTDIE), + REGMAP_IRQ_REG(TPS65912_IRQ_GPIO1_R, 0, TPS65912_INT_STS_GPIO1_R), + REGMAP_IRQ_REG(TPS65912_IRQ_GPIO1_F, 0, TPS65912_INT_STS_GPIO1_F), + /* INT_STS2 IRQs */ + REGMAP_IRQ_REG(TPS65912_IRQ_GPIO2_R, 1, TPS65912_INT_STS2_GPIO2_R), + REGMAP_IRQ_REG(TPS65912_IRQ_GPIO2_F, 1, TPS65912_INT_STS2_GPIO2_F), + REGMAP_IRQ_REG(TPS65912_IRQ_GPIO3_R, 1, TPS65912_INT_STS2_GPIO3_R), + REGMAP_IRQ_REG(TPS65912_IRQ_GPIO3_F, 1, TPS65912_INT_STS2_GPIO3_F), + REGMAP_IRQ_REG(TPS65912_IRQ_GPIO4_R, 1, TPS65912_INT_STS2_GPIO4_R), + REGMAP_IRQ_REG(TPS65912_IRQ_GPIO4_F, 1, TPS65912_INT_STS2_GPIO4_F), + REGMAP_IRQ_REG(TPS65912_IRQ_GPIO5_R, 1, TPS65912_INT_STS2_GPIO5_R), + REGMAP_IRQ_REG(TPS65912_IRQ_GPIO5_F, 1, TPS65912_INT_STS2_GPIO5_F), + /* INT_STS3 IRQs */ + REGMAP_IRQ_REG(TPS65912_IRQ_PGOOD_DCDC1, 2, TPS65912_INT_STS3_PGOOD_DCDC1), + REGMAP_IRQ_REG(TPS65912_IRQ_PGOOD_DCDC2, 2, TPS65912_INT_STS3_PGOOD_DCDC2), + REGMAP_IRQ_REG(TPS65912_IRQ_PGOOD_DCDC3, 2, TPS65912_INT_STS3_PGOOD_DCDC3), + REGMAP_IRQ_REG(TPS65912_IRQ_PGOOD_DCDC4, 2, TPS65912_INT_STS3_PGOOD_DCDC4), + REGMAP_IRQ_REG(TPS65912_IRQ_PGOOD_LDO1, 2, TPS65912_INT_STS3_PGOOD_LDO1), + REGMAP_IRQ_REG(TPS65912_IRQ_PGOOD_LDO2, 2, TPS65912_INT_STS3_PGOOD_LDO2), + REGMAP_IRQ_REG(TPS65912_IRQ_PGOOD_LDO3, 2, TPS65912_INT_STS3_PGOOD_LDO3), + REGMAP_IRQ_REG(TPS65912_IRQ_PGOOD_LDO4, 2, TPS65912_INT_STS3_PGOOD_LDO4), + /* INT_STS4 IRQs */ + REGMAP_IRQ_REG(TPS65912_IRQ_PGOOD_LDO5, 3, TPS65912_INT_STS4_PGOOD_LDO5), + REGMAP_IRQ_REG(TPS65912_IRQ_PGOOD_LDO6, 3, TPS65912_INT_STS4_PGOOD_LDO6), + REGMAP_IRQ_REG(TPS65912_IRQ_PGOOD_LDO7, 3, TPS65912_INT_STS4_PGOOD_LDO7), + REGMAP_IRQ_REG(TPS65912_IRQ_PGOOD_LDO8, 3, TPS65912_INT_STS4_PGOOD_LDO8), + REGMAP_IRQ_REG(TPS65912_IRQ_PGOOD_LDO9, 3, TPS65912_INT_STS4_PGOOD_LDO9), + REGMAP_IRQ_REG(TPS65912_IRQ_PGOOD_LDO10, 3, TPS65912_INT_STS4_PGOOD_LDO10), +}; + +static struct regmap_irq_chip tps65912_irq_chip = { + .name = "tps65912", + .irqs = tps65912_irqs, + .num_irqs = ARRAY_SIZE(tps65912_irqs), + .num_regs = 4, + .irq_reg_stride = 2, + .mask_base = TPS65912_INT_MSK, + .status_base = TPS65912_INT_STS, + .ack_base = TPS65912_INT_STS, + .init_ack_masked = true, +}; + +int tps65912_device_init(struct tps65912 *tps) +{ + int ret; + + ret = regmap_add_irq_chip(tps->regmap, tps->irq, IRQF_ONESHOT, 0, + &tps65912_irq_chip, &tps->irq_data); + if (ret) + return ret; + + ret = mfd_add_devices(tps->dev, PLATFORM_DEVID_AUTO, tps65912_cells, + ARRAY_SIZE(tps65912_cells), NULL, 0, + regmap_irq_get_domain(tps->irq_data)); + if (ret) { + regmap_del_irq_chip(tps->irq, tps->irq_data); + return ret; + } + + return 0; +} +EXPORT_SYMBOL_GPL(tps65912_device_init); + +int tps65912_device_exit(struct tps65912 *tps) +{ + regmap_del_irq_chip(tps->irq, tps->irq_data); + + return 0; +} 
+EXPORT_SYMBOL_GPL(tps65912_device_exit); + +MODULE_AUTHOR("Andrew F. Davis "); +MODULE_DESCRIPTION("TPS65912x MFD Driver"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/mfd/tps65912-i2c.c b/drivers/mfd/tps65912-i2c.c new file mode 100644 index 000000000000..45871403f995 --- /dev/null +++ b/drivers/mfd/tps65912-i2c.c @@ -0,0 +1,79 @@ +/* + * I2C access driver for TI TPS65912x PMICs + * + * Copyright (C) 2015 Texas Instruments Incorporated - http://www.ti.com/ + * Andrew F. Davis + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed "as is" WITHOUT ANY WARRANTY of any + * kind, whether expressed or implied; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License version 2 for more details. + * + * Based on the TPS65218 driver and the previous TPS65912 driver by + * Margarita Olaya Cabrera + */ + +#include +#include +#include + +#include + +static const struct of_device_id tps65912_i2c_of_match_table[] = { + { .compatible = "ti,tps65912", }, + { /* sentinel */ } +}; + +static int tps65912_i2c_probe(struct i2c_client *client, + const struct i2c_device_id *ids) +{ + struct tps65912 *tps; + + tps = devm_kzalloc(&client->dev, sizeof(*tps), GFP_KERNEL); + if (!tps) + return -ENOMEM; + + i2c_set_clientdata(client, tps); + tps->dev = &client->dev; + tps->irq = client->irq; + + tps->regmap = devm_regmap_init_i2c(client, &tps65912_regmap_config); + if (IS_ERR(tps->regmap)) { + dev_err(tps->dev, "Failed to initialize register map\n"); + return PTR_ERR(tps->regmap); + } + + return tps65912_device_init(tps); +} + +static int tps65912_i2c_remove(struct i2c_client *client) +{ + struct tps65912 *tps = i2c_get_clientdata(client); + + return tps65912_device_exit(tps); +} + +static const struct i2c_device_id tps65912_i2c_id_table[] = { + { "tps65912", 0 }, + { /* sentinel */ } +}; +MODULE_DEVICE_TABLE(i2c, tps65912_i2c_id_table); + +static struct i2c_driver tps65912_i2c_driver = { + .driver = { + .name = "tps65912", + .of_match_table = tps65912_i2c_of_match_table, + }, + .probe = tps65912_i2c_probe, + .remove = tps65912_i2c_remove, + .id_table = tps65912_i2c_id_table, +}; +module_i2c_driver(tps65912_i2c_driver); + +MODULE_AUTHOR("Andrew F. Davis "); +MODULE_DESCRIPTION("TPS65912x I2C Interface Driver"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/mfd/tps65912-spi.c b/drivers/mfd/tps65912-spi.c new file mode 100644 index 000000000000..4aeba9b6942a --- /dev/null +++ b/drivers/mfd/tps65912-spi.c @@ -0,0 +1,78 @@ +/* + * SPI access driver for TI TPS65912x PMICs + * + * Copyright (C) 2015 Texas Instruments Incorporated - http://www.ti.com/ + * Andrew F. Davis + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed "as is" WITHOUT ANY WARRANTY of any + * kind, whether expressed or implied; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License version 2 for more details. 
+ * + * Based on the TPS65218 driver and the previous TPS65912 driver by + * Margarita Olaya Cabrera + */ + +#include +#include +#include + +#include + +static const struct of_device_id tps65912_spi_of_match_table[] = { + { .compatible = "ti,tps65912", }, + { /* sentinel */ } +}; + +static int tps65912_spi_probe(struct spi_device *spi) +{ + struct tps65912 *tps; + + tps = devm_kzalloc(&spi->dev, sizeof(*tps), GFP_KERNEL); + if (!tps) + return -ENOMEM; + + spi_set_drvdata(spi, tps); + tps->dev = &spi->dev; + tps->irq = spi->irq; + + tps->regmap = devm_regmap_init_spi(spi, &tps65912_regmap_config); + if (IS_ERR(tps->regmap)) { + dev_err(tps->dev, "Failed to initialize register map\n"); + return PTR_ERR(tps->regmap); + } + + return tps65912_device_init(tps); +} + +static int tps65912_spi_remove(struct spi_device *client) +{ + struct tps65912 *tps = spi_get_drvdata(client); + + return tps65912_device_exit(tps); +} + +static const struct spi_device_id tps65912_spi_id_table[] = { + { "tps65912", 0 }, + { /* sentinel */ } +}; +MODULE_DEVICE_TABLE(spi, tps65912_spi_id_table); + +static struct spi_driver tps65912_spi_driver = { + .driver = { + .name = "tps65912", + .of_match_table = tps65912_spi_of_match_table, + }, + .probe = tps65912_spi_probe, + .remove = tps65912_spi_remove, + .id_table = tps65912_spi_id_table, +}; +module_spi_driver(tps65912_spi_driver); + +MODULE_AUTHOR("Andrew F. Davis "); +MODULE_DESCRIPTION("TPS65912x SPI Interface Driver"); +MODULE_LICENSE("GPL v2"); diff --git a/include/linux/mfd/tps65912.h b/include/linux/mfd/tps65912.h new file mode 100644 index 000000000000..1a603701550e --- /dev/null +++ b/include/linux/mfd/tps65912.h @@ -0,0 +1,341 @@ +/* + * Copyright (C) 2015 Texas Instruments Incorporated - http://www.ti.com/ + * Andrew F. Davis + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed "as is" WITHOUT ANY WARRANTY of any + * kind, whether expressed or implied; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License version 2 for more details. 
+ * + * Based on the TPS65218 driver and the previous TPS65912 driver by + * Margarita Olaya Cabrera + */ + +#ifndef __LINUX_MFD_TPS65912_H +#define __LINUX_MFD_TPS65912_H + +#include +#include + +/* List of registers for TPS65912 */ +#define TPS65912_DCDC1_CTRL 0x00 +#define TPS65912_DCDC2_CTRL 0x01 +#define TPS65912_DCDC3_CTRL 0x02 +#define TPS65912_DCDC4_CTRL 0x03 +#define TPS65912_DCDC1_OP 0x04 +#define TPS65912_DCDC1_AVS 0x05 +#define TPS65912_DCDC1_LIMIT 0x06 +#define TPS65912_DCDC2_OP 0x07 +#define TPS65912_DCDC2_AVS 0x08 +#define TPS65912_DCDC2_LIMIT 0x09 +#define TPS65912_DCDC3_OP 0x0A +#define TPS65912_DCDC3_AVS 0x0B +#define TPS65912_DCDC3_LIMIT 0x0C +#define TPS65912_DCDC4_OP 0x0D +#define TPS65912_DCDC4_AVS 0x0E +#define TPS65912_DCDC4_LIMIT 0x0F +#define TPS65912_LDO1_OP 0x10 +#define TPS65912_LDO1_AVS 0x11 +#define TPS65912_LDO1_LIMIT 0x12 +#define TPS65912_LDO2_OP 0x13 +#define TPS65912_LDO2_AVS 0x14 +#define TPS65912_LDO2_LIMIT 0x15 +#define TPS65912_LDO3_OP 0x16 +#define TPS65912_LDO3_AVS 0x17 +#define TPS65912_LDO3_LIMIT 0x18 +#define TPS65912_LDO4_OP 0x19 +#define TPS65912_LDO4_AVS 0x1A +#define TPS65912_LDO4_LIMIT 0x1B +#define TPS65912_LDO5 0x1C +#define TPS65912_LDO6 0x1D +#define TPS65912_LDO7 0x1E +#define TPS65912_LDO8 0x1F +#define TPS65912_LDO9 0x20 +#define TPS65912_LDO10 0x21 +#define TPS65912_THRM 0x22 +#define TPS65912_CLK32OUT 0x23 +#define TPS65912_DEVCTRL 0x24 +#define TPS65912_DEVCTRL2 0x25 +#define TPS65912_I2C_SPI_CFG 0x26 +#define TPS65912_KEEP_ON 0x27 +#define TPS65912_KEEP_ON2 0x28 +#define TPS65912_SET_OFF1 0x29 +#define TPS65912_SET_OFF2 0x2A +#define TPS65912_DEF_VOLT 0x2B +#define TPS65912_DEF_VOLT_MAPPING 0x2C +#define TPS65912_DISCHARGE 0x2D +#define TPS65912_DISCHARGE2 0x2E +#define TPS65912_EN1_SET1 0x2F +#define TPS65912_EN1_SET2 0x30 +#define TPS65912_EN2_SET1 0x31 +#define TPS65912_EN2_SET2 0x32 +#define TPS65912_EN3_SET1 0x33 +#define TPS65912_EN3_SET2 0x34 +#define TPS65912_EN4_SET1 0x35 +#define TPS65912_EN4_SET2 0x36 +#define TPS65912_PGOOD 0x37 +#define TPS65912_PGOOD2 0x38 +#define TPS65912_INT_STS 0x39 +#define TPS65912_INT_MSK 0x3A +#define TPS65912_INT_STS2 0x3B +#define TPS65912_INT_MSK2 0x3C +#define TPS65912_INT_STS3 0x3D +#define TPS65912_INT_MSK3 0x3E +#define TPS65912_INT_STS4 0x3F +#define TPS65912_INT_MSK4 0x40 +#define TPS65912_GPIO1 0x41 +#define TPS65912_GPIO2 0x42 +#define TPS65912_GPIO3 0x43 +#define TPS65912_GPIO4 0x44 +#define TPS65912_GPIO5 0x45 +#define TPS65912_VMON 0x46 +#define TPS65912_LEDA_CTRL1 0x47 +#define TPS65912_LEDA_CTRL2 0x48 +#define TPS65912_LEDA_CTRL3 0x49 +#define TPS65912_LEDA_CTRL4 0x4A +#define TPS65912_LEDA_CTRL5 0x4B +#define TPS65912_LEDA_CTRL6 0x4C +#define TPS65912_LEDA_CTRL7 0x4D +#define TPS65912_LEDA_CTRL8 0x4E +#define TPS65912_LEDB_CTRL1 0x4F +#define TPS65912_LEDB_CTRL2 0x50 +#define TPS65912_LEDB_CTRL3 0x51 +#define TPS65912_LEDB_CTRL4 0x52 +#define TPS65912_LEDB_CTRL5 0x53 +#define TPS65912_LEDB_CTRL6 0x54 +#define TPS65912_LEDB_CTRL7 0x55 +#define TPS65912_LEDB_CTRL8 0x56 +#define TPS65912_LEDC_CTRL1 0x57 +#define TPS65912_LEDC_CTRL2 0x58 +#define TPS65912_LEDC_CTRL3 0x59 +#define TPS65912_LEDC_CTRL4 0x5A +#define TPS65912_LEDC_CTRL5 0x5B +#define TPS65912_LEDC_CTRL6 0x5C +#define TPS65912_LEDC_CTRL7 0x5D +#define TPS65912_LEDC_CTRL8 0x5E +#define TPS65912_LED_RAMP_UP_TIME 0x5F +#define TPS65912_LED_RAMP_DOWN_TIME 0x60 +#define TPS65912_LED_SEQ_EN 0x61 +#define TPS65912_LOADSWITCH 0x62 +#define TPS65912_SPARE 0x63 +#define TPS65912_VERNUM 0x64 +#define TPS6591X_MAX_REGISTER 0x64 
+ +/* INT_STS Register field definitions */ +#define TPS65912_INT_STS_PWRHOLD_F BIT(0) +#define TPS65912_INT_STS_VMON BIT(1) +#define TPS65912_INT_STS_PWRON BIT(2) +#define TPS65912_INT_STS_PWRON_LP BIT(3) +#define TPS65912_INT_STS_PWRHOLD_R BIT(4) +#define TPS65912_INT_STS_HOTDIE BIT(5) +#define TPS65912_INT_STS_GPIO1_R BIT(6) +#define TPS65912_INT_STS_GPIO1_F BIT(7) + +/* INT_STS Register field definitions */ +#define TPS65912_INT_STS2_GPIO2_R BIT(0) +#define TPS65912_INT_STS2_GPIO2_F BIT(1) +#define TPS65912_INT_STS2_GPIO3_R BIT(2) +#define TPS65912_INT_STS2_GPIO3_F BIT(3) +#define TPS65912_INT_STS2_GPIO4_R BIT(4) +#define TPS65912_INT_STS2_GPIO4_F BIT(5) +#define TPS65912_INT_STS2_GPIO5_R BIT(6) +#define TPS65912_INT_STS2_GPIO5_F BIT(7) + +/* INT_STS Register field definitions */ +#define TPS65912_INT_STS3_PGOOD_DCDC1 BIT(0) +#define TPS65912_INT_STS3_PGOOD_DCDC2 BIT(1) +#define TPS65912_INT_STS3_PGOOD_DCDC3 BIT(2) +#define TPS65912_INT_STS3_PGOOD_DCDC4 BIT(3) +#define TPS65912_INT_STS3_PGOOD_LDO1 BIT(4) +#define TPS65912_INT_STS3_PGOOD_LDO2 BIT(5) +#define TPS65912_INT_STS3_PGOOD_LDO3 BIT(6) +#define TPS65912_INT_STS3_PGOOD_LDO4 BIT(7) + +/* INT_STS Register field definitions */ +#define TPS65912_INT_STS4_PGOOD_LDO5 BIT(0) +#define TPS65912_INT_STS4_PGOOD_LDO6 BIT(1) +#define TPS65912_INT_STS4_PGOOD_LDO7 BIT(2) +#define TPS65912_INT_STS4_PGOOD_LDO8 BIT(3) +#define TPS65912_INT_STS4_PGOOD_LDO9 BIT(4) +#define TPS65912_INT_STS4_PGOOD_LDO10 BIT(5) + +/* GPIO 1 and 2 Register field definitions */ +#define GPIO_SLEEP_MASK 0x80 +#define GPIO_SLEEP_SHIFT 7 +#define GPIO_DEB_MASK 0x10 +#define GPIO_DEB_SHIFT 4 +#define GPIO_CFG_MASK 0x04 +#define GPIO_CFG_SHIFT 2 +#define GPIO_STS_MASK 0x02 +#define GPIO_STS_SHIFT 1 +#define GPIO_SET_MASK 0x01 +#define GPIO_SET_SHIFT 0 + +/* GPIO 3 Register field definitions */ +#define GPIO3_SLEEP_MASK 0x80 +#define GPIO3_SLEEP_SHIFT 7 +#define GPIO3_SEL_MASK 0x40 +#define GPIO3_SEL_SHIFT 6 +#define GPIO3_ODEN_MASK 0x20 +#define GPIO3_ODEN_SHIFT 5 +#define GPIO3_DEB_MASK 0x10 +#define GPIO3_DEB_SHIFT 4 +#define GPIO3_PDEN_MASK 0x08 +#define GPIO3_PDEN_SHIFT 3 +#define GPIO3_CFG_MASK 0x04 +#define GPIO3_CFG_SHIFT 2 +#define GPIO3_STS_MASK 0x02 +#define GPIO3_STS_SHIFT 1 +#define GPIO3_SET_MASK 0x01 +#define GPIO3_SET_SHIFT 0 + +/* GPIO 4 Register field definitions */ +#define GPIO4_SLEEP_MASK 0x80 +#define GPIO4_SLEEP_SHIFT 7 +#define GPIO4_SEL_MASK 0x40 +#define GPIO4_SEL_SHIFT 6 +#define GPIO4_ODEN_MASK 0x20 +#define GPIO4_ODEN_SHIFT 5 +#define GPIO4_DEB_MASK 0x10 +#define GPIO4_DEB_SHIFT 4 +#define GPIO4_PDEN_MASK 0x08 +#define GPIO4_PDEN_SHIFT 3 +#define GPIO4_CFG_MASK 0x04 +#define GPIO4_CFG_SHIFT 2 +#define GPIO4_STS_MASK 0x02 +#define GPIO4_STS_SHIFT 1 +#define GPIO4_SET_MASK 0x01 +#define GPIO4_SET_SHIFT 0 + +/* Register THERM (0x80) register.RegisterDescription */ +#define THERM_THERM_HD_MASK 0x20 +#define THERM_THERM_HD_SHIFT 5 +#define THERM_THERM_TS_MASK 0x10 +#define THERM_THERM_TS_SHIFT 4 +#define THERM_THERM_HDSEL_MASK 0x0C +#define THERM_THERM_HDSEL_SHIFT 2 +#define THERM_RSVD1_MASK 0x02 +#define THERM_RSVD1_SHIFT 1 +#define THERM_THERM_STATE_MASK 0x01 +#define THERM_THERM_STATE_SHIFT 0 + +/* Register DCDCCTRL1 register.RegisterDescription */ +#define DCDCCTRL_VCON_ENABLE_MASK 0x80 +#define DCDCCTRL_VCON_ENABLE_SHIFT 7 +#define DCDCCTRL_VCON_RANGE1_MASK 0x40 +#define DCDCCTRL_VCON_RANGE1_SHIFT 6 +#define DCDCCTRL_VCON_RANGE0_MASK 0x20 +#define DCDCCTRL_VCON_RANGE0_SHIFT 5 +#define DCDCCTRL_TSTEP2_MASK 0x10 +#define DCDCCTRL_TSTEP2_SHIFT 4 
+#define DCDCCTRL_TSTEP1_MASK 0x08 +#define DCDCCTRL_TSTEP1_SHIFT 3 +#define DCDCCTRL_TSTEP0_MASK 0x04 +#define DCDCCTRL_TSTEP0_SHIFT 2 +#define DCDCCTRL_DCDC1_MODE_MASK 0x02 +#define DCDCCTRL_DCDC1_MODE_SHIFT 1 + +/* Register DCDCCTRL2 and DCDCCTRL3 register.RegisterDescription */ +#define DCDCCTRL_TSTEP2_MASK 0x10 +#define DCDCCTRL_TSTEP2_SHIFT 4 +#define DCDCCTRL_TSTEP1_MASK 0x08 +#define DCDCCTRL_TSTEP1_SHIFT 3 +#define DCDCCTRL_TSTEP0_MASK 0x04 +#define DCDCCTRL_TSTEP0_SHIFT 2 +#define DCDCCTRL_DCDC_MODE_MASK 0x02 +#define DCDCCTRL_DCDC_MODE_SHIFT 1 +#define DCDCCTRL_RSVD0_MASK 0x01 +#define DCDCCTRL_RSVD0_SHIFT 0 + +/* Register DCDCCTRL4 register.RegisterDescription */ +#define DCDCCTRL_RAMP_TIME_MASK 0x01 +#define DCDCCTRL_RAMP_TIME_SHIFT 0 + +/* Register DCDCx_AVS */ +#define DCDC_AVS_ENABLE_MASK 0x80 +#define DCDC_AVS_ENABLE_SHIFT 7 +#define DCDC_AVS_ECO_MASK 0x40 +#define DCDC_AVS_ECO_SHIFT 6 + +/* Register DCDCx_LIMIT */ +#define DCDC_LIMIT_RANGE_MASK 0xC0 +#define DCDC_LIMIT_RANGE_SHIFT 6 +#define DCDC_LIMIT_MAX_SEL_MASK 0x3F +#define DCDC_LIMIT_MAX_SEL_SHIFT 0 + +/* Define the TPS65912 IRQ numbers */ +enum tps65912_irqs { + /* INT_STS registers */ + TPS65912_IRQ_PWRHOLD_F, + TPS65912_IRQ_VMON, + TPS65912_IRQ_PWRON, + TPS65912_IRQ_PWRON_LP, + TPS65912_IRQ_PWRHOLD_R, + TPS65912_IRQ_HOTDIE, + TPS65912_IRQ_GPIO1_R, + TPS65912_IRQ_GPIO1_F, + /* INT_STS2 registers */ + TPS65912_IRQ_GPIO2_R, + TPS65912_IRQ_GPIO2_F, + TPS65912_IRQ_GPIO3_R, + TPS65912_IRQ_GPIO3_F, + TPS65912_IRQ_GPIO4_R, + TPS65912_IRQ_GPIO4_F, + TPS65912_IRQ_GPIO5_R, + TPS65912_IRQ_GPIO5_F, + /* INT_STS3 registers */ + TPS65912_IRQ_PGOOD_DCDC1, + TPS65912_IRQ_PGOOD_DCDC2, + TPS65912_IRQ_PGOOD_DCDC3, + TPS65912_IRQ_PGOOD_DCDC4, + TPS65912_IRQ_PGOOD_LDO1, + TPS65912_IRQ_PGOOD_LDO2, + TPS65912_IRQ_PGOOD_LDO3, + TPS65912_IRQ_PGOOD_LDO4, + /* INT_STS4 registers */ + TPS65912_IRQ_PGOOD_LDO5, + TPS65912_IRQ_PGOOD_LDO6, + TPS65912_IRQ_PGOOD_LDO7, + TPS65912_IRQ_PGOOD_LDO8, + TPS65912_IRQ_PGOOD_LDO9, + TPS65912_IRQ_PGOOD_LDO10, +}; + +/* + * struct tps65912 - state holder for the tps65912 driver + * + * Device data may be used to access the TPS65912 chip + */ +struct tps65912 { + struct device *dev; + struct regmap *regmap; + + /* IRQ Data */ + int irq; + struct regmap_irq_chip_data *irq_data; +}; + +static const struct regmap_range tps65912_yes_ranges[] = { + regmap_reg_range(TPS65912_INT_STS, TPS65912_GPIO5), +}; + +static const struct regmap_access_table tps65912_volatile_table = { + .yes_ranges = tps65912_yes_ranges, + .n_yes_ranges = ARRAY_SIZE(tps65912_yes_ranges), +}; + +static const struct regmap_config tps65912_regmap_config = { + .reg_bits = 8, + .val_bits = 8, + .cache_type = REGCACHE_RBTREE, + .volatile_table = &tps65912_volatile_table, +}; + +int tps65912_device_init(struct tps65912 *tps); +int tps65912_device_exit(struct tps65912 *tps); + +#endif /* __LINUX_MFD_TPS65912_H */ -- cgit v1.2.3 From 795bb1c00dd338aa0d12f9a7f1f4776fb3160416 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Mon, 8 Feb 2016 13:14:59 +0100 Subject: net: bulk free infrastructure for NAPI context, use napi_consume_skb We discovered that the network stack was hitting the kmem_cache/SLUB slowpath when freeing SKBs. Doing bulk free with kmem_cache_free_bulk() can speed up this slowpath. NAPI context is a bit special; let's take advantage of that for bulk freeing SKBs. In NAPI context we are running in softirq, which gives us certain protection. A softirq can run on several CPUs at once.
BUT the important part is a softirq will never preempt another softirq running on the same CPU. This gives us the opportunity to access per-cpu variables in softirq context. Extend napi_alloc_cache (before only contained page_frag_cache) to be a struct with a small array based stack for holding SKBs. Introduce a SKB defer and flush API for accessing this. Introduce napi_consume_skb() as replacement for e.g. dev_consume_skb_any() when running in NAPI context. A small trick to handle/detect if we are called from netpoll is to see if budget is 0. In that case, we need to invoke dev_consume_skb_irq(). Joint work with Alexander Duyck. Signed-off-by: Jesper Dangaard Brouer Signed-off-by: Alexander Duyck Signed-off-by: David S. Miller --- include/linux/skbuff.h | 3 ++ net/core/dev.c | 1 + net/core/skbuff.c | 83 ++++++++++++++++++++++++++++++++++++++++++++++---- 3 files changed, 81 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index a8fc2220e8ce..b56c0103fa15 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2404,6 +2404,9 @@ static inline struct sk_buff *napi_alloc_skb(struct napi_struct *napi, { return __napi_alloc_skb(napi, length, GFP_ATOMIC); } +void napi_consume_skb(struct sk_buff *skb, int budget); + +void __kfree_skb_flush(void); /** * __dev_alloc_pages - allocate page for network Rx diff --git a/net/core/dev.c b/net/core/dev.c index f1284835b8c9..9b2c7a999e71 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5155,6 +5155,7 @@ static void net_rx_action(struct softirq_action *h) } } + __kfree_skb_flush(); local_irq_disable(); list_splice_tail_init(&sd->poll_list, &list); diff --git a/net/core/skbuff.c b/net/core/skbuff.c index b0cce744e2a0..b64187b87773 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -347,8 +347,16 @@ struct sk_buff *build_skb(void *data, unsigned int frag_size) } EXPORT_SYMBOL(build_skb); +#define NAPI_SKB_CACHE_SIZE 64 + +struct napi_alloc_cache { + struct page_frag_cache page; + size_t skb_count; + void *skb_cache[NAPI_SKB_CACHE_SIZE]; +}; + static DEFINE_PER_CPU(struct page_frag_cache, netdev_alloc_cache); -static DEFINE_PER_CPU(struct page_frag_cache, napi_alloc_cache); +static DEFINE_PER_CPU(struct napi_alloc_cache, napi_alloc_cache); static void *__netdev_alloc_frag(unsigned int fragsz, gfp_t gfp_mask) { @@ -378,9 +386,9 @@ EXPORT_SYMBOL(netdev_alloc_frag); static void *__napi_alloc_frag(unsigned int fragsz, gfp_t gfp_mask) { - struct page_frag_cache *nc = this_cpu_ptr(&napi_alloc_cache); + struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache); - return __alloc_page_frag(nc, fragsz, gfp_mask); + return __alloc_page_frag(&nc->page, fragsz, gfp_mask); } void *napi_alloc_frag(unsigned int fragsz) @@ -474,7 +482,7 @@ EXPORT_SYMBOL(__netdev_alloc_skb); struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int len, gfp_t gfp_mask) { - struct page_frag_cache *nc = this_cpu_ptr(&napi_alloc_cache); + struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache); struct sk_buff *skb; void *data; @@ -494,7 +502,7 @@ struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int len, if (sk_memalloc_socks()) gfp_mask |= __GFP_MEMALLOC; - data = __alloc_page_frag(nc, len, gfp_mask); + data = __alloc_page_frag(&nc->page, len, gfp_mask); if (unlikely(!data)) return NULL; @@ -505,7 +513,7 @@ struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int len, } /* use OR instead of assignment to avoid clearing of bits in mask */ - if 
(nc->pfmemalloc) + if (nc->page.pfmemalloc) skb->pfmemalloc = 1; skb->head_frag = 1; @@ -747,6 +755,69 @@ void consume_skb(struct sk_buff *skb) } EXPORT_SYMBOL(consume_skb); +void __kfree_skb_flush(void) +{ + struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache); + + /* flush skb_cache if containing objects */ + if (nc->skb_count) { + kmem_cache_free_bulk(skbuff_head_cache, nc->skb_count, + nc->skb_cache); + nc->skb_count = 0; + } +} + +static void __kfree_skb_defer(struct sk_buff *skb) +{ + struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache); + + /* drop skb->head and call any destructors for packet */ + skb_release_all(skb); + + /* record skb to CPU local list */ + nc->skb_cache[nc->skb_count++] = skb; + +#ifdef CONFIG_SLUB + /* SLUB writes into objects when freeing */ + prefetchw(skb); +#endif + + /* flush skb_cache if it is filled */ + if (unlikely(nc->skb_count == NAPI_SKB_CACHE_SIZE)) { + kmem_cache_free_bulk(skbuff_head_cache, NAPI_SKB_CACHE_SIZE, + nc->skb_cache); + nc->skb_count = 0; + } +} + +void napi_consume_skb(struct sk_buff *skb, int budget) +{ + if (unlikely(!skb)) + return; + + /* if budget is 0 assume netpoll w/ IRQs disabled */ + if (unlikely(!budget)) { + dev_consume_skb_irq(skb); + return; + } + + if (likely(atomic_read(&skb->users) == 1)) + smp_rmb(); + else if (likely(!atomic_dec_and_test(&skb->users))) + return; + /* if reaching here SKB is ready to free */ + trace_consume_skb(skb); + + /* if SKB is a clone, don't handle this case */ + if (unlikely(skb->fclone != SKB_FCLONE_UNAVAILABLE)) { + __kfree_skb(skb); + return; + } + + __kfree_skb_defer(skb); +} +EXPORT_SYMBOL(napi_consume_skb); + /* Make sure a field is enclosed inside headers_start/headers_end section */ #define CHECK_SKB_FIELD(field) \ BUILD_BUG_ON(offsetof(struct sk_buff, field) < \ -- cgit v1.2.3 From 15fad714be86eab13e7568fecaf475b2a9730d3e Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Mon, 8 Feb 2016 13:15:04 +0100 Subject: net: bulk free SKBs that were delay free'ed due to IRQ context The network stack defers freeing SKBs in case the free happens in IRQ context or while IRQs are disabled. This happens in __dev_kfree_skb_irq(), which writes SKBs freed during IRQ to the softirq completion queue (softnet_data.completion_queue). These SKBs are naturally delayed and are cleaned up during NET_TX_SOFTIRQ in net_tx_action(). Take advantage of this and use the skb defer and flush API, as we are already in softirq context. For modern drivers this rarely happens, although most drivers do call dev_kfree_skb_any(), which detects the situation and calls __dev_kfree_skb_irq() when needed, because netpoll can make those calls from IRQ context. Signed-off-by: Alexander Duyck Signed-off-by: Jesper Dangaard Brouer Signed-off-by: David S.
Miller --- include/linux/skbuff.h | 1 + net/core/dev.c | 8 +++++++- net/core/skbuff.c | 8 ++++++-- 3 files changed, 14 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index b56c0103fa15..6ec86f1a2ed9 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2407,6 +2407,7 @@ static inline struct sk_buff *napi_alloc_skb(struct napi_struct *napi, void napi_consume_skb(struct sk_buff *skb, int budget); void __kfree_skb_flush(void); +void __kfree_skb_defer(struct sk_buff *skb); /** * __dev_alloc_pages - allocate page for network Rx diff --git a/net/core/dev.c b/net/core/dev.c index 9b2c7a999e71..3f4071a84a03 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3829,8 +3829,14 @@ static void net_tx_action(struct softirq_action *h) trace_consume_skb(skb); else trace_kfree_skb(skb, net_tx_action); - __kfree_skb(skb); + + if (skb->fclone != SKB_FCLONE_UNAVAILABLE) + __kfree_skb(skb); + else + __kfree_skb_defer(skb); } + + __kfree_skb_flush(); } if (sd->output_queue) { diff --git a/net/core/skbuff.c b/net/core/skbuff.c index b64187b87773..a5bd067ec1a3 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -767,7 +767,7 @@ void __kfree_skb_flush(void) } } -static void __kfree_skb_defer(struct sk_buff *skb) +static inline void _kfree_skb_defer(struct sk_buff *skb) { struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache); @@ -789,6 +789,10 @@ static void __kfree_skb_defer(struct sk_buff *skb) nc->skb_count = 0; } } +void __kfree_skb_defer(struct sk_buff *skb) +{ + _kfree_skb_defer(skb); +} void napi_consume_skb(struct sk_buff *skb, int budget) { @@ -814,7 +818,7 @@ void napi_consume_skb(struct sk_buff *skb, int budget) return; } - __kfree_skb_defer(skb); + _kfree_skb_defer(skb); } EXPORT_SYMBOL(napi_consume_skb); -- cgit v1.2.3 From afbc4f312b5e6e87fcd383eb6764e09f1324c78e Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Tue, 9 Feb 2016 13:21:06 +0100 Subject: gpio: move sysfs mock device to the gpio_device Since gpio_device is the struct that survives if the backing gpio_chip is removed, move the sysfs mock device to this state container so it becomes part of the dangling state of the GPIO device on removal. 
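As a rough sketch of the intended ownership split (only the fields relevant here are shown; both structs carry more state, see the hunks below):

	/* Driver-owned; may be torn down by gpiochip_remove() at any time. */
	struct gpio_chip {
		struct gpio_device *gpiodev;	/* opaque back-reference */
		/* ... */
	};

	/* gpiolib-owned; refcounted through its embedded struct device, so
	 * it can outlive the chip while userspace still holds references. */
	struct gpio_device {
		struct device dev;
		struct device *mockdev;		/* sysfs class device */
		struct gpio_chip *chip;		/* set to NULL on removal */
		/* ... */
	};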
Signed-off-by: Linus Walleij --- drivers/gpio/gpiolib-sysfs.c | 23 ++++++++++++----------- drivers/gpio/gpiolib.c | 4 ++-- drivers/gpio/gpiolib.h | 11 +++++++---- include/linux/gpio/driver.h | 2 -- 4 files changed, 21 insertions(+), 19 deletions(-) (limited to 'include/linux') diff --git a/drivers/gpio/gpiolib-sysfs.c b/drivers/gpio/gpiolib-sysfs.c index 28d3bf2328aa..94ba4bb8b4f8 100644 --- a/drivers/gpio/gpiolib-sysfs.c +++ b/drivers/gpio/gpiolib-sysfs.c @@ -572,7 +572,7 @@ int gpiod_export(struct gpio_desc *desc, bool direction_may_change) mutex_lock(&sysfs_lock); /* check if chip is being removed */ - if (!chip || !chip->cdev) { + if (!chip || !gdev->mockdev) { status = -ENODEV; goto err_unlock; } @@ -718,9 +718,10 @@ err_unlock: } EXPORT_SYMBOL_GPL(gpiod_unexport); -int gpiochip_sysfs_register(struct gpio_chip *chip) +int gpiochip_sysfs_register(struct gpio_device *gdev) { struct device *dev; + struct gpio_chip *chip = gdev->chip; /* * Many systems add gpio chips for SOC support very early, @@ -732,7 +733,7 @@ int gpiochip_sysfs_register(struct gpio_chip *chip) return 0; /* use chip->base for the ID; it's already known to be unique */ - dev = device_create_with_groups(&gpio_class, chip->parent, + dev = device_create_with_groups(&gpio_class, &gdev->dev, MKDEV(0, 0), chip, gpiochip_groups, "gpiochip%d", chip->base); @@ -740,25 +741,26 @@ int gpiochip_sysfs_register(struct gpio_chip *chip) return PTR_ERR(dev); mutex_lock(&sysfs_lock); - chip->cdev = dev; + gdev->mockdev = dev; mutex_unlock(&sysfs_lock); return 0; } -void gpiochip_sysfs_unregister(struct gpio_chip *chip) +void gpiochip_sysfs_unregister(struct gpio_device *gdev) { struct gpio_desc *desc; + struct gpio_chip *chip = gdev->chip; unsigned int i; - if (!chip->cdev) + if (!gdev->mockdev) return; - device_unregister(chip->cdev); + device_unregister(gdev->mockdev); /* prevent further gpiod exports */ mutex_lock(&sysfs_lock); - chip->cdev = NULL; + gdev->mockdev = NULL; mutex_unlock(&sysfs_lock); /* unregister gpiod class devices owned by sysfs */ @@ -787,7 +789,7 @@ static int __init gpiolib_sysfs_init(void) */ spin_lock_irqsave(&gpio_lock, flags); list_for_each_entry(gdev, &gpio_devices, list) { - if (gdev->chip->cdev) + if (gdev->mockdev) continue; /* @@ -800,12 +802,11 @@ static int __init gpiolib_sysfs_init(void) * gpio_lock prevents us from doing this. */ spin_unlock_irqrestore(&gpio_lock, flags); - status = gpiochip_sysfs_register(gdev->chip); + status = gpiochip_sysfs_register(gdev); spin_lock_irqsave(&gpio_lock, flags); } spin_unlock_irqrestore(&gpio_lock, flags); - return status; } postcore_initcall(gpiolib_sysfs_init); diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index 36f8be3f910b..5763290f777c 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -558,7 +558,7 @@ int gpiochip_add_data(struct gpio_chip *chip, void *data) if (status) goto err_remove_chardev; - status = gpiochip_sysfs_register(chip); + status = gpiochip_sysfs_register(gdev); if (status) goto err_remove_device; @@ -615,7 +615,7 @@ void gpiochip_remove(struct gpio_chip *chip) gdev->chip = NULL; /* FIXME: should the legacy sysfs handling be moved to gpio_device? 
*/ - gpiochip_sysfs_unregister(chip); + gpiochip_sysfs_unregister(gdev); gpiochip_irqchip_remove(chip); acpi_gpiochip_remove(chip); gpiochip_remove_pin_ranges(chip); diff --git a/drivers/gpio/gpiolib.h b/drivers/gpio/gpiolib.h index 1524ba0ca99d..c5a5b57463c7 100644 --- a/drivers/gpio/gpiolib.h +++ b/drivers/gpio/gpiolib.h @@ -27,6 +27,8 @@ struct acpi_device; * @id: numerical ID number for the GPIO chip * @dev: the GPIO device struct * @chrdev: character device for the GPIO device + * @mockdev: class device used by the deprecated sysfs interface (may be + * NULL) * @owner: helps prevent removal of modules exporting active GPIOs * @chip: pointer to the corresponding gpiochip, holding static * data for this device @@ -41,6 +43,7 @@ struct gpio_device { int id; struct device dev; struct cdev chrdev; + struct device *mockdev; struct module *owner; struct gpio_chip *chip; struct list_head list; @@ -190,17 +193,17 @@ static int __maybe_unused gpio_chip_hwgpio(const struct gpio_desc *desc) #ifdef CONFIG_GPIO_SYSFS -int gpiochip_sysfs_register(struct gpio_chip *chip); -void gpiochip_sysfs_unregister(struct gpio_chip *chip); +int gpiochip_sysfs_register(struct gpio_device *gdev); +void gpiochip_sysfs_unregister(struct gpio_device *gdev); #else -static inline int gpiochip_sysfs_register(struct gpio_chip *chip) +static inline int gpiochip_sysfs_register(struct gpio_device *gdev) { return 0; } -static inline void gpiochip_sysfs_unregister(struct gpio_chip *chip) +static inline void gpiochip_sysfs_unregister(struct gpio_device *gdev) { } diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index f3f1dbd43c9b..4db64ab534ef 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -24,7 +24,6 @@ struct gpio_device; * @label: for diagnostics * @gpiodev: the internal state holder, opaque struct * @parent: optional parent device providing the GPIOs - * @cdev: class device used by sysfs interface (may be NULL) * @owner: helps prevent removal of modules exporting active GPIOs * @data: per-instance data assigned by the driver * @request: optional hook for chip-specific activation, such as @@ -110,7 +109,6 @@ struct gpio_chip { const char *label; struct gpio_device *gpiodev; struct device *parent; - struct device *cdev; struct module *owner; void *data; -- cgit v1.2.3 From 1c3cdb186172ee3be24005a7ff0e849bc17b67b8 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Tue, 9 Feb 2016 13:51:59 +0100 Subject: gpio: move descriptors into gpio_device We need gpio_device to hold the descriptors so that they can be lifecycled with the struct gpio_device held from userspace. Move the descriptor array into gpio_device. Also rename it from "desc" (singularis) to "descs" (pluralis) to reflect the fact that it is an array. 
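Simplified from the hunks below, a hardware line lookup now always goes through the gpio_device:

	struct gpio_desc *gpiochip_get_desc(struct gpio_chip *chip, u16 hwnum)
	{
		if (hwnum >= chip->ngpio)
			return ERR_PTR(-EINVAL);

		/* the descriptor array now lives in the long-lived gpio_device */
		return &chip->gpiodev->descs[hwnum];
	}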
Signed-off-by: Linus Walleij --- drivers/gpio/gpiolib-sysfs.c | 2 +- drivers/gpio/gpiolib.c | 53 ++++++++++++++++++-------------------------- drivers/gpio/gpiolib.h | 4 +++- include/linux/gpio/driver.h | 2 -- 4 files changed, 26 insertions(+), 35 deletions(-) (limited to 'include/linux') diff --git a/drivers/gpio/gpiolib-sysfs.c b/drivers/gpio/gpiolib-sysfs.c index 94ba4bb8b4f8..de65633471af 100644 --- a/drivers/gpio/gpiolib-sysfs.c +++ b/drivers/gpio/gpiolib-sysfs.c @@ -765,7 +765,7 @@ void gpiochip_sysfs_unregister(struct gpio_device *gdev) /* unregister gpiod class devices owned by sysfs */ for (i = 0; i < chip->ngpio; i++) { - desc = &chip->desc[i]; + desc = &chip->gpiodev->descs[i]; if (test_and_clear_bit(FLAG_SYSFS, &desc->flags)) gpiod_free(desc); } diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index 5763290f777c..f3fcd415a77b 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -88,7 +88,7 @@ struct gpio_desc *gpio_to_desc(unsigned gpio) if (gdev->chip->base <= gpio && gdev->chip->base + gdev->chip->ngpio > gpio) { spin_unlock_irqrestore(&gpio_lock, flags); - return &gdev->chip->desc[gpio - gdev->chip->base]; + return &gdev->descs[gpio - gdev->chip->base]; } } @@ -110,7 +110,7 @@ struct gpio_desc *gpiochip_get_desc(struct gpio_chip *chip, if (hwnum >= chip->ngpio) return ERR_PTR(-EINVAL); - return &chip->desc[hwnum]; + return &chip->gpiodev->descs[hwnum]; } /** @@ -120,7 +120,7 @@ struct gpio_desc *gpiochip_get_desc(struct gpio_chip *chip, */ int desc_to_gpio(const struct gpio_desc *desc) { - return desc->chip->base + (desc - &desc->chip->desc[0]); + return desc->chip->base + (desc - &desc->chip->gpiodev->descs[0]); } EXPORT_SYMBOL_GPL(desc_to_gpio); @@ -277,7 +277,7 @@ static struct gpio_desc *gpio_name_to_desc(const char * const name) int i; for (i = 0; i != gdev->chip->ngpio; ++i) { - struct gpio_desc *gpio = &gdev->chip->desc[i]; + struct gpio_desc *gpio = &gdev->descs[i]; if (!gpio->name || !name) continue; @@ -320,7 +320,7 @@ static int gpiochip_set_desc_names(struct gpio_chip *gc) /* Then add all names to the GPIO descriptors */ for (i = 0; i != gc->ngpio; ++i) - gc->desc[i].name = gc->names[i]; + gc->gpiodev->descs[i].name = gc->names[i]; return 0; } @@ -431,7 +431,6 @@ int gpiochip_add_data(struct gpio_chip *chip, void *data) int status = 0; unsigned i; int base = chip->base; - struct gpio_desc *descs; struct gpio_device *gdev; /* @@ -470,9 +469,9 @@ int gpiochip_add_data(struct gpio_chip *chip, void *data) else gdev->owner = THIS_MODULE; - /* FIXME: devm_kcalloc() these and move to gpio_device */ - descs = kcalloc(chip->ngpio, sizeof(descs[0]), GFP_KERNEL); - if (!descs) { + gdev->descs = devm_kcalloc(&gdev->dev, chip->ngpio, + sizeof(gdev->descs[0]), GFP_KERNEL); + if (!gdev->descs) { status = -ENOMEM; goto err_free_gdev; } @@ -483,7 +482,7 @@ int gpiochip_add_data(struct gpio_chip *chip, void *data) if (chip->ngpio == 0) { chip_err(chip, "tried to insert a GPIO chip with zero lines\n"); status = -EINVAL; - goto err_free_descs; + goto err_free_gdev; } spin_lock_irqsave(&gpio_lock, flags); @@ -493,7 +492,7 @@ int gpiochip_add_data(struct gpio_chip *chip, void *data) if (base < 0) { status = base; spin_unlock_irqrestore(&gpio_lock, flags); - goto err_free_descs; + goto err_free_gdev; } chip->base = base; } @@ -501,11 +500,11 @@ int gpiochip_add_data(struct gpio_chip *chip, void *data) status = gpiodev_add_to_list(gdev); if (status) { spin_unlock_irqrestore(&gpio_lock, flags); - goto err_free_descs; + goto err_free_gdev; } for (i = 0; i < 
chip->ngpio; i++) { - struct gpio_desc *desc = &descs[i]; + struct gpio_desc *desc = &gdev->descs[i]; /* REVISIT: maybe a pointer to gpio_device is better */ desc->chip = chip; @@ -518,7 +517,6 @@ int gpiochip_add_data(struct gpio_chip *chip, void *data) */ desc->flags = !chip->direction_input ? (1 << FLAG_IS_OUT) : 0; } - chip->desc = descs; spin_unlock_irqrestore(&gpio_lock, flags); @@ -583,9 +581,6 @@ err_remove_from_list: spin_lock_irqsave(&gpio_lock, flags); list_del(&gdev->list); spin_unlock_irqrestore(&gpio_lock, flags); - chip->desc = NULL; -err_free_descs: - kfree(descs); err_free_gdev: ida_simple_remove(&gpio_ida, gdev->id); kfree(gdev); @@ -608,7 +603,7 @@ void gpiochip_remove(struct gpio_chip *chip) struct gpio_device *gdev = chip->gpiodev; struct gpio_desc *desc; unsigned long flags; - unsigned id; + unsigned i; bool requested = false; /* Numb the device, cancelling all outstanding operations */ @@ -623,8 +618,8 @@ void gpiochip_remove(struct gpio_chip *chip) of_gpiochip_remove(chip); spin_lock_irqsave(&gpio_lock, flags); - for (id = 0; id < chip->ngpio; id++) { - desc = &chip->desc[id]; + for (i = 0; i < chip->ngpio; i++) { + desc = &gdev->descs[i]; desc->chip = NULL; if (test_bit(FLAG_REQUESTED, &desc->flags)) requested = true; @@ -635,10 +630,6 @@ void gpiochip_remove(struct gpio_chip *chip) dev_crit(&chip->gpiodev->dev, "REMOVING GPIOCHIP WITH GPIOS STILL REQUESTED\n"); - /* FIXME: need to be moved to gpio_device and held there */ - kfree(chip->desc); - chip->desc = NULL; - /* * The gpiochip side puts its use of the device to rest here: * if there are no userspace clients, the chardev and device will @@ -1250,7 +1241,7 @@ const char *gpiochip_is_requested(struct gpio_chip *chip, unsigned offset) if (offset >= chip->ngpio) return NULL; - desc = &chip->desc[offset]; + desc = &chip->gpiodev->descs[offset]; if (test_bit(FLAG_REQUESTED, &desc->flags) == 0) return NULL; @@ -1837,14 +1828,14 @@ int gpiochip_lock_as_irq(struct gpio_chip *chip, unsigned int offset) if (offset >= chip->ngpio) return -EINVAL; - if (test_bit(FLAG_IS_OUT, &chip->desc[offset].flags)) { + if (test_bit(FLAG_IS_OUT, &chip->gpiodev->descs[offset].flags)) { chip_err(chip, "%s: tried to flag a GPIO set as output for IRQ\n", __func__); return -EIO; } - set_bit(FLAG_USED_AS_IRQ, &chip->desc[offset].flags); + set_bit(FLAG_USED_AS_IRQ, &chip->gpiodev->descs[offset].flags); return 0; } EXPORT_SYMBOL_GPL(gpiochip_lock_as_irq); @@ -1862,7 +1853,7 @@ void gpiochip_unlock_as_irq(struct gpio_chip *chip, unsigned int offset) if (offset >= chip->ngpio) return; - clear_bit(FLAG_USED_AS_IRQ, &chip->desc[offset].flags); + clear_bit(FLAG_USED_AS_IRQ, &chip->gpiodev->descs[offset].flags); } EXPORT_SYMBOL_GPL(gpiochip_unlock_as_irq); @@ -2549,8 +2540,8 @@ static void gpiochip_free_hogs(struct gpio_chip *chip) int id; for (id = 0; id < chip->ngpio; id++) { - if (test_bit(FLAG_IS_HOGGED, &chip->desc[id].flags)) - gpiochip_free_own_desc(&chip->desc[id]); + if (test_bit(FLAG_IS_HOGGED, &chip->gpiodev->descs[id].flags)) + gpiochip_free_own_desc(&chip->gpiodev->descs[id]); } } @@ -2673,7 +2664,7 @@ static void gpiolib_dbg_show(struct seq_file *s, struct gpio_chip *chip) { unsigned i; unsigned gpio = chip->base; - struct gpio_desc *gdesc = &chip->desc[0]; + struct gpio_desc *gdesc = &chip->gpiodev->descs[0]; int is_out; int is_irq; diff --git a/drivers/gpio/gpiolib.h b/drivers/gpio/gpiolib.h index c5a5b57463c7..39b8301c98b6 100644 --- a/drivers/gpio/gpiolib.h +++ b/drivers/gpio/gpiolib.h @@ -32,6 +32,7 @@ struct acpi_device; * 
@owner: helps prevent removal of modules exporting active GPIOs * @chip: pointer to the corresponding gpiochip, holding static * data for this device + * @descs: array of ngpio descriptors. * @list: links gpio_device:s together for traversal * * This state container holds most of the runtime variable data * @@ -46,6 +47,7 @@ struct gpio_device { struct device *mockdev; struct module *owner; struct gpio_chip *chip; + struct gpio_desc *descs; struct list_head list; }; @@ -152,7 +154,7 @@ int gpiod_hog(struct gpio_desc *desc, const char *name, */ static int __maybe_unused gpio_chip_hwgpio(const struct gpio_desc *desc) { - return desc - &desc->chip->desc[0]; + return desc - &desc->chip->gpiodev->descs[0]; } /* With descriptor prefix */ diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index 4db64ab534ef..bfc842c2fc57 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -52,7 +52,6 @@ struct gpio_device; * get rid of the static GPIO number space in the long run. * @ngpio: the number of GPIOs handled by this controller; the last GPIO * handled is (base + ngpio - 1). - * @desc: array of ngpio descriptors. Private. * @names: if set, must be an array of strings to use as alternative * names for the GPIOs in this chip. Any entry in the array * may be NULL if there is no alias for the GPIO, however the @@ -140,7 +139,6 @@ struct gpio_chip { struct gpio_chip *chip); int base; u16 ngpio; - struct gpio_desc *desc; const char *const *names; bool can_sleep; bool irq_not_threaded; -- cgit v1.2.3 From 6cee3821e4e4bd6e6cdf0870b6c72d455460bd39 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Thu, 11 Feb 2016 20:16:45 +0100 Subject: gpio/pinctrl: sunxi: stop poking around in private vars This kind of hack disturbs the refactoring of the gpiolib. The descriptor table belongs to the gpiolib; if we want to know something about it, use or define the proper accessor functions. Let's add this gpiochip_line_is_irq() to do what the sunxi driver is trying to do, so we can privatize the descriptors properly.
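For illustration, a hedged sketch of how a driver would consume the new accessor; every example_* name below is hypothetical, only gpiochip_line_is_irq() comes from this patch:

static void example_set_mux_to_gpio_in(struct gpio_chip *chip,
				       unsigned int offset);	/* hypothetical */
static int example_read_level(struct gpio_chip *chip,
			      unsigned int offset);		/* hypothetical */

static int example_gpio_get(struct gpio_chip *chip, unsigned int offset)
{
	/* if the line is wired up as an IRQ, re-mux it so we can sample it */
	if (gpiochip_line_is_irq(chip, offset))
		example_set_mux_to_gpio_in(chip, offset);

	return example_read_level(chip, offset);
}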
Cc: Maxime Ripard Cc: Chen-Yu Tsai Signed-off-by: Linus Walleij --- drivers/gpio/gpiolib.c | 9 +++++++++ drivers/pinctrl/sunxi/pinctrl-sunxi.c | 4 ++-- include/linux/gpio/driver.h | 1 + 3 files changed, 12 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index f3fcd415a77b..ff8d55ad790c 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -1857,6 +1857,15 @@ void gpiochip_unlock_as_irq(struct gpio_chip *chip, unsigned int offset) } EXPORT_SYMBOL_GPL(gpiochip_unlock_as_irq); +bool gpiochip_line_is_irq(struct gpio_chip *chip, unsigned int offset) +{ + if (offset >= chip->ngpio) + return false; + + return test_bit(FLAG_USED_AS_IRQ, &chip->gpiodev->descs[offset].flags); +} +EXPORT_SYMBOL_GPL(gpiochip_line_is_irq); + /** * gpiod_get_raw_value_cansleep() - return a gpio's raw value * @desc: gpio whose value will be returned diff --git a/drivers/pinctrl/sunxi/pinctrl-sunxi.c b/drivers/pinctrl/sunxi/pinctrl-sunxi.c index 7a2465f5e71e..3e95bfe66a06 100644 --- a/drivers/pinctrl/sunxi/pinctrl-sunxi.c +++ b/drivers/pinctrl/sunxi/pinctrl-sunxi.c @@ -457,8 +457,8 @@ static int sunxi_pinctrl_gpio_get(struct gpio_chip *chip, unsigned offset) struct sunxi_pinctrl *pctl = gpiochip_get_data(chip); u32 reg = sunxi_data_reg(offset); u8 index = sunxi_data_offset(offset); - u32 set_mux = pctl->desc->irq_read_needs_mux && - test_bit(FLAG_USED_AS_IRQ, &chip->desc[offset].flags); + bool set_mux = pctl->desc->irq_read_needs_mux && + gpiochip_line_is_irq(chip, offset); u32 val; if (set_mux) diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index bfc842c2fc57..41c6144c473b 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -208,6 +208,7 @@ extern struct gpio_chip *gpiochip_find(void *data, /* lock/unlock as IRQ */ int gpiochip_lock_as_irq(struct gpio_chip *chip, unsigned int offset); void gpiochip_unlock_as_irq(struct gpio_chip *chip, unsigned int offset); +bool gpiochip_line_is_irq(struct gpio_chip *chip, unsigned int offset); /* get driver data */ static inline void *gpiochip_get_data(struct gpio_chip *chip) -- cgit v1.2.3 From e1379b56e9e88653fcb58cbaa71cd6b1cc304918 Mon Sep 17 00:00:00 2001 From: Cory Tusar Date: Wed, 10 Feb 2016 14:32:07 -0500 Subject: misc: eeprom_93xx46: Add quirks to support Atmel AT93C46D device. Atmel devices in this family have some quirks not found in other similar chips - they do not support a sequential read of the entire EEPROM contents, and the control word sent at the start of each operation varies in bit length. This commit adds quirk support to the driver and modifies the read implementation to support non-sequential reads for consistency with other misc/eeprom drivers. Tested on a custom Freescale VF610-based platform, with an AT93C46D device attached via dspi2. The spi-gpio driver was used to allow the necessary non-byte-sized transfers. 
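A hedged sketch of the resulting read strategy (example_read() and example_spi_read() are hypothetical; the quirk helper and the addrlen field are from the patch below): parts with the single-word-read quirk get one word per SPI message, the rest a single sequential transfer:

static int example_spi_read(struct eeprom_93xx46_dev *edev, char *buf,
			    loff_t off, size_t len);	/* hypothetical */

static ssize_t example_read(struct eeprom_93xx46_dev *edev, char *buf,
			    loff_t off, size_t count)
{
	ssize_t done = 0;

	while (count) {
		/* a word is 1 byte with 7 address bits, else 2 bytes */
		size_t word = (edev->addrlen == 7) ? 1 : 2;
		size_t nbytes = has_quirk_single_word_read(edev) ? word : count;
		int err = example_spi_read(edev, buf, off, nbytes);

		if (err)
			return err;
		buf += nbytes;
		off += nbytes;
		count -= nbytes;
		done += nbytes;
	}
	return done;
}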
Signed-off-by: Cory Tusar Tested-by: Chris Healy Reviewed-by: Vladimir Zapolskiy Signed-off-by: Greg Kroah-Hartman --- drivers/misc/eeprom/eeprom_93xx46.c | 126 ++++++++++++++++++++++++++---------- include/linux/eeprom_93xx46.h | 6 ++ 2 files changed, 97 insertions(+), 35 deletions(-) (limited to 'include/linux') diff --git a/drivers/misc/eeprom/eeprom_93xx46.c b/drivers/misc/eeprom/eeprom_93xx46.c index f0c30b366218..a3c3136abf3e 100644 --- a/drivers/misc/eeprom/eeprom_93xx46.c +++ b/drivers/misc/eeprom/eeprom_93xx46.c @@ -27,6 +27,15 @@ #define ADDR_ERAL 0x20 #define ADDR_EWEN 0x30 +struct eeprom_93xx46_devtype_data { + unsigned int quirks; +}; + +static const struct eeprom_93xx46_devtype_data atmel_at93c46d_data = { + .quirks = EEPROM_93XX46_QUIRK_SINGLE_WORD_READ | + EEPROM_93XX46_QUIRK_INSTRUCTION_LENGTH, +}; + struct eeprom_93xx46_dev { struct spi_device *spi; struct eeprom_93xx46_platform_data *pdata; @@ -35,6 +44,16 @@ struct eeprom_93xx46_dev { int addrlen; }; +static inline bool has_quirk_single_word_read(struct eeprom_93xx46_dev *edev) +{ + return edev->pdata->quirks & EEPROM_93XX46_QUIRK_SINGLE_WORD_READ; +} + +static inline bool has_quirk_instruction_length(struct eeprom_93xx46_dev *edev) +{ + return edev->pdata->quirks & EEPROM_93XX46_QUIRK_INSTRUCTION_LENGTH; +} + static ssize_t eeprom_93xx46_bin_read(struct file *filp, struct kobject *kobj, struct bin_attribute *bin_attr, @@ -42,58 +61,73 @@ eeprom_93xx46_bin_read(struct file *filp, struct kobject *kobj, { struct eeprom_93xx46_dev *edev; struct device *dev; - struct spi_message m; - struct spi_transfer t[2]; - int bits, ret; - u16 cmd_addr; + ssize_t ret = 0; dev = kobj_to_dev(kobj); edev = dev_get_drvdata(dev); - cmd_addr = OP_READ << edev->addrlen; + mutex_lock(&edev->lock); - if (edev->addrlen == 7) { - cmd_addr |= off & 0x7f; - bits = 10; - } else { - cmd_addr |= (off >> 1) & 0x3f; - bits = 9; - } + if (edev->pdata->prepare) + edev->pdata->prepare(edev); - dev_dbg(&edev->spi->dev, "read cmd 0x%x, %d Hz\n", - cmd_addr, edev->spi->max_speed_hz); + while (count) { + struct spi_message m; + struct spi_transfer t[2] = { { 0 } }; + u16 cmd_addr = OP_READ << edev->addrlen; + size_t nbytes = count; + int bits; + int err; + + if (edev->addrlen == 7) { + cmd_addr |= off & 0x7f; + bits = 10; + if (has_quirk_single_word_read(edev)) + nbytes = 1; + } else { + cmd_addr |= (off >> 1) & 0x3f; + bits = 9; + if (has_quirk_single_word_read(edev)) + nbytes = 2; + } - spi_message_init(&m); - memset(t, 0, sizeof(t)); + dev_dbg(&edev->spi->dev, "read cmd 0x%x, %d Hz\n", + cmd_addr, edev->spi->max_speed_hz); - t[0].tx_buf = (char *)&cmd_addr; - t[0].len = 2; - t[0].bits_per_word = bits; - spi_message_add_tail(&t[0], &m); + spi_message_init(&m); - t[1].rx_buf = buf; - t[1].len = count; - t[1].bits_per_word = 8; - spi_message_add_tail(&t[1], &m); + t[0].tx_buf = (char *)&cmd_addr; + t[0].len = 2; + t[0].bits_per_word = bits; + spi_message_add_tail(&t[0], &m); - mutex_lock(&edev->lock); + t[1].rx_buf = buf; + t[1].len = count; + t[1].bits_per_word = 8; + spi_message_add_tail(&t[1], &m); - if (edev->pdata->prepare) - edev->pdata->prepare(edev); + err = spi_sync(edev->spi, &m); + /* have to wait at least Tcsl ns */ + ndelay(250); - ret = spi_sync(edev->spi, &m); - /* have to wait at least Tcsl ns */ - ndelay(250); - if (ret) { - dev_err(&edev->spi->dev, "read %zu bytes at %d: err. %d\n", - count, (int)off, ret); + if (err) { + dev_err(&edev->spi->dev, "read %zu bytes at %d: err. 
%d\n", + nbytes, (int)off, err); + ret = err; + break; + } + + buf += nbytes; + off += nbytes; + count -= nbytes; + ret += nbytes; } if (edev->pdata->finish) edev->pdata->finish(edev); mutex_unlock(&edev->lock); - return ret ? : count; + return ret; } static int eeprom_93xx46_ew(struct eeprom_93xx46_dev *edev, int is_on) @@ -112,7 +146,13 @@ static int eeprom_93xx46_ew(struct eeprom_93xx46_dev *edev, int is_on) bits = 9; } - dev_dbg(&edev->spi->dev, "ew cmd 0x%04x\n", cmd_addr); + if (has_quirk_instruction_length(edev)) { + cmd_addr <<= 2; + bits += 2; + } + + dev_dbg(&edev->spi->dev, "ew%s cmd 0x%04x, %d bits\n", + is_on ? "en" : "ds", cmd_addr, bits); spi_message_init(&m); memset(&t, 0, sizeof(t)); @@ -247,6 +287,13 @@ static int eeprom_93xx46_eral(struct eeprom_93xx46_dev *edev) bits = 9; } + if (has_quirk_instruction_length(edev)) { + cmd_addr <<= 2; + bits += 2; + } + + dev_dbg(&edev->spi->dev, "eral cmd 0x%04x, %d bits\n", cmd_addr, bits); + spi_message_init(&m); memset(&t, 0, sizeof(t)); @@ -298,12 +345,15 @@ static DEVICE_ATTR(erase, S_IWUSR, NULL, eeprom_93xx46_store_erase); static const struct of_device_id eeprom_93xx46_of_table[] = { { .compatible = "eeprom-93xx46", }, + { .compatible = "atmel,at93c46d", .data = &atmel_at93c46d_data, }, {} }; MODULE_DEVICE_TABLE(of, eeprom_93xx46_of_table); static int eeprom_93xx46_probe_dt(struct spi_device *spi) { + const struct of_device_id *of_id = + of_match_device(eeprom_93xx46_of_table, &spi->dev); struct device_node *np = spi->dev.of_node; struct eeprom_93xx46_platform_data *pd; u32 tmp; @@ -331,6 +381,12 @@ static int eeprom_93xx46_probe_dt(struct spi_device *spi) if (of_property_read_bool(np, "read-only")) pd->flags |= EE_READONLY; + if (of_id->data) { + const struct eeprom_93xx46_devtype_data *data = of_id->data; + + pd->quirks = data->quirks; + } + spi->dev.platform_data = pd; return 0; diff --git a/include/linux/eeprom_93xx46.h b/include/linux/eeprom_93xx46.h index 06791811e49d..92fa4c37ac1f 100644 --- a/include/linux/eeprom_93xx46.h +++ b/include/linux/eeprom_93xx46.h @@ -9,6 +9,12 @@ struct eeprom_93xx46_platform_data { #define EE_ADDR16 0x02 /* 16 bit addr. cfg */ #define EE_READONLY 0x08 /* forbid writing */ + unsigned int quirks; +/* Single word read transfers only; no sequential read. */ +#define EEPROM_93XX46_QUIRK_SINGLE_WORD_READ (1 << 0) +/* Instructions such as EWEN are (addrlen + 2) in length. */ +#define EEPROM_93XX46_QUIRK_INSTRUCTION_LENGTH (1 << 1) + /* * optional hooks to control additional logic * before and after spi transfer. -- cgit v1.2.3 From 3ca9b1ac28398c6fe0bed335d2d71a35e1c5f7c9 Mon Sep 17 00:00:00 2001 From: Cory Tusar Date: Wed, 10 Feb 2016 14:32:08 -0500 Subject: misc: eeprom_93xx46: Add support for a GPIO 'select' line. This commit adds support to the eeprom_93x46 driver allowing a GPIO line to function as a 'select' or 'enable' signal prior to accessing the EEPROM. 
Signed-off-by: Cory Tusar Tested-by: Chris Healy Reviewed-by: Vladimir Zapolskiy Signed-off-by: Greg Kroah-Hartman --- drivers/misc/eeprom/eeprom_93xx46.c | 35 +++++++++++++++++++++++++++++++++++ include/linux/eeprom_93xx46.h | 3 +++ 2 files changed, 38 insertions(+) (limited to 'include/linux') diff --git a/drivers/misc/eeprom/eeprom_93xx46.c b/drivers/misc/eeprom/eeprom_93xx46.c index a3c3136abf3e..f62ab29e293c 100644 --- a/drivers/misc/eeprom/eeprom_93xx46.c +++ b/drivers/misc/eeprom/eeprom_93xx46.c @@ -10,11 +10,13 @@ #include #include +#include #include #include #include #include #include +#include #include #include #include @@ -343,6 +345,20 @@ static ssize_t eeprom_93xx46_store_erase(struct device *dev, } static DEVICE_ATTR(erase, S_IWUSR, NULL, eeprom_93xx46_store_erase); +static void select_assert(void *context) +{ + struct eeprom_93xx46_dev *edev = context; + + gpiod_set_value_cansleep(edev->pdata->select, 1); +} + +static void select_deassert(void *context) +{ + struct eeprom_93xx46_dev *edev = context; + + gpiod_set_value_cansleep(edev->pdata->select, 0); +} + static const struct of_device_id eeprom_93xx46_of_table[] = { { .compatible = "eeprom-93xx46", }, { .compatible = "atmel,at93c46d", .data = &atmel_at93c46d_data, }, @@ -357,6 +373,8 @@ static int eeprom_93xx46_probe_dt(struct spi_device *spi) struct device_node *np = spi->dev.of_node; struct eeprom_93xx46_platform_data *pd; u32 tmp; + int gpio; + enum of_gpio_flags of_flags; int ret; pd = devm_kzalloc(&spi->dev, sizeof(*pd), GFP_KERNEL); @@ -381,6 +399,23 @@ static int eeprom_93xx46_probe_dt(struct spi_device *spi) if (of_property_read_bool(np, "read-only")) pd->flags |= EE_READONLY; + gpio = of_get_named_gpio_flags(np, "select-gpios", 0, &of_flags); + if (gpio_is_valid(gpio)) { + unsigned long flags = + of_flags == OF_GPIO_ACTIVE_LOW ? GPIOF_ACTIVE_LOW : 0; + + ret = devm_gpio_request_one(&spi->dev, gpio, flags, + "eeprom_93xx46_select"); + if (ret) + return ret; + + pd->select = gpio_to_desc(gpio); + pd->prepare = select_assert; + pd->finish = select_deassert; + + gpiod_direction_output(pd->select, 0); + } + if (of_id->data) { const struct eeprom_93xx46_devtype_data *data = of_id->data; diff --git a/include/linux/eeprom_93xx46.h b/include/linux/eeprom_93xx46.h index 92fa4c37ac1f..885f587a3555 100644 --- a/include/linux/eeprom_93xx46.h +++ b/include/linux/eeprom_93xx46.h @@ -3,6 +3,8 @@ * platform description for 93xx46 EEPROMs. */ +struct gpio_desc; + struct eeprom_93xx46_platform_data { unsigned char flags; #define EE_ADDR8 0x01 /* 8 bit addr. cfg */ @@ -21,4 +23,5 @@ struct eeprom_93xx46_platform_data { */ void (*prepare)(void *); void (*finish)(void *); + struct gpio_desc *select; }; -- cgit v1.2.3 From 4fd411514291ae75053003e33a6a4a56f97467d0 Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Fri, 12 Feb 2016 10:02:42 +0800 Subject: mfd: axp20x: Split the driver into core and i2c bits The axp20x driver assumes the device is i2c based. This is not the case with later chips, which use a proprietary 2 wire serial bus by Allwinner called "Reduced Serial Bus". This patch follows the example of mfd/wm831x and splits it into an interface independent core, and an i2c specific glue layer. MFD_AXP20X and the new MFD_AXP20X_I2C are changed to tristate symbols, allowing the driver to be built as modules. Whitespace and other style errors in the moved i2c specific code have been fixed. Included but unused header files are removed as well. 
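A sketch of the glue-layer pattern this split enables, using an imaginary bus "foo" (foo_device and devm_regmap_init_foo() are placeholders; the axp20x_* calls and fields are real): the glue only creates a bus-specific regmap and hands off to the interface-independent core:

/* hypothetical glue for a made-up bus "foo", mirroring the i2c glue */
static int axp20x_foo_probe(struct foo_device *fdev)
{
	struct axp20x_dev *axp20x;
	int ret;

	axp20x = devm_kzalloc(&fdev->dev, sizeof(*axp20x), GFP_KERNEL);
	if (!axp20x)
		return -ENOMEM;

	axp20x->dev = &fdev->dev;
	axp20x->irq = fdev->irq;
	dev_set_drvdata(axp20x->dev, axp20x);

	ret = axp20x_match_device(axp20x);
	if (ret)
		return ret;

	axp20x->regmap = devm_regmap_init_foo(fdev, axp20x->regmap_cfg);
	if (IS_ERR(axp20x->regmap))
		return PTR_ERR(axp20x->regmap);

	return axp20x_device_probe(axp20x);
}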
Signed-off-by: Chen-Yu Tsai Signed-off-by: Lee Jones --- drivers/mfd/Kconfig | 14 +++--- drivers/mfd/Makefile | 1 + drivers/mfd/axp20x-i2c.c | 104 +++++++++++++++++++++++++++++++++++++++++++++ drivers/mfd/axp20x.c | 90 +++++++-------------------------------- include/linux/mfd/axp20x.h | 33 +++++++++++++- 5 files changed, 161 insertions(+), 81 deletions(-) create mode 100644 drivers/mfd/axp20x-i2c.c (limited to 'include/linux') diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig index 9ca66de0c1c1..0037b9c933d9 100644 --- a/drivers/mfd/Kconfig +++ b/drivers/mfd/Kconfig @@ -91,14 +91,18 @@ config MFD_BCM590XX Support for the BCM590xx PMUs from Broadcom config MFD_AXP20X - bool "X-Powers AXP20X" + tristate select MFD_CORE - select REGMAP_I2C select REGMAP_IRQ - depends on I2C=y + +config MFD_AXP20X_I2C + tristate "X-Powers AXP series PMICs with I2C" + select MFD_AXP20X + select REGMAP_I2C + depends on I2C help - If you say Y here you get support for the X-Powers AXP202, AXP209 and - AXP288 power management IC (PMIC). + If you say Y here you get support for the X-Powers AXP series power + management ICs (PMICs) controlled with I2C. This driver includes only the core APIs. You have to select individual components like regulators or the PEK (Power Enable Key) under the corresponding menus. diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile index 0f230a6103f8..dba4f99d9044 100644 --- a/drivers/mfd/Makefile +++ b/drivers/mfd/Makefile @@ -111,6 +111,7 @@ obj-$(CONFIG_PMIC_DA9052) += da9052-core.o obj-$(CONFIG_MFD_DA9052_SPI) += da9052-spi.o obj-$(CONFIG_MFD_DA9052_I2C) += da9052-i2c.o obj-$(CONFIG_MFD_AXP20X) += axp20x.o +obj-$(CONFIG_MFD_AXP20X_I2C) += axp20x-i2c.o obj-$(CONFIG_MFD_LP3943) += lp3943.o obj-$(CONFIG_MFD_LP8788) += lp8788.o lp8788-irq.o diff --git a/drivers/mfd/axp20x-i2c.c b/drivers/mfd/axp20x-i2c.c new file mode 100644 index 000000000000..b1b865822c07 --- /dev/null +++ b/drivers/mfd/axp20x-i2c.c @@ -0,0 +1,104 @@ +/* + * I2C driver for the X-Powers' Power Management ICs + * + * AXP20x typically comprises an adaptive USB-Compatible PWM charger, BUCK DC-DC + * converters, LDOs, multiple 12-bit ADCs of voltage, current and temperature + * as well as configurable GPIOs. + * + * This driver supports the I2C variants. + * + * Copyright (C) 2014 Carlo Caione + * + * Author: Carlo Caione + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include + +static int axp20x_i2c_probe(struct i2c_client *i2c, + const struct i2c_device_id *id) +{ + struct axp20x_dev *axp20x; + int ret; + + axp20x = devm_kzalloc(&i2c->dev, sizeof(*axp20x), GFP_KERNEL); + if (!axp20x) + return -ENOMEM; + + axp20x->dev = &i2c->dev; + axp20x->irq = i2c->irq; + dev_set_drvdata(axp20x->dev, axp20x); + + ret = axp20x_match_device(axp20x); + if (ret) + return ret; + + axp20x->regmap = devm_regmap_init_i2c(i2c, axp20x->regmap_cfg); + if (IS_ERR(axp20x->regmap)) { + ret = PTR_ERR(axp20x->regmap); + dev_err(&i2c->dev, "regmap init failed: %d\n", ret); + return ret; + } + + return axp20x_device_probe(axp20x); +} + +static int axp20x_i2c_remove(struct i2c_client *i2c) +{ + struct axp20x_dev *axp20x = i2c_get_clientdata(i2c); + + return axp20x_device_remove(axp20x); +} + +static const struct of_device_id axp20x_i2c_of_match[] = { + { .compatible = "x-powers,axp152", .data = (void *)AXP152_ID }, + { .compatible = "x-powers,axp202", .data = (void *)AXP202_ID }, + { .compatible = "x-powers,axp209", .data = (void *)AXP209_ID }, + { .compatible = "x-powers,axp221", .data = (void *)AXP221_ID }, + { }, +}; +MODULE_DEVICE_TABLE(of, axp20x_i2c_of_match); + +/* + * This is useless for OF-enabled devices, but it is needed by I2C subsystem + */ +static const struct i2c_device_id axp20x_i2c_id[] = { + { }, +}; +MODULE_DEVICE_TABLE(i2c, axp20x_i2c_id); + +static const struct acpi_device_id axp20x_i2c_acpi_match[] = { + { + .id = "INT33F4", + .driver_data = AXP288_ID, + }, + { }, +}; +MODULE_DEVICE_TABLE(acpi, axp20x_i2c_acpi_match); + +static struct i2c_driver axp20x_i2c_driver = { + .driver = { + .name = "axp20x-i2c", + .of_match_table = of_match_ptr(axp20x_i2c_of_match), + .acpi_match_table = ACPI_PTR(axp20x_i2c_acpi_match), + }, + .probe = axp20x_i2c_probe, + .remove = axp20x_i2c_remove, + .id_table = axp20x_i2c_id, +}; + +module_i2c_driver(axp20x_i2c_driver); + +MODULE_DESCRIPTION("PMIC MFD I2C driver for AXP20X"); +MODULE_AUTHOR("Carlo Caione "); +MODULE_LICENSE("GPL"); diff --git a/drivers/mfd/axp20x.c b/drivers/mfd/axp20x.c index cec51e689d1d..8e569bcfe3bc 100644 --- a/drivers/mfd/axp20x.c +++ b/drivers/mfd/axp20x.c @@ -1,10 +1,12 @@ /* - * axp20x.c - MFD core driver for the X-Powers' Power Management ICs + * MFD core driver for the X-Powers' Power Management ICs * * AXP20x typically comprises an adaptive USB-Compatible PWM charger, BUCK DC-DC * converters, LDOs, multiple 12-bit ADCs of voltage, current and temperature * as well as configurable GPIOs. * + * This file contains the interface independent core functions. 
+ * * Copyright (C) 2014 Carlo Caione * * Author: Carlo Caione @@ -15,18 +17,15 @@ */ #include -#include #include #include #include #include #include -#include #include #include #include #include -#include #include #define AXP20X_OFF 0x80 @@ -378,32 +377,6 @@ static const struct regmap_irq axp288_regmap_irqs[] = { INIT_REGMAP_IRQ(AXP288, BC_USB_CHNG, 5, 1), }; -static const struct of_device_id axp20x_of_match[] = { - { .compatible = "x-powers,axp152", .data = (void *) AXP152_ID }, - { .compatible = "x-powers,axp202", .data = (void *) AXP202_ID }, - { .compatible = "x-powers,axp209", .data = (void *) AXP209_ID }, - { .compatible = "x-powers,axp221", .data = (void *) AXP221_ID }, - { }, -}; -MODULE_DEVICE_TABLE(of, axp20x_of_match); - -/* - * This is useless for OF-enabled devices, but it is needed by I2C subsystem - */ -static const struct i2c_device_id axp20x_i2c_id[] = { - { }, -}; -MODULE_DEVICE_TABLE(i2c, axp20x_i2c_id); - -static const struct acpi_device_id axp20x_acpi_match[] = { - { - .id = "INT33F4", - .driver_data = AXP288_ID, - }, - { }, -}; -MODULE_DEVICE_TABLE(acpi, axp20x_acpi_match); - static const struct regmap_irq_chip axp152_regmap_irq_chip = { .name = "axp152_irq_chip", .status_base = AXP152_IRQ1_STATE, @@ -608,7 +581,7 @@ static void axp20x_power_off(void) AXP20X_OFF); } -static int axp20x_match_device(struct axp20x_dev *axp20x) +int axp20x_match_device(struct axp20x_dev *axp20x) { struct device *dev = axp20x->dev; const struct acpi_device_id *acpi_id; @@ -665,38 +638,18 @@ static int axp20x_match_device(struct axp20x_dev *axp20x) return 0; } +EXPORT_SYMBOL(axp20x_match_device); -static int axp20x_i2c_probe(struct i2c_client *i2c, - const struct i2c_device_id *id) +int axp20x_device_probe(struct axp20x_dev *axp20x) { - struct axp20x_dev *axp20x; int ret; - axp20x = devm_kzalloc(&i2c->dev, sizeof(*axp20x), GFP_KERNEL); - if (!axp20x) - return -ENOMEM; - - axp20x->i2c_client = i2c; - axp20x->dev = &i2c->dev; - dev_set_drvdata(axp20x->dev, axp20x); - - ret = axp20x_match_device(axp20x); - if (ret) - return ret; - - axp20x->regmap = devm_regmap_init_i2c(i2c, axp20x->regmap_cfg); - if (IS_ERR(axp20x->regmap)) { - ret = PTR_ERR(axp20x->regmap); - dev_err(&i2c->dev, "regmap init failed: %d\n", ret); - return ret; - } - - ret = regmap_add_irq_chip(axp20x->regmap, i2c->irq, + ret = regmap_add_irq_chip(axp20x->regmap, axp20x->irq, IRQF_ONESHOT | IRQF_SHARED, -1, axp20x->regmap_irq_chip, &axp20x->regmap_irqc); if (ret) { - dev_err(&i2c->dev, "failed to add irq chip: %d\n", ret); + dev_err(axp20x->dev, "failed to add irq chip: %d\n", ret); return ret; } @@ -704,8 +657,8 @@ static int axp20x_i2c_probe(struct i2c_client *i2c, axp20x->nr_cells, NULL, 0, NULL); if (ret) { - dev_err(&i2c->dev, "failed to add MFD devices: %d\n", ret); - regmap_del_irq_chip(i2c->irq, axp20x->regmap_irqc); + dev_err(axp20x->dev, "failed to add MFD devices: %d\n", ret); + regmap_del_irq_chip(axp20x->irq, axp20x->regmap_irqc); return ret; } @@ -714,38 +667,25 @@ static int axp20x_i2c_probe(struct i2c_client *i2c, pm_power_off = axp20x_power_off; } - dev_info(&i2c->dev, "AXP20X driver loaded\n"); + dev_info(axp20x->dev, "AXP20X driver loaded\n"); return 0; } +EXPORT_SYMBOL(axp20x_device_probe); -static int axp20x_i2c_remove(struct i2c_client *i2c) +int axp20x_device_remove(struct axp20x_dev *axp20x) { - struct axp20x_dev *axp20x = i2c_get_clientdata(i2c); - if (axp20x == axp20x_pm_power_off) { axp20x_pm_power_off = NULL; pm_power_off = NULL; } mfd_remove_devices(axp20x->dev); - 
regmap_del_irq_chip(axp20x->i2c_client->irq, axp20x->regmap_irqc); + regmap_del_irq_chip(axp20x->irq, axp20x->regmap_irqc); return 0; } - -static struct i2c_driver axp20x_i2c_driver = { - .driver = { - .name = "axp20x", - .of_match_table = of_match_ptr(axp20x_of_match), - .acpi_match_table = ACPI_PTR(axp20x_acpi_match), - }, - .probe = axp20x_i2c_probe, - .remove = axp20x_i2c_remove, - .id_table = axp20x_i2c_id, -}; - -module_i2c_driver(axp20x_i2c_driver); +EXPORT_SYMBOL(axp20x_device_remove); MODULE_DESCRIPTION("PMIC MFD core driver for AXP20X"); MODULE_AUTHOR("Carlo Caione "); diff --git a/include/linux/mfd/axp20x.h b/include/linux/mfd/axp20x.h index b24c771cebd5..00697c6ad8b0 100644 --- a/include/linux/mfd/axp20x.h +++ b/include/linux/mfd/axp20x.h @@ -396,7 +396,7 @@ enum axp288_irqs { struct axp20x_dev { struct device *dev; - struct i2c_client *i2c_client; + int irq; struct regmap *regmap; struct regmap_irq_chip_data *regmap_irqc; long variant; @@ -462,4 +462,35 @@ static inline int axp20x_read_variable_width(struct regmap *regmap, return result; } +/** + * axp20x_match_device(): Setup axp20x variant related fields + * + * @axp20x: axp20x device to setup (.dev field must be set) + * + * This lets the axp20x core configure the mfd cells and register maps + * for later use. + */ +int axp20x_match_device(struct axp20x_dev *axp20x); + +/** + * axp20x_device_probe(): Probe a configured axp20x device + * + * @axp20x: axp20x device to probe (must be configured) + * + * This function lets the axp20x core register the axp20x mfd devices + * and irqchip. The axp20x device passed in must be fully configured + * with axp20x_match_device, its irq set, and regmap created. + */ +int axp20x_device_probe(struct axp20x_dev *axp20x); + +/** + * axp20x_device_remove(): Remove an axp20x device + * + * @axp20x: axp20x device to remove + * + * This tells the axp20x core to remove the associated mfd devices. + */ +int axp20x_device_remove(struct axp20x_dev *axp20x); + #endif /* __LINUX_MFD_AXP20X_H */ -- cgit v1.2.3 From 02071f0f797c989b342f46fbdf472ddb1c2cdee9 Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Fri, 12 Feb 2016 10:02:44 +0800 Subject: mfd: axp20x: Add support for RSB based AXP223 PMIC The AXP223 is a new PMIC commonly paired with Allwinner A23/A33 SoCs. It is functionally identical to AXP221; only the regulator default voltage/status and the external host interface are different. Signed-off-by: Chen-Yu Tsai Signed-off-by: Lee Jones --- drivers/mfd/Kconfig | 11 +++++++ drivers/mfd/Makefile | 1 + drivers/mfd/axp20x-rsb.c | 80 ++++++++++++++++++++++++++++++++++++++++ drivers/mfd/axp20x.c | 2 ++ include/linux/mfd/axp20x.h | 1 + 5 files changed, 95 insertions(+) create mode 100644 drivers/mfd/axp20x-rsb.c (limited to 'include/linux') diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig index 0037b9c933d9..ae3990b5a2bf 100644 --- a/drivers/mfd/Kconfig +++ b/drivers/mfd/Kconfig @@ -107,6 +107,17 @@ config MFD_AXP20X_I2C components like regulators or the PEK (Power Enable Key) under the corresponding menus. +config MFD_AXP20X_RSB + tristate "X-Powers AXP series PMICs with RSB" + select MFD_AXP20X + depends on SUNXI_RSB + help + If you say Y here you get support for the X-Powers AXP series power + management ICs (PMICs) controlled with RSB. + This driver includes only the core APIs. You have to select individual + components like regulators or the PEK (Power Enable Key) under the + corresponding menus. 
+ config MFD_CROS_EC tristate "ChromeOS Embedded Controller" select MFD_CORE diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile index dba4f99d9044..c69ea744fd1a 100644 --- a/drivers/mfd/Makefile +++ b/drivers/mfd/Makefile @@ -112,6 +112,7 @@ obj-$(CONFIG_MFD_DA9052_SPI) += da9052-spi.o obj-$(CONFIG_MFD_DA9052_I2C) += da9052-i2c.o obj-$(CONFIG_MFD_AXP20X) += axp20x.o obj-$(CONFIG_MFD_AXP20X_I2C) += axp20x-i2c.o +obj-$(CONFIG_MFD_AXP20X_RSB) += axp20x-rsb.o obj-$(CONFIG_MFD_LP3943) += lp3943.o obj-$(CONFIG_MFD_LP8788) += lp8788.o lp8788-irq.o diff --git a/drivers/mfd/axp20x-rsb.c b/drivers/mfd/axp20x-rsb.c new file mode 100644 index 000000000000..28c20247c112 --- /dev/null +++ b/drivers/mfd/axp20x-rsb.c @@ -0,0 +1,80 @@ +/* + * RSB driver for the X-Powers' Power Management ICs + * + * AXP20x typically comprises an adaptive USB-Compatible PWM charger, BUCK DC-DC + * converters, LDOs, multiple 12-bit ADCs of voltage, current and temperature + * as well as configurable GPIOs. + * + * This driver supports the RSB variants. + * + * Copyright (C) 2015 Chen-Yu Tsai + * + * Author: Chen-Yu Tsai + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +static int axp20x_rsb_probe(struct sunxi_rsb_device *rdev) +{ + struct axp20x_dev *axp20x; + int ret; + + axp20x = devm_kzalloc(&rdev->dev, sizeof(*axp20x), GFP_KERNEL); + if (!axp20x) + return -ENOMEM; + + axp20x->dev = &rdev->dev; + axp20x->irq = rdev->irq; + dev_set_drvdata(&rdev->dev, axp20x); + + ret = axp20x_match_device(axp20x); + if (ret) + return ret; + + axp20x->regmap = devm_regmap_init_sunxi_rsb(rdev, axp20x->regmap_cfg); + if (IS_ERR(axp20x->regmap)) { + ret = PTR_ERR(axp20x->regmap); + dev_err(&rdev->dev, "regmap init failed: %d\n", ret); + return ret; + } + + return axp20x_device_probe(axp20x); +} + +static int axp20x_rsb_remove(struct sunxi_rsb_device *rdev) +{ + struct axp20x_dev *axp20x = sunxi_rsb_device_get_drvdata(rdev); + + return axp20x_device_remove(axp20x); +} + +static const struct of_device_id axp20x_rsb_of_match[] = { + { .compatible = "x-powers,axp223", .data = (void *)AXP223_ID }, + { }, +}; +MODULE_DEVICE_TABLE(of, axp20x_rsb_of_match); + +static struct sunxi_rsb_driver axp20x_rsb_driver = { + .driver = { + .name = "axp20x-rsb", + .of_match_table = of_match_ptr(axp20x_rsb_of_match), + }, + .probe = axp20x_rsb_probe, + .remove = axp20x_rsb_remove, +}; +module_sunxi_rsb_driver(axp20x_rsb_driver); + +MODULE_DESCRIPTION("PMIC MFD sunXi RSB driver for AXP20X"); +MODULE_AUTHOR("Chen-Yu Tsai "); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/mfd/axp20x.c b/drivers/mfd/axp20x.c index 3054ea4b95e8..a57d6e940610 100644 --- a/drivers/mfd/axp20x.c +++ b/drivers/mfd/axp20x.c @@ -35,6 +35,7 @@ static const char * const axp20x_model_names[] = { "AXP202", "AXP209", "AXP221", + "AXP223", "AXP288", }; @@ -618,6 +619,7 @@ int axp20x_match_device(struct axp20x_dev *axp20x) axp20x->regmap_irq_chip = &axp20x_regmap_irq_chip; break; case AXP221_ID: + case AXP223_ID: axp20x->nr_cells = ARRAY_SIZE(axp22x_cells); axp20x->cells = axp22x_cells; axp20x->regmap_cfg = &axp22x_regmap_config; diff --git a/include/linux/mfd/axp20x.h b/include/linux/mfd/axp20x.h index 00697c6ad8b0..d82e7d51372b 100644 --- a/include/linux/mfd/axp20x.h +++ b/include/linux/mfd/axp20x.h @@ -18,6 +18,7 @@ enum { AXP202_ID, AXP209_ID, AXP221_ID, + 
AXP223_ID, AXP288_ID, NR_AXP20X_VARIANTS, }; -- cgit v1.2.3 From 179bc67f69b6cb53ad68cfdec5a917c2a2248355 Mon Sep 17 00:00:00 2001 From: Edward Cree Date: Thu, 11 Feb 2016 20:48:04 +0000 Subject: net: local checksum offload for encapsulation The arithmetic properties of the ones-complement checksum mean that a correctly checksummed inner packet, including its checksum, has a ones complement sum depending only on whatever value was used to initialise the checksum field before checksumming (in the case of TCP and UDP, this is the ones complement sum of the pseudo header, complemented). Consequently, if we are going to offload the inner checksum with CHECKSUM_PARTIAL, we can compute the outer checksum based only on the packet data not covered by the inner checksum, and the initial value of the inner checksum field. Signed-off-by: Edward Cree Signed-off-by: David S. Miller --- include/linux/skbuff.h | 24 ++++++++++++++++++++++++ net/ipv4/ip_tunnel_core.c | 10 +++++----- net/ipv4/udp.c | 20 ++++++++++---------- net/ipv6/ip6_checksum.c | 14 +++++++------- 4 files changed, 46 insertions(+), 22 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 6ec86f1a2ed9..cf906d1ce8a7 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -3702,5 +3702,29 @@ static inline unsigned int skb_gso_network_seglen(const struct sk_buff *skb) return hdr_len + skb_gso_transport_seglen(skb); } +/* Local Checksum Offload. + * Compute outer checksum based on the assumption that the + * inner checksum will be offloaded later. + * Fill in outer checksum adjustment (e.g. with sum of outer + * pseudo-header) before calling. + * Also ensure that inner checksum is in linear data area. + */ +static inline __wsum lco_csum(struct sk_buff *skb) +{ + char *inner_csum_field; + __wsum csum; + + /* Start with complement of inner checksum adjustment */ + inner_csum_field = skb->data + skb_checksum_start_offset(skb) + + skb->csum_offset; + csum = ~csum_unfold(*(__force __sum16 *)inner_csum_field); + /* Add in checksum of our headers (incl. outer checksum + * adjustment filled in by caller) + */ + csum = skb_checksum(skb, 0, skb_checksum_start_offset(skb), csum); + /* The result is the checksum from skb->data to end of packet */ + return csum; +} + #endif /* __KERNEL__ */ #endif /* _LINUX_SKBUFF_H */ diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c index 859d415c0b2d..d74ce93de1fe 100644 --- a/net/ipv4/ip_tunnel_core.c +++ b/net/ipv4/ip_tunnel_core.c @@ -166,20 +166,20 @@ struct sk_buff *iptunnel_handle_offloads(struct sk_buff *skb, return skb; } - /* If packet is not gso and we are resolving any partial checksum, + /* If packet is not gso and we are not offloading inner checksum, * clear encapsulation flag. This allows setting CHECKSUM_PARTIAL * on the outer header without confusing devices that implement * NETIF_F_IP_CSUM with encapsulation. 
*/ - if (csum_help) - skb->encapsulation = 0; - if (skb->ip_summed == CHECKSUM_PARTIAL && csum_help) { + skb->encapsulation = 0; err = skb_checksum_help(skb); if (unlikely(err)) goto error; - } else if (skb->ip_summed != CHECKSUM_PARTIAL) + } else if (skb->ip_summed != CHECKSUM_PARTIAL) { skb->ip_summed = CHECKSUM_NONE; + skb->encapsulation = 0; + } return skb; error: diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index ac3cedb25a9f..a59341cf483e 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -848,16 +848,18 @@ void udp_set_csum(bool nocheck, struct sk_buff *skb, { struct udphdr *uh = udp_hdr(skb); - if (nocheck) + if (nocheck) { uh->check = 0; - else if (skb_is_gso(skb)) + } else if (skb_is_gso(skb)) { uh->check = ~udp_v4_check(len, saddr, daddr, 0); - else if (skb_dst(skb) && skb_dst(skb)->dev && - (skb_dst(skb)->dev->features & - (NETIF_F_IP_CSUM | NETIF_F_HW_CSUM))) { - - BUG_ON(skb->ip_summed == CHECKSUM_PARTIAL); - + } else if (skb->ip_summed == CHECKSUM_PARTIAL) { + uh->check = 0; + uh->check = udp_v4_check(len, saddr, daddr, lco_csum(skb)); + if (uh->check == 0) + uh->check = CSUM_MANGLED_0; + } else if (skb_dst(skb) && skb_dst(skb)->dev && + (skb_dst(skb)->dev->features & + (NETIF_F_IP_CSUM | NETIF_F_HW_CSUM))) { skb->ip_summed = CHECKSUM_PARTIAL; skb->csum_start = skb_transport_header(skb) - skb->head; skb->csum_offset = offsetof(struct udphdr, check); @@ -865,8 +867,6 @@ void udp_set_csum(bool nocheck, struct sk_buff *skb, } else { __wsum csum; - BUG_ON(skb->ip_summed == CHECKSUM_PARTIAL); - uh->check = 0; csum = skb_checksum(skb, 0, len, 0); uh->check = udp_v4_check(len, saddr, daddr, csum); diff --git a/net/ipv6/ip6_checksum.c b/net/ipv6/ip6_checksum.c index 9a4d7322fb22..4924bd704e89 100644 --- a/net/ipv6/ip6_checksum.c +++ b/net/ipv6/ip6_checksum.c @@ -98,11 +98,13 @@ void udp6_set_csum(bool nocheck, struct sk_buff *skb, uh->check = 0; else if (skb_is_gso(skb)) uh->check = ~udp_v6_check(len, saddr, daddr, 0); - else if (skb_dst(skb) && skb_dst(skb)->dev && - (skb_dst(skb)->dev->features & NETIF_F_IPV6_CSUM)) { - - BUG_ON(skb->ip_summed == CHECKSUM_PARTIAL); - + else if (skb->ip_summed == CHECKSUM_PARTIAL) { + uh->check = 0; + uh->check = udp_v6_check(len, saddr, daddr, lco_csum(skb)); + if (uh->check == 0) + uh->check = CSUM_MANGLED_0; + } else if (skb_dst(skb) && skb_dst(skb)->dev && + (skb_dst(skb)->dev->features & NETIF_F_IPV6_CSUM)) { skb->ip_summed = CHECKSUM_PARTIAL; skb->csum_start = skb_transport_header(skb) - skb->head; skb->csum_offset = offsetof(struct udphdr, check); @@ -110,8 +112,6 @@ void udp6_set_csum(bool nocheck, struct sk_buff *skb, } else { __wsum csum; - BUG_ON(skb->ip_summed == CHECKSUM_PARTIAL); - uh->check = 0; csum = skb_checksum(skb, 0, len, 0); uh->check = udp_v6_check(len, saddr, daddr, csum); -- cgit v1.2.3 From e8ae7b000e64cf76283c72cae5e3ecd246618ef4 Mon Sep 17 00:00:00 2001 From: Edward Cree Date: Thu, 11 Feb 2016 21:03:37 +0000 Subject: Documentation/networking: add checksum-offloads.txt to explain LCO Signed-off-by: Edward Cree Signed-off-by: David S. 
Miller --- Documentation/networking/00-INDEX | 2 + Documentation/networking/checksum-offloads.txt | 119 +++++++++++++++++++++++++ include/linux/skbuff.h | 2 + 3 files changed, 123 insertions(+) create mode 100644 Documentation/networking/checksum-offloads.txt (limited to 'include/linux') diff --git a/Documentation/networking/00-INDEX b/Documentation/networking/00-INDEX index df27a1a50776..415154a487d0 100644 --- a/Documentation/networking/00-INDEX +++ b/Documentation/networking/00-INDEX @@ -44,6 +44,8 @@ can.txt - documentation on CAN protocol family. cdc_mbim.txt - 3G/LTE USB modem (Mobile Broadband Interface Model) +checksum-offloads.txt + - Explanation of checksum offloads; LCO, RCO cops.txt - info on the COPS LocalTalk Linux driver cs89x0.txt diff --git a/Documentation/networking/checksum-offloads.txt b/Documentation/networking/checksum-offloads.txt new file mode 100644 index 000000000000..de2a327766a7 --- /dev/null +++ b/Documentation/networking/checksum-offloads.txt @@ -0,0 +1,119 @@ +Checksum Offloads in the Linux Networking Stack + + +Introduction +============ + +This document describes a set of techniques in the Linux networking stack + to take advantage of checksum offload capabilities of various NICs. + +The following technologies are described: + * TX Checksum Offload + * LCO: Local Checksum Offload + * RCO: Remote Checksum Offload + +Things that should be documented here but aren't yet: + * RX Checksum Offload + * CHECKSUM_UNNECESSARY conversion + + +TX Checksum Offload +=================== + +The interface for offloading a transmit checksum to a device is explained + in detail in comments near the top of include/linux/skbuff.h. +In brief, it allows to request the device fill in a single ones-complement + checksum defined by the sk_buff fields skb->csum_start and + skb->csum_offset. The device should compute the 16-bit ones-complement + checksum (i.e. the 'IP-style' checksum) from csum_start to the end of the + packet, and fill in the result at (csum_start + csum_offset). +Because csum_offset cannot be negative, this ensures that the previous + value of the checksum field is included in the checksum computation, thus + it can be used to supply any needed corrections to the checksum (such as + the sum of the pseudo-header for UDP or TCP). +This interface only allows a single checksum to be offloaded. Where + encapsulation is used, the packet may have multiple checksum fields in + different header layers, and the rest will have to be handled by another + mechanism such as LCO or RCO. +No offloading of the IP header checksum is performed; it is always done in + software. This is OK because when we build the IP header, we obviously + have it in cache, so summing it isn't expensive. It's also rather short. +The requirements for GSO are more complicated, because when segmenting an + encapsulated packet both the inner and outer checksums may need to be + edited or recomputed for each resulting segment. See the skbuff.h comment + (section 'E') for more details. + +A driver declares its offload capabilities in netdev->hw_features; see + Documentation/networking/netdev-features for more. 
Note that a device + which only advertises NETIF_F_IP[V6]_CSUM must still obey the csum_start + and csum_offset given in the SKB; if it tries to deduce these itself in + hardware (as some NICs do) the driver should check that the values in the + SKB match those which the hardware will deduce, and if not, fall back to + checksumming in software instead (with skb_checksum_help or one of the + skb_csum_off_chk* functions as mentioned in include/linux/skbuff.h). This + is a pain, but that's what you get when hardware tries to be clever. + +The stack should, for the most part, assume that checksum offload is + supported by the underlying device. The only place that should check is + validate_xmit_skb(), and the functions it calls directly or indirectly. + That function compares the offload features requested by the SKB (which + may include other offloads besides TX Checksum Offload) and, if they are + not supported or enabled on the device (determined by netdev->features), + performs the corresponding offload in software. In the case of TX + Checksum Offload, that means calling skb_checksum_help(skb). + + +LCO: Local Checksum Offload +=========================== + +LCO is a technique for efficiently computing the outer checksum of an + encapsulated datagram when the inner checksum is due to be offloaded. +The ones-complement sum of a correctly checksummed TCP or UDP packet is + equal to the sum of the pseudo header, because everything else gets + 'cancelled out' by the checksum field. This is because the sum was + complemented before being written to the checksum field. +More generally, this holds in any case where the 'IP-style' ones complement + checksum is used, and thus any checksum that TX Checksum Offload supports. +That is, if we have set up TX Checksum Offload with a start/offset pair, we + know that _after the device has filled in that checksum_, the ones + complement sum from csum_start to the end of the packet will be equal to + _whatever value we put in the checksum field beforehand_. This allows us + to compute the outer checksum without looking at the payload: we simply + stop summing when we get to csum_start, then add the 16-bit word at + (csum_start + csum_offset). +Then, when the true inner checksum is filled in (either by hardware or by + skb_checksum_help()), the outer checksum will become correct by virtue of + the arithmetic. + +LCO is performed by the stack when constructing an outer UDP header for an + encapsulation such as VXLAN or GENEVE, in udp_set_csum(). Similarly for + the IPv6 equivalents, in udp6_set_csum(). +It is also performed when constructing an IPv4 GRE header, in + net/ipv4/ip_gre.c:build_header(). It is *not* currently performed when + constructing an IPv6 GRE header; the GRE checksum is computed over the + whole packet in net/ipv6/ip6_gre.c:ip6gre_xmit2(), but it should be + possible to use LCO here as IPv6 GRE still uses an IP-style checksum. +All of the LCO implementations use a helper function lco_csum(), in + include/linux/skbuff.h. + +LCO can safely be used for nested encapsulations; in this case, the outer + encapsulation layer will sum over both its own header and the 'middle' + header. This does mean that the 'middle' header will get summed multiple + times, but there doesn't seem to be a way to avoid that without incurring + bigger costs (e.g. in SKB bloat). 
+ + +RCO: Remote Checksum Offload +============================ + +RCO is a technique for eliding the inner checksum of an encapsulated + datagram, allowing the outer checksum to be offloaded. It does, however, + involve a change to the encapsulation protocols, which the receiver must + also support. For this reason, it is disabled by default. +RCO is detailed in the following Internet-Drafts: +https://tools.ietf.org/html/draft-herbert-remotecsumoffload-00 +https://tools.ietf.org/html/draft-herbert-vxlan-rco-00 +In Linux, RCO is implemented individually in each encapsulation protocol, + and most tunnel types have flags controlling its use. For instance, VXLAN + has the flag VXLAN_F_REMCSUM_TX (per struct vxlan_rdst) to indicate that + RCO should be used when transmitting to a given remote destination. diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index cf906d1ce8a7..39206751463e 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -3705,6 +3705,8 @@ static inline unsigned int skb_gso_network_seglen(const struct sk_buff *skb) /* Local Checksum Offload. * Compute outer checksum based on the assumption that the * inner checksum will be offloaded later. + * See Documentation/networking/checksum-offloads.txt for + * explanation of how this works. * Fill in outer checksum adjustment (e.g. with sum of outer * pseudo-header) before calling. * Also ensure that inner checksum is in linear data area. -- cgit v1.2.3 From 95a001f22b1c5717eafd500a43832249ddd93662 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Mon, 1 Feb 2016 23:02:48 +0100 Subject: mtd: map: fix .set_vpp() documentation As of commit 876fe76d793d03077eb61ba3afab4a383f46c554 "mtd: maps: physmap: Add reference counter to set_vpp()" the comment in the header file is incorrect and misleading. Fix it up. Cc: Russell King Cc: Paul Parsons Fixes: 876fe76d793d ("mtd: maps: physmap: Add reference counter to set_vpp()") Signed-off-by: Linus Walleij Signed-off-by: Brian Norris --- include/linux/mtd/map.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mtd/map.h b/include/linux/mtd/map.h index 58f3ba709ade..5e0eb7ccabd4 100644 --- a/include/linux/mtd/map.h +++ b/include/linux/mtd/map.h @@ -240,8 +240,11 @@ struct map_info { If there is no cache to care about this can be set to NULL. */ void (*inval_cache)(struct map_info *, unsigned long, ssize_t); - /* set_vpp() must handle being reentered -- enable, enable, disable - must leave it enabled. */ + /* This will be called with 1 as parameter when the first map user + * needs VPP, and called with 0 when the last user exits. The map + * core maintains a reference counter, and assumes that VPP is a + * global resource applying to all mapped flash chips on the system. + */ void (*set_vpp)(struct map_info *, int); unsigned long pfow_base; -- cgit v1.2.3 From b37d83a6a41499d582b8faedff1913ec75d9e70b Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Fri, 12 Feb 2016 16:40:13 +0200 Subject: usb: Parse the new USB 3.1 SuperSpeedPlus Isoc endpoint companion descriptor USB 3.1 devices can return a new SuperSpeedPlus isoc endpoint companion descriptor for an isochronous endpoint that requires more than 48K bytes per Service Interval. The new descriptor immediately follows the old USB 3.0 SuperSpeed Endpoint Companion and will provide a new BytesPerInterval value. 
It is parsed and stored in struct usb_host_endpoint with the other endpoint related descriptors, and should be used by USB3.1 capable hosts to reserve bus time in the schedule. Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/config.c | 31 +++++++++++++++++++++++++++++++ include/linux/usb.h | 2 ++ 2 files changed, 33 insertions(+) (limited to 'include/linux') diff --git a/drivers/usb/core/config.c b/drivers/usb/core/config.c index bbcf4009f99e..58d089802ab3 100644 --- a/drivers/usb/core/config.c +++ b/drivers/usb/core/config.c @@ -43,6 +43,27 @@ static int find_next_descriptor(unsigned char *buffer, int size, return buffer - buffer0; } +static void usb_parse_ssp_isoc_endpoint_companion(struct device *ddev, + int cfgno, int inum, int asnum, struct usb_host_endpoint *ep, + unsigned char *buffer, int size) +{ + struct usb_ssp_isoc_ep_comp_descriptor *desc; + + /* + * The SuperSpeedPlus Isoc endpoint companion descriptor immediately + * follows the SuperSpeed Endpoint Companion descriptor + */ + desc = (struct usb_ssp_isoc_ep_comp_descriptor *) buffer; + if (desc->bDescriptorType != USB_DT_SSP_ISOC_ENDPOINT_COMP || + size < USB_DT_SSP_ISOC_EP_COMP_SIZE) { + dev_warn(ddev, "Invalid SuperSpeedPlus isoc endpoint companion" + "for config %d interface %d altsetting %d ep %d.\n", + cfgno, inum, asnum, ep->desc.bEndpointAddress); + return; + } + memcpy(&ep->ssp_isoc_ep_comp, desc, USB_DT_SSP_ISOC_EP_COMP_SIZE); +} + static void usb_parse_ss_endpoint_companion(struct device *ddev, int cfgno, int inum, int asnum, struct usb_host_endpoint *ep, unsigned char *buffer, int size) @@ -54,6 +75,9 @@ static void usb_parse_ss_endpoint_companion(struct device *ddev, int cfgno, * be the first thing immediately following the endpoint descriptor. 
+ */ desc = (struct usb_ss_ep_comp_descriptor *) buffer; + buffer += desc->bLength; + size -= desc->bLength; + if (desc->bDescriptorType != USB_DT_SS_ENDPOINT_COMP || size < USB_DT_SS_EP_COMP_SIZE) { dev_warn(ddev, "No SuperSpeed endpoint companion for config %d " @@ -112,6 +136,7 @@ static void usb_parse_ss_endpoint_companion(struct device *ddev, int cfgno, cfgno, inum, asnum, ep->desc.bEndpointAddress); ep->ss_ep_comp.bmAttributes = 16; } else if (usb_endpoint_xfer_isoc(&ep->desc) && + !USB_SS_SSP_ISOC_COMP(desc->bmAttributes) && USB_SS_MULT(desc->bmAttributes) > 3) { dev_warn(ddev, "Isoc endpoint has Mult of %d in " "config %d interface %d altsetting %d ep %d: " @@ -121,6 +146,12 @@ static void usb_parse_ss_endpoint_companion(struct device *ddev, int cfgno, ep->ss_ep_comp.bmAttributes = 2; } + /* Parse a possible SuperSpeedPlus isoc ep companion descriptor */ + if (usb_endpoint_xfer_isoc(&ep->desc) && + USB_SS_SSP_ISOC_COMP(desc->bmAttributes)) + usb_parse_ssp_isoc_endpoint_companion(ddev, cfgno, inum, asnum, + ep, buffer, size); + if (usb_endpoint_xfer_isoc(&ep->desc)) max_tx = (desc->bMaxBurst + 1) * (USB_SS_MULT(desc->bmAttributes)) * diff --git a/include/linux/usb.h b/include/linux/usb.h index dc0ea0de8a81..8fc881af8735 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -50,6 +50,7 @@ struct ep_device; * struct usb_host_endpoint - host-side endpoint descriptor and queue * @desc: descriptor for this endpoint, wMaxPacketSize in native byteorder * @ss_ep_comp: SuperSpeed companion descriptor for this endpoint + * @ssp_isoc_ep_comp: SuperSpeedPlus isoc companion descriptor for this endpoint * @urb_list: urbs queued to this endpoint; maintained by usbcore * @hcpriv: for use by HCD; typically holds hardware dma queue head (QH) * with one or more transfer descriptors (TDs) per urb @@ -65,6 +66,7 @@ struct ep_device; struct usb_host_endpoint { struct usb_endpoint_descriptor desc; struct usb_ss_ep_comp_descriptor ss_ep_comp; + struct usb_ssp_isoc_ep_comp_descriptor ssp_isoc_ep_comp; struct list_head urb_list; void *hcpriv; struct ep_device *ep_dev; /* For sysfs info */ -- cgit v1.2.3 From faee822c5a7ab99de25cd34fcde3f8d37b6b9923 Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Fri, 12 Feb 2016 16:40:14 +0200 Subject: usb: Add USB 3.1 Precision time measurement capability descriptor support USB 3.1 devices that support precision time measurement have an additional PTM capability descriptor as part of the full BOS descriptor. Look for this descriptor while parsing the BOS descriptor, and store it in struct usb_host_bos if it exists. 
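A hedged sketch of how host-side code might consume the parsed capability (the helper name is hypothetical; dev->bos and the ptm_cap pointer are the real fields added below):

/* hypothetical helper: PTM support is visible once the BOS is parsed */
static bool example_supports_ptm(struct usb_device *udev)
{
	return udev->bos && udev->bos->ptm_cap;
}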
Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/config.c | 3 +++ include/linux/usb.h | 1 + include/uapi/linux/usb/ch9.h | 10 ++++++++++ 3 files changed, 14 insertions(+) (limited to 'include/linux') diff --git a/drivers/usb/core/config.c b/drivers/usb/core/config.c index 58d089802ab3..5eb1a87228b4 100644 --- a/drivers/usb/core/config.c +++ b/drivers/usb/core/config.c @@ -894,6 +894,9 @@ int usb_get_bos_descriptor(struct usb_device *dev) dev->bos->ss_id = (struct usb_ss_container_id_descriptor *)buffer; break; + case USB_PTM_CAP_TYPE: + dev->bos->ptm_cap = + (struct usb_ptm_cap_descriptor *)buffer; default: break; } diff --git a/include/linux/usb.h b/include/linux/usb.h index 8fc881af8735..6a9a0c28415d 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -332,6 +332,7 @@ struct usb_host_bos { struct usb_ss_cap_descriptor *ss_cap; struct usb_ssp_cap_descriptor *ssp_cap; struct usb_ss_container_id_descriptor *ss_id; + struct usb_ptm_cap_descriptor *ptm_cap; }; int __usb_get_extra_descriptor(char *buffer, unsigned size, diff --git a/include/uapi/linux/usb/ch9.h b/include/uapi/linux/usb/ch9.h index a65f1f328de1..252ac16635dc 100644 --- a/include/uapi/linux/usb/ch9.h +++ b/include/uapi/linux/usb/ch9.h @@ -912,6 +912,16 @@ struct usb_ssp_cap_descriptor { #define USB_SSP_SUBLINK_SPEED_LSM (0xff << 16) /* Lanespeed mantissa */ } __attribute__((packed)); +/* + * Precision time measurement capability descriptor: advertised by devices and + * hubs that support PTM + */ +#define USB_PTM_CAP_TYPE 0xb +struct usb_ptm_cap_descriptor { + __u8 bLength; + __u8 bDescriptorType; + __u8 bDevCapabilityType; +} __attribute__((packed)); /*-------------------------------------------------------------------------*/ -- cgit v1.2.3 From 64024ac7bb82c41265e7ba08235591d9a2709dfc Mon Sep 17 00:00:00 2001 From: Jonathan Tinkham Date: Tue, 2 Feb 2016 23:03:45 -0700 Subject: power: bq24735-charger: add 'ti,external-control' option Implement a 'ti,external-control' option for when the charger shouldn't be configured by the host. 
Signed-off-by: Jonathan Tinkham Signed-off-by: Sebastian Reichel --- drivers/power/bq24735-charger.c | 11 +++++++++++ include/linux/power/bq24735-charger.h | 2 ++ 2 files changed, 13 insertions(+) (limited to 'include/linux') diff --git a/drivers/power/bq24735-charger.c b/drivers/power/bq24735-charger.c index 022a09ceab73..fa454c19ce17 100644 --- a/drivers/power/bq24735-charger.c +++ b/drivers/power/bq24735-charger.c @@ -106,6 +106,9 @@ static int bq24735_update_word(struct i2c_client *client, u8 reg, static inline int bq24735_enable_charging(struct bq24735 *charger) { + if (charger->pdata->ext_control) + return 0; + return bq24735_update_word(charger->client, BQ24735_CHG_OPT, BQ24735_CHG_OPT_CHARGE_DISABLE, ~BQ24735_CHG_OPT_CHARGE_DISABLE); @@ -113,6 +116,9 @@ static inline int bq24735_enable_charging(struct bq24735 *charger) static inline int bq24735_disable_charging(struct bq24735 *charger) { + if (charger->pdata->ext_control) + return 0; + return bq24735_update_word(charger->client, BQ24735_CHG_OPT, BQ24735_CHG_OPT_CHARGE_DISABLE, BQ24735_CHG_OPT_CHARGE_DISABLE); @@ -124,6 +130,9 @@ static int bq24735_config_charger(struct bq24735 *charger) int ret; u16 value; + if (pdata->ext_control) + return 0; + if (pdata->charge_current) { value = pdata->charge_current & BQ24735_CHARGE_CURRENT_MASK; @@ -322,6 +331,8 @@ static struct bq24735_platform *bq24735_parse_dt_data(struct i2c_client *client) if (!ret) pdata->input_current = val; + pdata->ext_control = of_property_read_bool(np, "ti,external-control"); + return pdata; } diff --git a/include/linux/power/bq24735-charger.h b/include/linux/power/bq24735-charger.h index f536164a6069..6b750c1a45fa 100644 --- a/include/linux/power/bq24735-charger.h +++ b/include/linux/power/bq24735-charger.h @@ -32,6 +32,8 @@ struct bq24735_platform { int status_gpio_active_low; bool status_gpio_valid; + bool ext_control; + char **supplied_to; size_t num_supplicants; }; -- cgit v1.2.3 From 6659b55dae09b2042d8cdd0f4576502c3e81957c Mon Sep 17 00:00:00 2001 From: Benson Leung Date: Fri, 12 Feb 2016 13:56:59 +0100 Subject: power_supply: Add types for USB Type C and PD chargers This adds power supply types for USB chargers defined in the USB Type-C Specification 1.1 and in the USB Power Delivery Specification Revision 2.0 V1.1. 
The following are added: POWER_SUPPLY_TYPE_USB_TYPE_C, /* Type C Port */ POWER_SUPPLY_TYPE_USB_PD, /* Power Delivery Port */ POWER_SUPPLY_TYPE_USB_PD_DRP, /* PD Dual Role Port */ Signed-off-by: Benson Leung [tomeu: remove the mention of Type C from the comments] Signed-off-by: Tomeu Vizoso Reviewed-by: Alec Berg Reviewed-by: Vincent Palatin Reviewed-by: Todd Broch Signed-off-by: Sebastian Reichel --- drivers/power/power_supply_sysfs.c | 3 ++- include/linux/power_supply.h | 3 +++ 2 files changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/power/power_supply_sysfs.c b/drivers/power/power_supply_sysfs.c index ed2d7fd0c734..80fed98832f9 100644 --- a/drivers/power/power_supply_sysfs.c +++ b/drivers/power/power_supply_sysfs.c @@ -45,7 +45,8 @@ static ssize_t power_supply_show_property(struct device *dev, char *buf) { static char *type_text[] = { "Unknown", "Battery", "UPS", "Mains", "USB", - "USB_DCP", "USB_CDP", "USB_ACA" + "USB_DCP", "USB_CDP", "USB_ACA", "USB_C", + "USB_PD", "USB_PD_DRP" }; static char *status_text[] = { "Unknown", "Charging", "Discharging", "Not charging", "Full" diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h index ef9f1592185d..751061790626 100644 --- a/include/linux/power_supply.h +++ b/include/linux/power_supply.h @@ -163,6 +163,9 @@ enum power_supply_type { POWER_SUPPLY_TYPE_USB_DCP, /* Dedicated Charging Port */ POWER_SUPPLY_TYPE_USB_CDP, /* Charging Downstream Port */ POWER_SUPPLY_TYPE_USB_ACA, /* Accessory Charger Adapters */ + POWER_SUPPLY_TYPE_USB_TYPE_C, /* Type C Port */ + POWER_SUPPLY_TYPE_USB_PD, /* Power Delivery Port */ + POWER_SUPPLY_TYPE_USB_PD_DRP, /* PD Dual Role Port */ }; enum power_supply_notifier_events { -- cgit v1.2.3 From 800c3a0e683601c4ede490e8525852e63b0f6615 Mon Sep 17 00:00:00 2001 From: Laxman Dewangan Date: Wed, 10 Feb 2016 14:29:50 +0530 Subject: regmap: irq: add devm apis for regmap_{add,del}_irq_chip Add device managed APIs for regmap_add_irq_chip() and regmap_del_irq_chip() so that the resources can be freed automatically by the device management framework. This helps in two ways: 1. Maintaining the correct sequence of resource allocation and deallocation. With the unmanaged calls: regmap_add_irq_chip(&d); devm_request_threaded_irq(virq); and on the free path: regmap_del_irq_chip(d); followed by unregistration of the irq. In this case the regmap irq chip is deleted before the irq is freed, which forces drivers to fall back to normal (non-devm) irq registration. With the devm APIs the sequence is maintained properly: devm_regmap_add_irq_chip(&d); devm_request_threaded_irq(virq); and resource deallocation is done in reverse order by the device management framework. 2. There is no need to delete the regmap_irq_chip in the error path or in the remove callback, so there is less code on those paths.
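A hedged sketch of the resulting probe() pattern; example_regmap, example_irq_chip, example_irq_handler and EXAMPLE_IRQ_ALERT are illustrative stand-ins:

	static int example_probe(struct i2c_client *i2c)
	{
		struct regmap_irq_chip_data *irq_data;
		int virq, ret;

		/* Released automatically on unbind, after the devm IRQ below */
		ret = devm_regmap_add_irq_chip(&i2c->dev, example_regmap,
					       i2c->irq, IRQF_ONESHOT, 0,
					       &example_irq_chip, &irq_data);
		if (ret)
			return ret;

		virq = regmap_irq_get_virq(irq_data, EXAMPLE_IRQ_ALERT);
		return devm_request_threaded_irq(&i2c->dev, virq, NULL,
						 example_irq_handler,
						 IRQF_ONESHOT, "example-alert",
						 NULL);
	}

Because devres releases in reverse order, the threaded IRQ is freed before the irq chip is deleted, which is exactly the ordering the commit message asks for.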
Signed-off-by: Laxman Dewangan Signed-off-by: Mark Brown --- drivers/base/regmap/regmap-irq.c | 82 ++++++++++++++++++++++++++++++++++++++++ include/linux/regmap.h | 8 ++++ 2 files changed, 90 insertions(+) (limited to 'include/linux') diff --git a/drivers/base/regmap/regmap-irq.c b/drivers/base/regmap/regmap-irq.c index 7e1e9e86c70b..36d08ca2cbe2 100644 --- a/drivers/base/regmap/regmap-irq.c +++ b/drivers/base/regmap/regmap-irq.c @@ -695,6 +695,88 @@ void regmap_del_irq_chip(int irq, struct regmap_irq_chip_data *d) } EXPORT_SYMBOL_GPL(regmap_del_irq_chip); +static void devm_regmap_irq_chip_release(struct device *dev, void *res) +{ + struct regmap_irq_chip_data *d = *(struct regmap_irq_chip_data **)res; + + regmap_del_irq_chip(d->irq, d); +} + +static int devm_regmap_irq_chip_match(struct device *dev, void *res, void *data) +{ + struct regmap_irq_chip_data **r = res; + + if (!r || !*r) { + WARN_ON(!r || !*r); + return 0; + } + return *r == data; +} + +/** + * devm_regmap_add_irq_chip(): Resource managed regmap_add_irq_chip() + * + * @dev: The device to which the irq_chip belongs. + * @map: The regmap for the device. + * @irq: The IRQ the device uses to signal interrupts + * @irq_flags: The IRQF_ flags to use for the primary interrupt. + * @irq_base: Allocate at specific IRQ number if irq_base > 0. + * @chip: Configuration for the interrupt controller. + * @data: Runtime data structure for the controller, allocated on success + * + * Returns 0 on success or an errno on failure. + * + * The regmap_irq_chip data will automatically be released when the device is + * unbound. + */ +int devm_regmap_add_irq_chip(struct device *dev, struct regmap *map, int irq, + int irq_flags, int irq_base, + const struct regmap_irq_chip *chip, + struct regmap_irq_chip_data **data) +{ + struct regmap_irq_chip_data **ptr, *d; + int ret; + + ptr = devres_alloc(devm_regmap_irq_chip_release, sizeof(*ptr), + GFP_KERNEL); + if (!ptr) + return -ENOMEM; + + ret = regmap_add_irq_chip(map, irq, irq_flags, irq_base, + chip, &d); + if (ret < 0) { + devres_free(ptr); + return ret; + } + + *ptr = d; + devres_add(dev, ptr); + *data = d; + return 0; +} +EXPORT_SYMBOL_GPL(devm_regmap_add_irq_chip); + +/** + * devm_regmap_del_irq_chip(): Resource managed regmap_del_irq_chip() + * + * @dev: Device for which the resource was allocated.
+ * @irq: Primary IRQ for the device + * @data: regmap_irq_chip_data allocated by regmap_add_irq_chip() + */ +void devm_regmap_del_irq_chip(struct device *dev, int irq, + struct regmap_irq_chip_data *data) +{ + int rc; + + WARN_ON(irq != data->irq); + rc = devres_release(dev, devm_regmap_irq_chip_release, + devm_regmap_irq_chip_match, data); + + if (rc != 0) + WARN_ON(rc); +} +EXPORT_SYMBOL_GPL(devm_regmap_del_irq_chip); + /** * regmap_irq_chip_get_base(): Retrieve interrupt base for a regmap IRQ chip * diff --git a/include/linux/regmap.h b/include/linux/regmap.h index 18394343f489..de428962bfe6 100644 --- a/include/linux/regmap.h +++ b/include/linux/regmap.h @@ -868,6 +868,14 @@ int regmap_add_irq_chip(struct regmap *map, int irq, int irq_flags, int irq_base, const struct regmap_irq_chip *chip, struct regmap_irq_chip_data **data); void regmap_del_irq_chip(int irq, struct regmap_irq_chip_data *data); + +int devm_regmap_add_irq_chip(struct device *dev, struct regmap *map, int irq, + int irq_flags, int irq_base, + const struct regmap_irq_chip *chip, + struct regmap_irq_chip_data **data); +void devm_regmap_del_irq_chip(struct device *dev, int irq, + struct regmap_irq_chip_data *data); + int regmap_irq_chip_get_base(struct regmap_irq_chip_data *data); int regmap_irq_get_virq(struct regmap_irq_chip_data *data, int irq); struct irq_domain *regmap_irq_get_domain(struct regmap_irq_chip_data *data); -- cgit v1.2.3 From fc5cbf0c94b6f7fd62fdddff892207290510945d Mon Sep 17 00:00:00 2001 From: Axel Haslam Date: Mon, 15 Feb 2016 11:10:51 +0100 Subject: PM / Domains: Support for multiple states Some hardware (e.g. OMAP) has the ability to enter different low power modes for a given power domain. This allows for more fine grained control over the power state of the platform. As a typical example, some registers of the hardware may be implemented with retention flip-flops and be able to retain their state at lower voltages, allowing for faster on/off latencies and an increased window of opportunity to enter an intermediate low power state other than "off". When trying to set a power domain to off, the genpd governor will choose the deepest state that will respect the qos constraints of all the devices and sub-domains on the power domain. The state chosen by the governor is saved in the "state_idx" field of the generic_pm_domain structure and shall be used by the power_off and power_on callbacks to perform the necessary actions to set the power domain into (and out of) the state indicated by state_idx. States must be declared in ascending order from shallowest to deepest, deepest meaning the state which takes the longest to enter and exit. For platforms that don't declare any states, a single "off" state is used. Once all platforms are converted to use the state array, the legacy on/off latencies will be removed. [ Lina: Modified genpd state initialization and removed use of save_state_latency_ns in genpd timing data ] Suggested-by: Kevin Hilman Signed-off-by: Lina Iyer Signed-off-by: Axel Haslam Acked-by: Ulf Hansson Signed-off-by: Rafael J.
Wysocki --- drivers/base/power/domain.c | 39 +++++++++++++++++++--- drivers/base/power/domain_governor.c | 64 ++++++++++++++++++++++-------------- include/linux/pm_domain.h | 11 +++++++ 3 files changed, 85 insertions(+), 29 deletions(-) (limited to 'include/linux') diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index 301b785f9f56..4c6f46b5c444 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -104,6 +104,7 @@ static void genpd_sd_counter_inc(struct generic_pm_domain *genpd) static int genpd_power_on(struct generic_pm_domain *genpd, bool timed) { + unsigned int state_idx = genpd->state_idx; ktime_t time_start; s64 elapsed_ns; int ret; @@ -120,10 +121,10 @@ static int genpd_power_on(struct generic_pm_domain *genpd, bool timed) return ret; elapsed_ns = ktime_to_ns(ktime_sub(ktime_get(), time_start)); - if (elapsed_ns <= genpd->power_on_latency_ns) + if (elapsed_ns <= genpd->states[state_idx].power_on_latency_ns) return ret; - genpd->power_on_latency_ns = elapsed_ns; + genpd->states[state_idx].power_on_latency_ns = elapsed_ns; genpd->max_off_time_changed = true; pr_debug("%s: Power-%s latency exceeded, new value %lld ns\n", genpd->name, "on", elapsed_ns); @@ -133,6 +134,7 @@ static int genpd_power_on(struct generic_pm_domain *genpd, bool timed) static int genpd_power_off(struct generic_pm_domain *genpd, bool timed) { + unsigned int state_idx = genpd->state_idx; ktime_t time_start; s64 elapsed_ns; int ret; @@ -149,10 +151,10 @@ static int genpd_power_off(struct generic_pm_domain *genpd, bool timed) return ret; elapsed_ns = ktime_to_ns(ktime_sub(ktime_get(), time_start)); - if (elapsed_ns <= genpd->power_off_latency_ns) + if (elapsed_ns <= genpd->states[state_idx].power_off_latency_ns) return ret; - genpd->power_off_latency_ns = elapsed_ns; + genpd->states[state_idx].power_off_latency_ns = elapsed_ns; genpd->max_off_time_changed = true; pr_debug("%s: Power-%s latency exceeded, new value %lld ns\n", genpd->name, "off", elapsed_ns); @@ -585,6 +587,8 @@ static void pm_genpd_sync_poweroff(struct generic_pm_domain *genpd, || atomic_read(&genpd->sd_count) > 0) return; + /* Choose the deepest state when suspending */ + genpd->state_idx = genpd->state_count - 1; genpd_power_off(genpd, timed); genpd->status = GPD_STATE_POWER_OFF; @@ -1508,6 +1512,26 @@ void pm_genpd_init(struct generic_pm_domain *genpd, genpd->dev_ops.start = pm_clk_resume; } + if (genpd->state_idx >= GENPD_MAX_NUM_STATES) { + pr_warn("Initial state index out of bounds.\n"); + genpd->state_idx = GENPD_MAX_NUM_STATES - 1; + } + + if (genpd->state_count > GENPD_MAX_NUM_STATES) { + pr_warn("Limiting states to %d\n", GENPD_MAX_NUM_STATES); + genpd->state_count = GENPD_MAX_NUM_STATES; + } + + /* Use only one "off" state if there were no states declared */ + if (genpd->state_count == 0) { + genpd->states[0].power_on_latency_ns = + genpd->power_on_latency_ns; + genpd->states[0].power_off_latency_ns = + genpd->power_off_latency_ns; + + genpd->state_count = 1; + } + mutex_lock(&gpd_list_lock); list_add(&genpd->gpd_list_node, &gpd_list); mutex_unlock(&gpd_list_lock); @@ -1872,7 +1896,12 @@ static int pm_genpd_summary_one(struct seq_file *s, if (WARN_ON(genpd->status >= ARRAY_SIZE(status_lookup))) goto exit; - seq_printf(s, "%-30s %-15s ", genpd->name, status_lookup[genpd->status]); + seq_printf(s, "%-30s %s", genpd->name, status_lookup[genpd->status]); + + if (genpd->status == GPD_STATE_POWER_OFF) + seq_printf(s, " %-13d ", genpd->state_idx); + else + seq_printf(s, " %-15s ", ""); /* * 
Modifications on the list require holding locks on both diff --git a/drivers/base/power/domain_governor.c b/drivers/base/power/domain_governor.c index 1e937ac5f456..00a5436dd44b 100644 --- a/drivers/base/power/domain_governor.c +++ b/drivers/base/power/domain_governor.c @@ -98,7 +98,8 @@ static bool default_stop_ok(struct device *dev) * * This routine must be executed under the PM domain's lock. */ -static bool default_power_down_ok(struct dev_pm_domain *pd) +static bool __default_power_down_ok(struct dev_pm_domain *pd, + unsigned int state) { struct generic_pm_domain *genpd = pd_to_genpd(pd); struct gpd_link *link; @@ -106,27 +107,9 @@ static bool default_power_down_ok(struct dev_pm_domain *pd) s64 min_off_time_ns; s64 off_on_time_ns; - if (genpd->max_off_time_changed) { - struct gpd_link *link; - - /* - * We have to invalidate the cached results for the masters, so - * use the observation that default_power_down_ok() is not - * going to be called for any master until this instance - * returns. - */ - list_for_each_entry(link, &genpd->slave_links, slave_node) - link->master->max_off_time_changed = true; - - genpd->max_off_time_changed = false; - genpd->cached_power_down_ok = false; - genpd->max_off_time_ns = -1; - } else { - return genpd->cached_power_down_ok; - } + off_on_time_ns = genpd->states[state].power_off_latency_ns + + genpd->states[state].power_on_latency_ns; - off_on_time_ns = genpd->power_off_latency_ns + - genpd->power_on_latency_ns; min_off_time_ns = -1; /* @@ -186,8 +169,6 @@ static bool default_power_down_ok(struct dev_pm_domain *pd) min_off_time_ns = constraint_ns; } - genpd->cached_power_down_ok = true; - /* * If the computed minimum device off time is negative, there are no * latency constraints, so the domain can spend arbitrary time in the @@ -201,10 +182,45 @@ static bool default_power_down_ok(struct dev_pm_domain *pd) * time and the time needed to turn the domain on is the maximum * theoretical time this domain can spend in the "off" state. */ - genpd->max_off_time_ns = min_off_time_ns - genpd->power_on_latency_ns; + genpd->max_off_time_ns = min_off_time_ns - + genpd->states[state].power_on_latency_ns; return true; } +static bool default_power_down_ok(struct dev_pm_domain *pd) +{ + struct generic_pm_domain *genpd = pd_to_genpd(pd); + struct gpd_link *link; + + if (!genpd->max_off_time_changed) + return genpd->cached_power_down_ok; + + /* + * We have to invalidate the cached results for the masters, so + * use the observation that default_power_down_ok() is not + * going to be called for any master until this instance + * returns. + */ + list_for_each_entry(link, &genpd->slave_links, slave_node) + link->master->max_off_time_changed = true; + + genpd->max_off_time_ns = -1; + genpd->max_off_time_changed = false; + genpd->cached_power_down_ok = true; + genpd->state_idx = genpd->state_count - 1; + + /* Find a state to power down to, starting from the deepest. 
*/ + while (!__default_power_down_ok(pd, genpd->state_idx)) { + if (genpd->state_idx == 0) { + genpd->cached_power_down_ok = false; + break; + } + genpd->state_idx--; + } + + return genpd->cached_power_down_ok; +} + static bool always_on_power_down_ok(struct dev_pm_domain *domain) { return false; diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index db21d3995f7e..1726c4ac6529 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -19,6 +19,8 @@ /* Defines used for the flags field in the struct generic_pm_domain */ #define GENPD_FLAG_PM_CLK (1U << 0) /* PM domain uses PM clk */ +#define GENPD_MAX_NUM_STATES 8 /* Number of possible low power states */ + enum gpd_status { GPD_STATE_ACTIVE = 0, /* PM domain is active */ GPD_STATE_POWER_OFF, /* PM domain is off */ @@ -37,6 +39,11 @@ struct gpd_dev_ops { bool (*active_wakeup)(struct device *dev); }; +struct genpd_power_state { + s64 power_off_latency_ns; + s64 power_on_latency_ns; +}; + struct generic_pm_domain { struct dev_pm_domain domain; /* PM domain operations */ struct list_head gpd_list_node; /* Node in the global PM domains list */ @@ -66,6 +73,10 @@ struct generic_pm_domain { void (*detach_dev)(struct generic_pm_domain *domain, struct device *dev); unsigned int flags; /* Bit field of configs for genpd */ + struct genpd_power_state states[GENPD_MAX_NUM_STATES]; + unsigned int state_count; /* number of states */ + unsigned int state_idx; /* state that genpd will go to when off */ + }; static inline struct generic_pm_domain *pd_to_genpd(struct dev_pm_domain *pd) -- cgit v1.2.3 From 90e63452ac3a42c9ff10b12880429e8592cf39ec Mon Sep 17 00:00:00 2001 From: Axel Haslam Date: Mon, 15 Feb 2016 11:10:53 +0100 Subject: PM / Domains: remove old power on/off latencies Now that all known users have been converted to use state latencies, we can remove the latency field in the generic_pm_domain structure. Signed-off-by: Axel Haslam Acked-by: Ulf Hansson Signed-off-by: Rafael J. Wysocki --- drivers/base/power/domain.c | 8 +------- include/linux/pm_domain.h | 2 -- 2 files changed, 1 insertion(+), 9 deletions(-) (limited to 'include/linux') diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index 4c6f46b5c444..e8ca290dbf9d 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -1523,14 +1523,8 @@ void pm_genpd_init(struct generic_pm_domain *genpd, } /* Use only one "off" state if there were no states declared */ - if (genpd->state_count == 0) { - genpd->states[0].power_on_latency_ns = - genpd->power_on_latency_ns; - genpd->states[0].power_off_latency_ns = - genpd->power_off_latency_ns; - + if (genpd->state_count == 0) genpd->state_count = 1; - } mutex_lock(&gpd_list_lock); list_add(&genpd->gpd_list_node, &gpd_list); diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index 1726c4ac6529..49cd8890b873 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -61,9 +61,7 @@ struct generic_pm_domain { unsigned int prepared_count; /* Suspend counter of prepared devices */ bool suspend_power_off; /* Power status before system suspend */ int (*power_off)(struct generic_pm_domain *domain); - s64 power_off_latency_ns; int (*power_on)(struct generic_pm_domain *domain); - s64 power_on_latency_ns; struct gpd_dev_ops dev_ops; s64 max_off_time_ns; /* Maximum allowed "suspended" time. 
*/ bool max_off_time_changed; -- cgit v1.2.3 From 20ec3e39fc12cf3a331ee73eb01d52bddcdd2fa4 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Thu, 11 Feb 2016 11:03:06 +0100 Subject: gpio: move the pin ranges into gpio_device Instead of keeping this reference to the pin ranges in the client driver-supplied gpio_chip, move it to the internal gpio_device as the drivers have no need to inspect this. Signed-off-by: Linus Walleij --- drivers/gpio/gpiolib-acpi.c | 18 +++++++++--------- drivers/gpio/gpiolib.c | 10 +++++----- drivers/gpio/gpiolib.h | 10 ++++++++++ include/linux/gpio/driver.h | 9 --------- 4 files changed, 24 insertions(+), 23 deletions(-) (limited to 'include/linux') diff --git a/drivers/gpio/gpiolib-acpi.c b/drivers/gpio/gpiolib-acpi.c index 540cbc88c7a2..682070d20f00 100644 --- a/drivers/gpio/gpiolib-acpi.c +++ b/drivers/gpio/gpiolib-acpi.c @@ -71,29 +71,29 @@ static int acpi_gpiochip_find(struct gpio_chip *gc, void *data) * controller uses pin controller and the mapping is not contiguous the * offset might be different. */ -static int acpi_gpiochip_pin_to_gpio_offset(struct gpio_chip *chip, int pin) +static int acpi_gpiochip_pin_to_gpio_offset(struct gpio_device *gdev, int pin) { struct gpio_pin_range *pin_range; /* If there are no ranges in this chip, use 1:1 mapping */ - if (list_empty(&chip->pin_ranges)) + if (list_empty(&gdev->pin_ranges)) return pin; - list_for_each_entry(pin_range, &chip->pin_ranges, node) { + list_for_each_entry(pin_range, &gdev->pin_ranges, node) { const struct pinctrl_gpio_range *range = &pin_range->range; int i; if (range->pins) { for (i = 0; i < range->npins; i++) { if (range->pins[i] == pin) - return range->base + i - chip->base; + return range->base + i - gdev->base; } } else { if (pin >= range->pin_base && pin < range->pin_base + range->npins) { unsigned gpio_base; - gpio_base = range->base - chip->base; + gpio_base = range->base - gdev->base; return gpio_base + pin - range->pin_base; } } @@ -102,7 +102,7 @@ static int acpi_gpiochip_pin_to_gpio_offset(struct gpio_chip *chip, int pin) return -EINVAL; } #else -static inline int acpi_gpiochip_pin_to_gpio_offset(struct gpio_chip *chip, +static inline int acpi_gpiochip_pin_to_gpio_offset(struct gpio_device *gdev, int pin) { return pin; @@ -134,7 +134,7 @@ static struct gpio_desc *acpi_get_gpiod(char *path, int pin) if (!chip) return ERR_PTR(-EPROBE_DEFER); - offset = acpi_gpiochip_pin_to_gpio_offset(chip, pin); + offset = acpi_gpiochip_pin_to_gpio_offset(chip->gpiodev, pin); if (offset < 0) return ERR_PTR(offset); @@ -202,7 +202,7 @@ static acpi_status acpi_gpiochip_request_interrupt(struct acpi_resource *ares, if (!handler) return AE_BAD_PARAMETER; - pin = acpi_gpiochip_pin_to_gpio_offset(chip, pin); + pin = acpi_gpiochip_pin_to_gpio_offset(chip->gpiodev, pin); if (pin < 0) return AE_BAD_PARAMETER; @@ -673,7 +673,7 @@ acpi_gpio_adr_space_handler(u32 function, acpi_physical_address address, struct gpio_desc *desc; bool found; - pin = acpi_gpiochip_pin_to_gpio_offset(chip, pin); + pin = acpi_gpiochip_pin_to_gpio_offset(chip->gpiodev, pin); if (pin < 0) { status = AE_BAD_PARAMETER; goto out; diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index 646dea4f96ff..28984bbc079c 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -532,8 +532,7 @@ int gpiochip_add_data(struct gpio_chip *chip, void *data) spin_unlock_irqrestore(&gpio_lock, flags); #ifdef CONFIG_PINCTRL - /* FIXME: move pin ranges to gpio_device */ - INIT_LIST_HEAD(&chip->pin_ranges); + 
INIT_LIST_HEAD(&gdev->pin_ranges); #endif status = gpiochip_set_desc_names(chip); @@ -1036,7 +1035,7 @@ int gpiochip_add_pingroup_range(struct gpio_chip *chip, gpio_offset, gpio_offset + pin_range->range.npins - 1, pinctrl_dev_get_devname(pctldev), pin_group); - list_add_tail(&pin_range->node, &chip->pin_ranges); + list_add_tail(&pin_range->node, &gdev->pin_ranges); return 0; } @@ -1085,7 +1084,7 @@ int gpiochip_add_pin_range(struct gpio_chip *chip, const char *pinctl_name, pinctl_name, pin_offset, pin_offset + npins - 1); - list_add_tail(&pin_range->node, &chip->pin_ranges); + list_add_tail(&pin_range->node, &gdev->pin_ranges); return 0; } @@ -1098,8 +1097,9 @@ EXPORT_SYMBOL_GPL(gpiochip_add_pin_range); void gpiochip_remove_pin_ranges(struct gpio_chip *chip) { struct gpio_pin_range *pin_range, *tmp; + struct gpio_device *gdev = chip->gpiodev; - list_for_each_entry_safe(pin_range, tmp, &chip->pin_ranges, node) { + list_for_each_entry_safe(pin_range, tmp, &gdev->pin_ranges, node) { list_del(&pin_range->node); pinctrl_remove_gpio_range(pin_range->pctldev, &pin_range->range); diff --git a/drivers/gpio/gpiolib.h b/drivers/gpio/gpiolib.h index d154984c71d9..5a36908fd39d 100644 --- a/drivers/gpio/gpiolib.h +++ b/drivers/gpio/gpiolib.h @@ -55,6 +55,16 @@ struct gpio_device { int base; u16 ngpio; struct list_head list; + +#ifdef CONFIG_PINCTRL + /* + * If CONFIG_PINCTRL is enabled, then gpio controllers can optionally + * describe the actual pin range which they serve in an SoC. This + * information would be used by pinctrl subsystem to configure + * corresponding pins for gpio usage. + */ + struct list_head pin_ranges; +#endif }; /** diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index 41c6144c473b..e2a934ce3e64 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -181,15 +181,6 @@ struct gpio_chip { int (*of_xlate)(struct gpio_chip *gc, const struct of_phandle_args *gpiospec, u32 *flags); #endif -#ifdef CONFIG_PINCTRL - /* - * If CONFIG_PINCTRL is enabled, then gpio controllers can optionally - * describe the actual pin range which they serve in an SoC. This - * information would be used by pinctrl subsystem to configure - * corresponding pins for gpio usage. - */ - struct list_head pin_ranges; -#endif }; extern const char *gpiochip_is_requested(struct gpio_chip *chip, -- cgit v1.2.3 From 43c54ecade400cf6ca8203f960b525fbe5b73a13 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Thu, 11 Feb 2016 11:37:48 +0100 Subject: gpio: move the subdriver data pointer into gpio_device We move to manage this pointer under gpiolib control rather than leave it in the subdevice's gpio_chip. We can now NULL it after gpiochip_remove() so as to keep things tight.
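The companion driver-side pattern, as a hedged sketch; mychip, its register layout and MYCHIP_IN are illustrative:

	/* in probe(): hand the per-instance state to gpiolib */
	ret = gpiochip_add_data(&mychip->gc, mychip);

	/* in a gpio_chip callback: fetch it back through the accessor */
	static int mychip_get(struct gpio_chip *gc, unsigned offset)
	{
		struct mychip *mychip = gpiochip_get_data(gc);

		return !!(readl(mychip->regs + MYCHIP_IN) & BIT(offset));
	}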
Signed-off-by: Linus Walleij --- drivers/gpio/gpiolib.c | 17 +++++++++++++++-- drivers/gpio/gpiolib.h | 2 ++ include/linux/gpio/driver.h | 7 +------ 3 files changed, 18 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index 28984bbc079c..aa4a60e19339 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -480,8 +480,7 @@ int gpiochip_add_data(struct gpio_chip *chip, void *data) goto err_free_gdev; } gdev->ngpio = chip->ngpio; - /* FIXME: move driver data into gpio_device dev_set_drvdata() */ - chip->data = data; + gdev->data = data; spin_lock_irqsave(&gpio_lock, flags); @@ -602,6 +601,15 @@ err_free_gdev: } EXPORT_SYMBOL_GPL(gpiochip_add_data); +/** + * gpiochip_get_data() - get per-subdriver data for the chip + */ +void *gpiochip_get_data(struct gpio_chip *chip) +{ + return chip->gpiodev->data; +} +EXPORT_SYMBOL_GPL(gpiochip_get_data); + /** * gpiochip_remove() - unregister a gpio_chip * @chip: the chip to unregister @@ -626,6 +634,11 @@ void gpiochip_remove(struct gpio_chip *chip) gpiochip_remove_pin_ranges(chip); gpiochip_free_hogs(chip); of_gpiochip_remove(chip); + /* + * We accept no more calls into the driver from this point, so + * NULL the driver data pointer + */ + gdev->data = NULL; spin_lock_irqsave(&gpio_lock, flags); for (i = 0; i < gdev->ngpio; i++) { diff --git a/drivers/gpio/gpiolib.h b/drivers/gpio/gpiolib.h index 5a36908fd39d..ddbe409ad48f 100644 --- a/drivers/gpio/gpiolib.h +++ b/drivers/gpio/gpiolib.h @@ -37,6 +37,7 @@ struct acpi_device; * of the @descs array. * @base: GPIO base in the DEPRECATED global Linux GPIO numberspace, assigned * at device creation time. + * @data: per-instance data assigned by the driver * @list: links gpio_device:s together for traversal * * This state container holds most of the runtime variable data @@ -54,6 +55,7 @@ struct gpio_device { struct gpio_desc *descs; int base; u16 ngpio; + void *data; struct list_head list; #ifdef CONFIG_PINCTRL diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index e2a934ce3e64..b92ab9efdb69 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -25,7 +25,6 @@ struct gpio_device; * @gpiodev: the internal state holder, opaque struct * @parent: optional parent device providing the GPIOs * @owner: helps prevent removal of modules exporting active GPIOs - * @data: per-instance data assigned by the driver * @request: optional hook for chip-specific activation, such as * enabling module power and clock; may sleep * @free: optional hook for chip-specific deactivation, such as @@ -109,7 +108,6 @@ struct gpio_chip { struct gpio_device *gpiodev; struct device *parent; struct module *owner; - void *data; int (*request)(struct gpio_chip *chip, unsigned offset); @@ -202,10 +200,7 @@ void gpiochip_unlock_as_irq(struct gpio_chip *chip, unsigned int offset); bool gpiochip_line_is_irq(struct gpio_chip *chip, unsigned int offset); /* get driver data */ -static inline void *gpiochip_get_data(struct gpio_chip *chip) -{ - return chip->data; -} +void *gpiochip_get_data(struct gpio_chip *chip); struct gpio_chip *gpiod_to_chip(const struct gpio_desc *desc); -- cgit v1.2.3 From 1e9877902dc7e11d2be038371c6fbf2dfcd469d7 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Fri, 12 Feb 2016 13:01:54 -0800 Subject: mm/gup: Introduce get_user_pages_remote() For protection keys, we need to understand whether protections should be enforced in software or not. 
In general, we enforce protections when working on our own task, but not when on others. We call these "current" and "remote" operations. This patch introduces a new get_user_pages() variant: get_user_pages_remote() Which is a replacement for when get_user_pages() is called on non-current tsk/mm. We also introduce a new gup flag: FOLL_REMOTE which can be used for the "__" gup variants to get this new behavior. The uprobes is_trap_at_addr() location holds mmap_sem and calls get_user_pages(current->mm) on an instruction address. This makes it a pretty unique gup caller. Being an instruction access and also really originating from the kernel (vs. the app), I opted to consider this a 'remote' access where protection keys will not be enforced. Without protection keys, this patch should not change any behavior. Signed-off-by: Dave Hansen Reviewed-by: Thomas Gleixner Cc: Andrea Arcangeli Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Kirill A. Shutemov Cc: Linus Torvalds Cc: Naoya Horiguchi Cc: Peter Zijlstra Cc: Rik van Riel Cc: Srikar Dronamraju Cc: Vlastimil Babka Cc: jack@suse.cz Cc: linux-mm@kvack.org Link: http://lkml.kernel.org/r/20160212210154.3F0E51EA@viggo.jf.intel.com Signed-off-by: Ingo Molnar --- drivers/gpu/drm/etnaviv/etnaviv_gem.c | 6 +++--- drivers/gpu/drm/i915/i915_gem_userptr.c | 10 +++++----- drivers/infiniband/core/umem_odp.c | 8 ++++---- fs/exec.c | 8 ++++++-- include/linux/mm.h | 5 +++++ kernel/events/uprobes.c | 10 ++++++++-- mm/gup.c | 27 ++++++++++++++++++++++----- mm/memory.c | 2 +- mm/process_vm_access.c | 11 ++++++++--- security/tomoyo/domain.c | 9 ++++++++- virt/kvm/async_pf.c | 8 +++++++- 11 files changed, 77 insertions(+), 27 deletions(-) (limited to 'include/linux') diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem.c b/drivers/gpu/drm/etnaviv/etnaviv_gem.c index 4b519e4309b2..97d4457be8d2 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_gem.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_gem.c @@ -753,9 +753,9 @@ static struct page **etnaviv_gem_userptr_do_get_pages( down_read(&mm->mmap_sem); while (pinned < npages) { - ret = get_user_pages(task, mm, ptr, npages - pinned, - !etnaviv_obj->userptr.ro, 0, - pvec + pinned, NULL); + ret = get_user_pages_remote(task, mm, ptr, npages - pinned, + !etnaviv_obj->userptr.ro, 0, + pvec + pinned, NULL); if (ret < 0) break; diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c index 59e45b3a6937..90dbf8121210 100644 --- a/drivers/gpu/drm/i915/i915_gem_userptr.c +++ b/drivers/gpu/drm/i915/i915_gem_userptr.c @@ -584,11 +584,11 @@ __i915_gem_userptr_get_pages_worker(struct work_struct *_work) down_read(&mm->mmap_sem); while (pinned < npages) { - ret = get_user_pages(work->task, mm, - obj->userptr.ptr + pinned * PAGE_SIZE, - npages - pinned, - !obj->userptr.read_only, 0, - pvec + pinned, NULL); + ret = get_user_pages_remote(work->task, mm, + obj->userptr.ptr + pinned * PAGE_SIZE, + npages - pinned, + !obj->userptr.read_only, 0, + pvec + pinned, NULL); if (ret < 0) break; diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c index e69bf266049d..75077a018675 100644 --- a/drivers/infiniband/core/umem_odp.c +++ b/drivers/infiniband/core/umem_odp.c @@ -572,10 +572,10 @@ int ib_umem_odp_map_dma_pages(struct ib_umem *umem, u64 user_virt, u64 bcnt, * complex (and doesn't gain us much performance in most use * cases). 
*/ - npages = get_user_pages(owning_process, owning_mm, user_virt, - gup_num_pages, - access_mask & ODP_WRITE_ALLOWED_BIT, 0, - local_page_list, NULL); + npages = get_user_pages_remote(owning_process, owning_mm, + user_virt, gup_num_pages, + access_mask & ODP_WRITE_ALLOWED_BIT, + 0, local_page_list, NULL); up_read(&owning_mm->mmap_sem); if (npages < 0) diff --git a/fs/exec.c b/fs/exec.c index dcd4ac7d3f1e..d885b98b6a00 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -198,8 +198,12 @@ static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos, return NULL; } #endif - ret = get_user_pages(current, bprm->mm, pos, - 1, write, 1, &page, NULL); + /* + * We are doing an exec(). 'current' is the process + * doing the exec and bprm->mm is the new process's mm. + */ + ret = get_user_pages_remote(current, bprm->mm, pos, 1, write, + 1, &page, NULL); if (ret <= 0) return NULL; diff --git a/include/linux/mm.h b/include/linux/mm.h index b1d4b8c7f7cd..faf3b709eead 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1225,6 +1225,10 @@ long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, unsigned long start, unsigned long nr_pages, unsigned int foll_flags, struct page **pages, struct vm_area_struct **vmas, int *nonblocking); +long get_user_pages_remote(struct task_struct *tsk, struct mm_struct *mm, + unsigned long start, unsigned long nr_pages, + int write, int force, struct page **pages, + struct vm_area_struct **vmas); long get_user_pages(struct task_struct *tsk, struct mm_struct *mm, unsigned long start, unsigned long nr_pages, int write, int force, struct page **pages, @@ -2170,6 +2174,7 @@ static inline struct page *follow_page(struct vm_area_struct *vma, #define FOLL_MIGRATION 0x400 /* wait for page to replace migration entry */ #define FOLL_TRIED 0x800 /* a retry, previous pass started an IO */ #define FOLL_MLOCK 0x1000 /* lock present pages */ +#define FOLL_REMOTE 0x2000 /* we are working on non-current tsk/mm */ typedef int (*pte_fn_t)(pte_t *pte, pgtable_t token, unsigned long addr, void *data); diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 0167679182c0..8eef5f55d3f0 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -299,7 +299,7 @@ int uprobe_write_opcode(struct mm_struct *mm, unsigned long vaddr, retry: /* Read the page with vaddr into memory */ - ret = get_user_pages(NULL, mm, vaddr, 1, 0, 1, &old_page, &vma); + ret = get_user_pages_remote(NULL, mm, vaddr, 1, 0, 1, &old_page, &vma); if (ret <= 0) return ret; @@ -1700,7 +1700,13 @@ static int is_trap_at_addr(struct mm_struct *mm, unsigned long vaddr) if (likely(result == 0)) goto out; - result = get_user_pages(NULL, mm, vaddr, 1, 0, 1, &page, NULL); + /* + * The NULL 'tsk' here ensures that any faults that occur here + * will not be accounted to the task. 'mm' *is* current->mm, + * but we treat this as a 'remote' access since it is + * essentially a kernel access to the memory. + */ + result = get_user_pages_remote(NULL, mm, vaddr, 1, 0, 1, &page, NULL); if (result < 0) return result; diff --git a/mm/gup.c b/mm/gup.c index 7bf19ffa2199..36ca850936c9 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -870,7 +870,7 @@ long get_user_pages_unlocked(struct task_struct *tsk, struct mm_struct *mm, EXPORT_SYMBOL(get_user_pages_unlocked); /* - * get_user_pages() - pin user pages in memory + * get_user_pages_remote() - pin user pages in memory * @tsk: the task_struct to use for page fault accounting, or * NULL if faults are not to be recorded. 
* @mm: mm_struct of target mm @@ -924,12 +924,29 @@ EXPORT_SYMBOL(get_user_pages_unlocked); * should use get_user_pages because it cannot pass * FAULT_FLAG_ALLOW_RETRY to handle_mm_fault. */ -long get_user_pages(struct task_struct *tsk, struct mm_struct *mm, - unsigned long start, unsigned long nr_pages, int write, - int force, struct page **pages, struct vm_area_struct **vmas) +long get_user_pages_remote(struct task_struct *tsk, struct mm_struct *mm, + unsigned long start, unsigned long nr_pages, + int write, int force, struct page **pages, + struct vm_area_struct **vmas) { return __get_user_pages_locked(tsk, mm, start, nr_pages, write, force, - pages, vmas, NULL, false, FOLL_TOUCH); + pages, vmas, NULL, false, + FOLL_TOUCH | FOLL_REMOTE); +} +EXPORT_SYMBOL(get_user_pages_remote); + +/* + * This is the same as get_user_pages_remote() for the time + * being. + */ +long get_user_pages(struct task_struct *tsk, struct mm_struct *mm, + unsigned long start, unsigned long nr_pages, + int write, int force, struct page **pages, + struct vm_area_struct **vmas) +{ + return __get_user_pages_locked(tsk, mm, start, nr_pages, + write, force, pages, vmas, NULL, false, + FOLL_TOUCH); } EXPORT_SYMBOL(get_user_pages); diff --git a/mm/memory.c b/mm/memory.c index 38090ca37a08..8bfbad0cca8c 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3685,7 +3685,7 @@ static int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm, void *maddr; struct page *page = NULL; - ret = get_user_pages(tsk, mm, addr, 1, + ret = get_user_pages_remote(tsk, mm, addr, 1, write, 1, &page, &vma); if (ret <= 0) { #ifndef CONFIG_HAVE_IOREMAP_PROT diff --git a/mm/process_vm_access.c b/mm/process_vm_access.c index 5d453e58ddbf..07514d41ebcc 100644 --- a/mm/process_vm_access.c +++ b/mm/process_vm_access.c @@ -98,9 +98,14 @@ static int process_vm_rw_single_vec(unsigned long addr, int pages = min(nr_pages, max_pages_per_loop); size_t bytes; - /* Get the pages we're interested in */ - pages = get_user_pages_unlocked(task, mm, pa, pages, - vm_write, 0, process_pages); + /* + * Get the pages we're interested in. We must + * add FOLL_REMOTE because task/mm might not + * current/current->mm + */ + pages = __get_user_pages_unlocked(task, mm, pa, pages, + vm_write, 0, process_pages, + FOLL_REMOTE); if (pages <= 0) return -EFAULT; diff --git a/security/tomoyo/domain.c b/security/tomoyo/domain.c index 38651454ed08..ade7c6cad172 100644 --- a/security/tomoyo/domain.c +++ b/security/tomoyo/domain.c @@ -874,7 +874,14 @@ bool tomoyo_dump_page(struct linux_binprm *bprm, unsigned long pos, } /* Same with get_arg_page(bprm, pos, 0) in fs/exec.c */ #ifdef CONFIG_MMU - if (get_user_pages(current, bprm->mm, pos, 1, 0, 1, &page, NULL) <= 0) + /* + * This is called at execve() time in order to dig around + * in the argv/environment of the new proceess + * (represented by bprm). 'current' is the process doing + * the execve(). + */ + if (get_user_pages_remote(current, bprm->mm, pos, 1, + 0, 1, &page, NULL) <= 0) return false; #else page = bprm->page[pos / PAGE_SIZE]; diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c index 353159922456..d604e87a510a 100644 --- a/virt/kvm/async_pf.c +++ b/virt/kvm/async_pf.c @@ -79,7 +79,13 @@ static void async_pf_execute(struct work_struct *work) might_sleep(); - get_user_pages_unlocked(NULL, mm, addr, 1, 1, 0, NULL); + /* + * This work is run asynchromously to the task which owns + * mm and might be done in another context, so we must + * use FOLL_REMOTE. 
+ */ + __get_user_pages_unlocked(NULL, mm, addr, 1, 1, 0, NULL, FOLL_REMOTE); + kvm_async_page_present_sync(vcpu, apf); spin_lock(&vcpu->async_pf.lock); -- cgit v1.2.3 From cde70140fed8429acf7a14e2e2cbd3e329036653 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Fri, 12 Feb 2016 13:01:55 -0800 Subject: mm/gup: Overload get_user_pages() functions The concept here was a suggestion from Ingo. The implementation horrors are all mine. This allows get_user_pages(), get_user_pages_unlocked(), and get_user_pages_locked() to be called with or without the leading tsk/mm arguments. We will give a compile-time warning about the old style being __deprecated and we will also WARN_ON() if the non-remote version is used for a remote-style access. Doing this, folks will get nice warnings and will not break the build. This should be nice for -next and will hopefully let developers fix up their own code instead of maintainers needing to do it at merge time. The way we do this is hideous. It uses the __VA_ARGS__ macro functionality to call different functions based on the number of arguments passed to the macro. There's an additional hack to ensure that our EXPORT_SYMBOL() of the deprecated symbols doesn't trigger a warning. We should be able to remove this mess as soon as -rc1 hits in the release after this is merged. Signed-off-by: Dave Hansen Cc: Al Viro Cc: Alexander Kuleshov Cc: Andrea Arcangeli Cc: Andrew Morton Cc: Dan Williams Cc: Dave Hansen Cc: Dominik Dingel Cc: Geliang Tang Cc: Jan Kara Cc: Johannes Weiner Cc: Kirill A. Shutemov Cc: Konstantin Khlebnikov Cc: Leon Romanovsky Cc: Linus Torvalds Cc: Masahiro Yamada Cc: Mateusz Guzik Cc: Maxime Coquelin Cc: Michal Hocko Cc: Naoya Horiguchi Cc: Oleg Nesterov Cc: Paul Gortmaker Cc: Peter Zijlstra Cc: Srikar Dronamraju Cc: Thomas Gleixner Cc: Vladimir Davydov Cc: Vlastimil Babka Cc: Xie XiuQi Cc: linux-kernel@vger.kernel.org Cc: linux-mm@kvack.org Link: http://lkml.kernel.org/r/20160212210155.73222EE1@viggo.jf.intel.com Signed-off-by: Ingo Molnar --- include/linux/mm.h | 74 ++++++++++++++++++++++++++++++++++++++++++++++-------- mm/gup.c | 62 ++++++++++++++++++++++++++++++++++----------- mm/nommu.c | 64 ++++++++++++++++++++++++++++++++-------------- mm/util.c | 4 +-- 4 files changed, 158 insertions(+), 46 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index faf3b709eead..4c7317828fda 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1229,24 +1229,78 @@ long get_user_pages_remote(struct task_struct *tsk, struct mm_struct *mm, unsigned long start, unsigned long nr_pages, int write, int force, struct page **pages, struct vm_area_struct **vmas); -long get_user_pages(struct task_struct *tsk, struct mm_struct *mm, - unsigned long start, unsigned long nr_pages, - int write, int force, struct page **pages, - struct vm_area_struct **vmas); -long get_user_pages_locked(struct task_struct *tsk, struct mm_struct *mm, - unsigned long start, unsigned long nr_pages, - int write, int force, struct page **pages, - int *locked); +long get_user_pages6(unsigned long start, unsigned long nr_pages, + int write, int force, struct page **pages, + struct vm_area_struct **vmas); +long get_user_pages_locked6(unsigned long start, unsigned long nr_pages, + int write, int force, struct page **pages, int *locked); long __get_user_pages_unlocked(struct task_struct *tsk, struct mm_struct *mm, unsigned long start, unsigned long nr_pages, int write, int force, struct page **pages, unsigned int gup_flags); -long 
get_user_pages_unlocked(struct task_struct *tsk, struct mm_struct *mm, - unsigned long start, unsigned long nr_pages, +long get_user_pages_unlocked5(unsigned long start, unsigned long nr_pages, int write, int force, struct page **pages); int get_user_pages_fast(unsigned long start, int nr_pages, int write, struct page **pages); +/* suppress warnings from use in EXPORT_SYMBOL() */ +#ifndef __DISABLE_GUP_DEPRECATED +#define __gup_deprecated __deprecated +#else +#define __gup_deprecated +#endif +/* + * These macros provide backward-compatibility with the old + * get_user_pages() variants which took tsk/mm. These + * functions/macros provide both compile-time __deprecated so we + * can catch old-style use and not break the build. The actual + * functions also have WARN_ON()s to let us know at runtime if + * the get_user_pages() should have been the "remote" variant. + * + * These are hideous, but temporary. + * + * If you run into one of these __deprecated warnings, look + * at how you are calling get_user_pages(). If you are calling + * it with current/current->mm as the first two arguments, + * simply remove those arguments. The behavior will be the same + * as it is now. If you are calling it on another task, use + * get_user_pages_remote() instead. + * + * Any questions? Ask Dave Hansen + */ +long +__gup_deprecated +get_user_pages8(struct task_struct *tsk, struct mm_struct *mm, + unsigned long start, unsigned long nr_pages, + int write, int force, struct page **pages, + struct vm_area_struct **vmas); +#define GUP_MACRO(_1, _2, _3, _4, _5, _6, _7, _8, get_user_pages, ...) \ + get_user_pages +#define get_user_pages(...) GUP_MACRO(__VA_ARGS__, \ + get_user_pages8, x, \ + get_user_pages6, x, x, x, x, x)(__VA_ARGS__) + +__gup_deprecated +long get_user_pages_locked8(struct task_struct *tsk, struct mm_struct *mm, + unsigned long start, unsigned long nr_pages, + int write, int force, struct page **pages, + int *locked); +#define GUPL_MACRO(_1, _2, _3, _4, _5, _6, _7, _8, get_user_pages_locked, ...) \ + get_user_pages_locked +#define get_user_pages_locked(...) GUPL_MACRO(__VA_ARGS__, \ + get_user_pages_locked8, x, \ + get_user_pages_locked6, x, x, x, x)(__VA_ARGS__) + +__gup_deprecated +long get_user_pages_unlocked7(struct task_struct *tsk, struct mm_struct *mm, + unsigned long start, unsigned long nr_pages, + int write, int force, struct page **pages); +#define GUPU_MACRO(_1, _2, _3, _4, _5, _6, _7, get_user_pages_unlocked, ...) \ + get_user_pages_unlocked +#define get_user_pages_unlocked(...) 
GUPU_MACRO(__VA_ARGS__, \ + get_user_pages_unlocked7, x, \ + get_user_pages_unlocked5, x, x, x, x)(__VA_ARGS__) + /* Container for pinned pfns / pages */ struct frame_vector { unsigned int nr_allocated; /* Number of frames we have space for */ diff --git a/mm/gup.c b/mm/gup.c index 36ca850936c9..8a035e042b35 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -1,3 +1,4 @@ +#define __DISABLE_GUP_DEPRECATED 1 #include #include #include @@ -807,15 +808,15 @@ static __always_inline long __get_user_pages_locked(struct task_struct *tsk, * if (locked) * up_read(&mm->mmap_sem); */ -long get_user_pages_locked(struct task_struct *tsk, struct mm_struct *mm, - unsigned long start, unsigned long nr_pages, +long get_user_pages_locked6(unsigned long start, unsigned long nr_pages, int write, int force, struct page **pages, int *locked) { - return __get_user_pages_locked(tsk, mm, start, nr_pages, write, force, - pages, NULL, locked, true, FOLL_TOUCH); + return __get_user_pages_locked(current, current->mm, start, nr_pages, + write, force, pages, NULL, locked, true, + FOLL_TOUCH); } -EXPORT_SYMBOL(get_user_pages_locked); +EXPORT_SYMBOL(get_user_pages_locked6); /* * Same as get_user_pages_unlocked(...., FOLL_TOUCH) but it allows to @@ -860,14 +861,13 @@ EXPORT_SYMBOL(__get_user_pages_unlocked); * or if "force" shall be set to 1 (get_user_pages_fast misses the * "force" parameter). */ -long get_user_pages_unlocked(struct task_struct *tsk, struct mm_struct *mm, - unsigned long start, unsigned long nr_pages, +long get_user_pages_unlocked5(unsigned long start, unsigned long nr_pages, int write, int force, struct page **pages) { - return __get_user_pages_unlocked(tsk, mm, start, nr_pages, write, - force, pages, FOLL_TOUCH); + return __get_user_pages_unlocked(current, current->mm, start, nr_pages, + write, force, pages, FOLL_TOUCH); } -EXPORT_SYMBOL(get_user_pages_unlocked); +EXPORT_SYMBOL(get_user_pages_unlocked5); /* * get_user_pages_remote() - pin user pages in memory @@ -939,16 +939,15 @@ EXPORT_SYMBOL(get_user_pages_remote); * This is the same as get_user_pages_remote() for the time * being. */ -long get_user_pages(struct task_struct *tsk, struct mm_struct *mm, - unsigned long start, unsigned long nr_pages, +long get_user_pages6(unsigned long start, unsigned long nr_pages, int write, int force, struct page **pages, struct vm_area_struct **vmas) { - return __get_user_pages_locked(tsk, mm, start, nr_pages, + return __get_user_pages_locked(current, current->mm, start, nr_pages, write, force, pages, vmas, NULL, false, FOLL_TOUCH); } -EXPORT_SYMBOL(get_user_pages); +EXPORT_SYMBOL(get_user_pages6); /** * populate_vma_page_range() - populate a range of pages in the vma. 
@@ -1484,3 +1483,38 @@ int get_user_pages_fast(unsigned long start, int nr_pages, int write, } #endif /* CONFIG_HAVE_GENERIC_RCU_GUP */ + +long get_user_pages8(struct task_struct *tsk, struct mm_struct *mm, + unsigned long start, unsigned long nr_pages, + int write, int force, struct page **pages, + struct vm_area_struct **vmas) +{ + WARN_ONCE(tsk != current, "get_user_pages() called on remote task"); + WARN_ONCE(mm != current->mm, "get_user_pages() called on remote mm"); + + return get_user_pages6(start, nr_pages, write, force, pages, vmas); +} +EXPORT_SYMBOL(get_user_pages8); + +long get_user_pages_locked8(struct task_struct *tsk, struct mm_struct *mm, + unsigned long start, unsigned long nr_pages, + int write, int force, struct page **pages, int *locked) +{ + WARN_ONCE(tsk != current, "get_user_pages_locked() called on remote task"); + WARN_ONCE(mm != current->mm, "get_user_pages_locked() called on remote mm"); + + return get_user_pages_locked6(start, nr_pages, write, force, pages, locked); +} +EXPORT_SYMBOL(get_user_pages_locked8); + +long get_user_pages_unlocked7(struct task_struct *tsk, struct mm_struct *mm, + unsigned long start, unsigned long nr_pages, + int write, int force, struct page **pages) +{ + WARN_ONCE(tsk != current, "get_user_pages_unlocked() called on remote task"); + WARN_ONCE(mm != current->mm, "get_user_pages_unlocked() called on remote mm"); + + return get_user_pages_unlocked5(start, nr_pages, write, force, pages); +} +EXPORT_SYMBOL(get_user_pages_unlocked7); + diff --git a/mm/nommu.c b/mm/nommu.c index fbf6f0f1d6c9..b64d04d19702 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -15,6 +15,8 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#define __DISABLE_GUP_DEPRECATED + #include #include #include @@ -182,8 +184,7 @@ finish_or_fault: * slab page or a secondary page from a compound page * - don't permit access to VMAs that don't support it, such as I/O mappings */ -long get_user_pages(struct task_struct *tsk, struct mm_struct *mm, - unsigned long start, unsigned long nr_pages, +long get_user_pages6(unsigned long start, unsigned long nr_pages, int write, int force, struct page **pages, struct vm_area_struct **vmas) { @@ -194,20 +195,18 @@ long get_user_pages(struct task_struct *tsk, struct mm_struct *mm, if (force) flags |= FOLL_FORCE; - return __get_user_pages(tsk, mm, start, nr_pages, flags, pages, vmas, - NULL); + return __get_user_pages(current, current->mm, start, nr_pages, flags, + pages, vmas, NULL); } -EXPORT_SYMBOL(get_user_pages); +EXPORT_SYMBOL(get_user_pages6); -long get_user_pages_locked(struct task_struct *tsk, struct mm_struct *mm, - unsigned long start, unsigned long nr_pages, - int write, int force, struct page **pages, - int *locked) +long get_user_pages_locked6(unsigned long start, unsigned long nr_pages, + int write, int force, struct page **pages, + int *locked) { - return get_user_pages(tsk, mm, start, nr_pages, write, force, - pages, NULL); + return get_user_pages6(start, nr_pages, write, force, pages, NULL); } -EXPORT_SYMBOL(get_user_pages_locked); +EXPORT_SYMBOL(get_user_pages_locked6); long __get_user_pages_unlocked(struct task_struct *tsk, struct mm_struct *mm, unsigned long start, unsigned long nr_pages, @@ -216,21 +215,20 @@ long __get_user_pages_unlocked(struct task_struct *tsk, struct mm_struct *mm, { long ret; down_read(&mm->mmap_sem); - ret = get_user_pages(tsk, mm, start, nr_pages, write, force, - pages, NULL); + ret = __get_user_pages(tsk, mm, start, nr_pages, gup_flags, pages, + NULL, NULL); up_read(&mm->mmap_sem); return ret; } 
EXPORT_SYMBOL(__get_user_pages_unlocked); -long get_user_pages_unlocked(struct task_struct *tsk, struct mm_struct *mm, - unsigned long start, unsigned long nr_pages, +long get_user_pages_unlocked5(unsigned long start, unsigned long nr_pages, int write, int force, struct page **pages) { - return __get_user_pages_unlocked(tsk, mm, start, nr_pages, write, - force, pages, 0); + return __get_user_pages_unlocked(current, current->mm, start, nr_pages, + write, force, pages, 0); } -EXPORT_SYMBOL(get_user_pages_unlocked); +EXPORT_SYMBOL(get_user_pages_unlocked5); /** * follow_pfn - look up PFN at a user virtual address @@ -2108,3 +2106,31 @@ static int __meminit init_admin_reserve(void) return 0; } subsys_initcall(init_admin_reserve); + +long get_user_pages8(struct task_struct *tsk, struct mm_struct *mm, + unsigned long start, unsigned long nr_pages, + int write, int force, struct page **pages, + struct vm_area_struct **vmas) +{ + return get_user_pages6(start, nr_pages, write, force, pages, vmas); +} +EXPORT_SYMBOL(get_user_pages8); + +long get_user_pages_locked8(struct task_struct *tsk, struct mm_struct *mm, + unsigned long start, unsigned long nr_pages, + int write, int force, struct page **pages, + int *locked) +{ + return get_user_pages_locked6(start, nr_pages, write, + force, pages, locked); +} +EXPORT_SYMBOL(get_user_pages_locked8); + +long get_user_pages_unlocked7(struct task_struct *tsk, struct mm_struct *mm, + unsigned long start, unsigned long nr_pages, + int write, int force, struct page **pages) +{ + return get_user_pages_unlocked5(start, nr_pages, write, force, pages); +} +EXPORT_SYMBOL(get_user_pages_unlocked7); + diff --git a/mm/util.c b/mm/util.c index 4fb14ca5a419..1e6011699cab 100644 --- a/mm/util.c +++ b/mm/util.c @@ -283,9 +283,7 @@ EXPORT_SYMBOL_GPL(__get_user_pages_fast); int __weak get_user_pages_fast(unsigned long start, int nr_pages, int write, struct page **pages) { - struct mm_struct *mm = current->mm; - return get_user_pages_unlocked(current, mm, start, nr_pages, - write, 0, pages); + return get_user_pages_unlocked(start, nr_pages, write, 0, pages); } EXPORT_SYMBOL_GPL(get_user_pages_fast); -- cgit v1.2.3 From 2e8741599cf128ea27674d9ae93b46e847f820b4 Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Thu, 14 Jan 2016 17:58:02 +0000 Subject: firmware: arm_scpi: add support for 64-bit sensor values SCPI specification version 1.1 extended the sensor values from 32-bit to 64-bit in order to accommodate a new sensor class with 64-bit requirements. Since the SCPI driver sets the upper 32 bits to zero for older protocol versions, there's no need to explicitly check the SCPI protocol version, and backward compatibility is maintained.
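On the consumer side only the value type changes; a hedged sketch of a reader going through the ops table, where sensor_id and the printout are illustrative:

	struct scpi_ops *ops = get_scpi_ops();
	u64 value;

	/* value is the full 64-bit reading; old firmware yields hi_val == 0 */
	if (ops && !ops->sensor_get_value(sensor_id, &value))
		pr_info("SCPI sensor %u: %llu\n", sensor_id, value);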
Acked-by: Guenter Roeck Reviewed-by: Punit Agrawal Signed-off-by: Sudeep Holla --- drivers/firmware/arm_scpi.c | 8 +++++--- drivers/hwmon/scpi-hwmon.c | 6 +++--- include/linux/scpi_protocol.h | 2 +- 3 files changed, 9 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/drivers/firmware/arm_scpi.c b/drivers/firmware/arm_scpi.c index c32ac6e61ba2..7e3e595c9f30 100644 --- a/drivers/firmware/arm_scpi.c +++ b/drivers/firmware/arm_scpi.c @@ -231,7 +231,8 @@ struct _scpi_sensor_info { }; struct sensor_value { - __le32 val; + __le32 lo_val; + __le32 hi_val; } __packed; static struct scpi_drvinfo *scpi_info; @@ -525,7 +526,7 @@ static int scpi_sensor_get_info(u16 sensor_id, struct scpi_sensor_info *info) return ret; } -int scpi_sensor_get_value(u16 sensor, u32 *val) +int scpi_sensor_get_value(u16 sensor, u64 *val) { __le16 id = cpu_to_le16(sensor); struct sensor_value buf; @@ -534,7 +535,8 @@ int scpi_sensor_get_value(u16 sensor, u32 *val) ret = scpi_send_message(SCPI_CMD_SENSOR_VALUE, &id, sizeof(id), &buf, sizeof(buf)); if (!ret) - *val = le32_to_cpu(buf.val); + *val = (u64)le32_to_cpu(buf.hi_val) << 32 | + le32_to_cpu(buf.lo_val); return ret; } diff --git a/drivers/hwmon/scpi-hwmon.c b/drivers/hwmon/scpi-hwmon.c index 7e20567bc369..7101b14b5137 100644 --- a/drivers/hwmon/scpi-hwmon.c +++ b/drivers/hwmon/scpi-hwmon.c @@ -52,7 +52,7 @@ static int scpi_read_temp(void *dev, int *temp) struct scpi_sensors *scpi_sensors = zone->scpi_sensors; struct scpi_ops *scpi_ops = scpi_sensors->scpi_ops; struct sensor_data *sensor = &scpi_sensors->data[zone->sensor_id]; - u32 value; + u64 value; int ret; ret = scpi_ops->sensor_get_value(sensor->info.sensor_id, &value); @@ -70,7 +70,7 @@ scpi_show_sensor(struct device *dev, struct device_attribute *attr, char *buf) struct scpi_sensors *scpi_sensors = dev_get_drvdata(dev); struct scpi_ops *scpi_ops = scpi_sensors->scpi_ops; struct sensor_data *sensor; - u32 value; + u64 value; int ret; sensor = container_of(attr, struct sensor_data, dev_attr_input); @@ -79,7 +79,7 @@ scpi_show_sensor(struct device *dev, struct device_attribute *attr, char *buf) if (ret) return ret; - return sprintf(buf, "%u\n", value); + return sprintf(buf, "%llu\n", value); } static ssize_t diff --git a/include/linux/scpi_protocol.h b/include/linux/scpi_protocol.h index 72ce932c69b2..ecd248d46281 100644 --- a/include/linux/scpi_protocol.h +++ b/include/linux/scpi_protocol.h @@ -68,7 +68,7 @@ struct scpi_ops { struct scpi_dvfs_info *(*dvfs_get_info)(u8); int (*sensor_get_capability)(u16 *sensors); int (*sensor_get_info)(u16 sensor_id, struct scpi_sensor_info *); - int (*sensor_get_value)(u16, u32 *); + int (*sensor_get_value)(u16, u64 *); }; #if IS_REACHABLE(CONFIG_ARM_SCPI_PROTOCOL) -- cgit v1.2.3 From fb3b07ef399bd6984f3361a709829618b75e98d8 Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Mon, 25 Jan 2016 10:53:38 +0000 Subject: hwmon: (scpi) add energy meter support SCPI specification v1.1 adds support for energy sensors. This patch adds support for the same. 
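Userspace then sees the usual hwmon attributes for the new class; a hypothetical session (the paths, label and reading are invented for illustration, and per the hwmon ABI energy*_input is a cumulative value in microjoules):

	$ cat /sys/class/hwmon/hwmon0/energy1_label
	vdd-core-energy
	$ cat /sys/class/hwmon/hwmon0/energy1_input
	18276129070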
Acked-by: Guenter Roeck Reviewed-by: Punit Agrawal Signed-off-by: Sudeep Holla --- drivers/hwmon/scpi-hwmon.c | 8 ++++++++ include/linux/scpi_protocol.h | 1 + 2 files changed, 9 insertions(+) (limited to 'include/linux') diff --git a/drivers/hwmon/scpi-hwmon.c b/drivers/hwmon/scpi-hwmon.c index 7101b14b5137..912b449c8303 100644 --- a/drivers/hwmon/scpi-hwmon.c +++ b/drivers/hwmon/scpi-hwmon.c @@ -114,6 +114,7 @@ static int scpi_hwmon_probe(struct platform_device *pdev) { u16 nr_sensors, i; int num_temp = 0, num_volt = 0, num_current = 0, num_power = 0; + int num_energy = 0; struct scpi_ops *scpi_ops; struct device *hwdev, *dev = &pdev->dev; struct scpi_sensors *scpi_sensors; @@ -182,6 +183,13 @@ static int scpi_hwmon_probe(struct platform_device *pdev) "power%d_label", num_power + 1); num_power++; break; + case ENERGY: + snprintf(sensor->input, sizeof(sensor->input), + "energy%d_input", num_energy + 1); + snprintf(sensor->label, sizeof(sensor->label), + "energy%d_label", num_energy + 1); + num_energy++; + break; default: continue; } diff --git a/include/linux/scpi_protocol.h b/include/linux/scpi_protocol.h index ecd248d46281..35de50a65665 100644 --- a/include/linux/scpi_protocol.h +++ b/include/linux/scpi_protocol.h @@ -33,6 +33,7 @@ enum scpi_sensor_class { VOLTAGE, CURRENT, POWER, + ENERGY, }; struct scpi_sensor_info { -- cgit v1.2.3 From 143b65d677a59764e438d457bf2510b3fa5b90f8 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Tue, 16 Feb 2016 15:41:42 +0100 Subject: gpio: create an API to detect open drain/source on lines My left hand merges code to privatize the descriptor handling while my right hand merges drivers that poke around and disrespect the same gpiolib internals. So let's expose the proper APIs for drivers to ask the gpiolib core if a line is marked as open drain or open source and get some order around things so this driver compiles again. Reported-by: Stephen Rothwell Cc: Nicolas Saenz Julienne Signed-off-by: Linus Walleij --- drivers/gpio/gpio-tps65218.c | 9 ++++----- drivers/gpio/gpiolib.c | 18 ++++++++++++++++++ include/linux/gpio/driver.h | 4 ++++ 3 files changed, 26 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/drivers/gpio/gpio-tps65218.c b/drivers/gpio/gpio-tps65218.c index 7b02f7be9bc9..9eb1a5ab2d95 100644 --- a/drivers/gpio/gpio-tps65218.c +++ b/drivers/gpio/gpio-tps65218.c @@ -71,17 +71,16 @@ static int tps65218_gpio_request(struct gpio_chip *gc, unsigned offset) { struct tps65218_gpio *tps65218_gpio = gpiochip_get_data(gc); struct tps65218 *tps65218 = tps65218_gpio->tps65218; - unsigned long flags = gc->desc[offset].flags; int ret; - if (flags & FLAG_OPEN_SOURCE) { + if (gpiochip_line_is_open_source(gc, offset)) { dev_err(gc->parent, "can't work as open source\n"); return -EINVAL; } switch (offset) { case 0: - if (!(flags & FLAG_OPEN_DRAIN)) { + if (!gpiochip_line_is_open_drain(gc, offset)) { dev_err(gc->parent, "GPO1 works only as open drain\n"); return -EINVAL; } @@ -103,7 +102,7 @@ static int tps65218_gpio_request(struct gpio_chip *gc, unsigned offset) break; case 1: /* GP02 is push-pull by default, can be set as open drain.
*/ - if (flags & FLAG_OPEN_DRAIN) { + if (gpiochip_line_is_open_drain(gc, offset)) { ret = tps65218_clear_bits(tps65218, TPS65218_REG_CONFIG1, TPS65218_CONFIG1_GPO2_BUF, @@ -122,7 +121,7 @@ static int tps65218_gpio_request(struct gpio_chip *gc, unsigned offset) break; case 2: - if (!(flags & FLAG_OPEN_DRAIN)) { + if (!gpiochip_line_is_open_drain(gc, offset)) { dev_err(gc->parent, "GPO3 works only as open drain\n"); return -EINVAL; } diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index aa4a60e19339..d8511cd68e7b 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -1901,6 +1901,24 @@ bool gpiochip_line_is_irq(struct gpio_chip *chip, unsigned int offset) } EXPORT_SYMBOL_GPL(gpiochip_line_is_irq); +bool gpiochip_line_is_open_drain(struct gpio_chip *chip, unsigned int offset) +{ + if (offset >= chip->ngpio) + return false; + + return test_bit(FLAG_OPEN_DRAIN, &chip->gpiodev->descs[offset].flags); +} +EXPORT_SYMBOL_GPL(gpiochip_line_is_open_drain); + +bool gpiochip_line_is_open_source(struct gpio_chip *chip, unsigned int offset) +{ + if (offset >= chip->ngpio) + return false; + + return test_bit(FLAG_OPEN_SOURCE, &chip->gpiodev->descs[offset].flags); +} +EXPORT_SYMBOL_GPL(gpiochip_line_is_open_source); + /** * gpiod_get_raw_value_cansleep() - return a gpio's raw value * @desc: gpio whose value will be returned diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index b92ab9efdb69..ff96d0f9fceb 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -199,6 +199,10 @@ int gpiochip_lock_as_irq(struct gpio_chip *chip, unsigned int offset); void gpiochip_unlock_as_irq(struct gpio_chip *chip, unsigned int offset); bool gpiochip_line_is_irq(struct gpio_chip *chip, unsigned int offset); +/* Line status inquiry for drivers */ +bool gpiochip_line_is_open_drain(struct gpio_chip *chip, unsigned int offset); +bool gpiochip_line_is_open_source(struct gpio_chip *chip, unsigned int offset); + /* get driver data */ void *gpiochip_get_data(struct gpio_chip *chip); -- cgit v1.2.3 From 656b8035b0eebcac0172b24ca04e448c70dd047f Mon Sep 17 00:00:00 2001 From: Tomeu Vizoso Date: Mon, 15 Feb 2016 09:25:06 +0100 Subject: ARM: 8524/1: driver core: handle -EPROBE_DEFER from bus_type.match() Allow implementations of the match() callback in struct bus_type to return errors and, if it's -EPROBE_DEFER, queue the device for deferred probing. This is useful to buses such as AMBA in which devices are registered before their matching information can be retrieved from the HW (typically because a clock driver hasn't probed yet). [changed if-else code structure, adjusted documentation to match the code, extended comments] Signed-off-by: Tomeu Vizoso Signed-off-by: Marek Szyprowski Reviewed-by: Ulf Hansson Acked-by: Greg Kroah-Hartman Signed-off-by: Russell King --- Documentation/driver-model/porting.txt | 6 ++++-- drivers/base/dd.c | 24 ++++++++++++++++++++++-- include/linux/device.h | 7 +++++-- 3 files changed, 31 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/Documentation/driver-model/porting.txt b/Documentation/driver-model/porting.txt index 92d86f7271b4..453053f1661f 100644 --- a/Documentation/driver-model/porting.txt +++ b/Documentation/driver-model/porting.txt @@ -340,8 +340,10 @@ comparison: int (*match)(struct device * dev, struct device_driver * drv); -match should return '1' if the driver supports the device, and '0' -otherwise. +match should return a positive value if the driver supports the device, +and zero otherwise.
It may also return error code (for example +-EPROBE_DEFER) if determining that given driver supports the device is +not possible. When a device is registered, the bus's list of drivers is iterated over. bus->match() is called for each one until a match is found. diff --git a/drivers/base/dd.c b/drivers/base/dd.c index c4da2df62e02..16688f50729c 100644 --- a/drivers/base/dd.c +++ b/drivers/base/dd.c @@ -560,6 +560,7 @@ static int __device_attach_driver(struct device_driver *drv, void *_data) struct device_attach_data *data = _data; struct device *dev = data->dev; bool async_allowed; + int ret; /* * Check if device has already been claimed. This may @@ -570,8 +571,17 @@ static int __device_attach_driver(struct device_driver *drv, void *_data) if (dev->driver) return -EBUSY; - if (!driver_match_device(drv, dev)) + ret = driver_match_device(drv, dev); + if (ret == 0) { + /* no match */ return 0; + } else if (ret == -EPROBE_DEFER) { + dev_dbg(dev, "Device match requests probe deferral\n"); + driver_deferred_probe_add(dev); + } else if (ret < 0) { + dev_dbg(dev, "Bus failed to match device: %d", ret); + return ret; + } /* ret > 0 means positive match */ async_allowed = driver_allows_async_probing(drv); @@ -691,6 +701,7 @@ void device_initial_probe(struct device *dev) static int __driver_attach(struct device *dev, void *data) { struct device_driver *drv = data; + int ret; /* * Lock device and try to bind to it. We drop the error @@ -702,8 +713,17 @@ static int __driver_attach(struct device *dev, void *data) * is an error. */ - if (!driver_match_device(drv, dev)) + ret = driver_match_device(drv, dev); + if (ret == 0) { + /* no match */ return 0; + } else if (ret == -EPROBE_DEFER) { + dev_dbg(dev, "Device match requests probe deferral\n"); + driver_deferred_probe_add(dev); + } else if (ret < 0) { + dev_dbg(dev, "Bus failed to match device: %d", ret); + return ret; + } /* ret > 0 means positive match */ if (dev->parent) /* Needed for USB */ device_lock(dev->parent); diff --git a/include/linux/device.h b/include/linux/device.h index 6d6f1fec092f..8f2ec813d215 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -70,8 +70,11 @@ extern void bus_remove_file(struct bus_type *, struct bus_attribute *); * @dev_groups: Default attributes of the devices on the bus. * @drv_groups: Default attributes of the device drivers on the bus. * @match: Called, perhaps multiple times, whenever a new device or driver - * is added for this bus. It should return a nonzero value if the - * given device can be handled by the given driver. + * is added for this bus. It should return a positive value if the + * given device can be handled by the given driver and zero + * otherwise. It may also return error code if determining that + * the driver supports the device is not possible. In case of + * -EPROBE_DEFER it will queue the device for deferred probing. * @uevent: Called when a device is added, removed, or a few other things * that generate uevents to add the environment variables. * @probe: Called when a new device or driver add to this bus, and callback -- cgit v1.2.3 From 416dd13ad620a14fbabe5d73584b12e07ce8d02e Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 26 Jan 2016 01:21:26 +0100 Subject: ARM: 8503/1: clk_register_clkdev: remove format string interface Many callers either use NULL or const strings for the third argument of clk_register_clkdev. For those that do not and use a non-const string, this is a risk for format strings being accidentally processed (for example in device names). 
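The hazard being removed here is the classic printf one; a standalone sketch (not from the patch) of why a '%' in a device name must never land in the format slot:

#include <stdio.h>

int main(void)
{
	char buf[64];
	const char *name = "uart%s.0";	/* a device name containing '%' */

	/* BAD: using the name as the format turns "%s" into a conversion
	 * that reads a garbage pointer from the (empty) argument list:
	 *	sprintf(buf, name);
	 */

	/* GOOD: fix the format and pass the name as an argument -- the
	 * same "%s" wrapping trick the patch applies internally. */
	snprintf(buf, sizeof(buf), "%s", name);
	return 0;
}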
As this interface is already used as if it weren't a format string (prints nothing when NULL), and there are zero users of the format strings, remove the format string interface to make sure format strings will not leak into the clkdev. $ git grep '\bclk_register_clkdev\b' | grep % | wc -l 0 Unfortunately, all the internals expect a va_list even though they treat a NULL format string as special. To deal with this, we must pass either (..., "%s", string) or (..., NULL) so that the va_list will be created correctly (passing the name as an argument, not as a format string). Signed-off-by: Kees Cook Signed-off-by: Russell King --- drivers/clk/clkdev.c | 31 +++++++++++++++++++++++++------ include/linux/clkdev.h | 3 +-- 2 files changed, 26 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/drivers/clk/clkdev.c b/drivers/clk/clkdev.c index 779b6ff0c7ad..eb20b941154b 100644 --- a/drivers/clk/clkdev.c +++ b/drivers/clk/clkdev.c @@ -353,11 +353,25 @@ void clkdev_drop(struct clk_lookup *cl) } EXPORT_SYMBOL(clkdev_drop); +static struct clk_lookup *__clk_register_clkdev(struct clk_hw *hw, + const char *con_id, + const char *dev_id, ...) +{ + struct clk_lookup *cl; + va_list ap; + + va_start(ap, dev_id); + cl = vclkdev_create(hw, con_id, dev_id, ap); + va_end(ap); + + return cl; +} + /** * clk_register_clkdev - register one clock lookup for a struct clk * @clk: struct clk to associate with all clk_lookups * @con_id: connection ID string on device - * @dev_id: format string describing device name + * @dev_id: string describing device name * * con_id or dev_id may be NULL as a wildcard, just as in the rest of * clkdev. @@ -368,17 +382,22 @@ EXPORT_SYMBOL(clkdev_drop); * after clk_register(). */ int clk_register_clkdev(struct clk *clk, const char *con_id, - const char *dev_fmt, ...) + const char *dev_id) { struct clk_lookup *cl; - va_list ap; if (IS_ERR(clk)) return PTR_ERR(clk); - va_start(ap, dev_fmt); - cl = vclkdev_create(__clk_get_hw(clk), con_id, dev_fmt, ap); - va_end(ap); + /* + * Since dev_id can be NULL, and NULL is handled specially, we must + * pass it as either a NULL format string, or with "%s". + */ + if (dev_id) + cl = __clk_register_clkdev(__clk_get_hw(clk), con_id, "%s", + dev_id); + else + cl = __clk_register_clkdev(__clk_get_hw(clk), con_id, NULL); return cl ? 0 : -ENOMEM; } diff --git a/include/linux/clkdev.h b/include/linux/clkdev.h index 08bffcc466de..c2c04f7cbe8a 100644 --- a/include/linux/clkdev.h +++ b/include/linux/clkdev.h @@ -44,8 +44,7 @@ struct clk_lookup *clkdev_create(struct clk *clk, const char *con_id, void clkdev_add_table(struct clk_lookup *, size_t); int clk_add_alias(const char *, const char *, const char *, struct device *); -int clk_register_clkdev(struct clk *, const char *, const char *, ...) - __printf(3, 4); +int clk_register_clkdev(struct clk *, const char *, const char *); int clk_register_clkdevs(struct clk *, struct clk_lookup *, size_t); #ifdef CONFIG_COMMON_CLK -- cgit v1.2.3 From 9f6df573a4041f896cbf51f1b3743494196620a7 Mon Sep 17 00:00:00 2001 From: Aditya Kali Date: Fri, 29 Jan 2016 02:54:04 -0600 Subject: kernfs: Add API to generate relative kernfs path The new function kernfs_path_from_node() generates and returns the kernfs path of a given kernfs_node relative to a given parent kernfs_node. Signed-off-by: Aditya Kali Signed-off-by: Serge E.
Hallyn Acked-by: Greg Kroah-Hartman Signed-off-by: Tejun Heo --- fs/kernfs/dir.c | 191 +++++++++++++++++++++++++++++++++++++++++-------- include/linux/kernfs.h | 9 +-- 2 files changed, 165 insertions(+), 35 deletions(-) (limited to 'include/linux') diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c index 821973853340..25d71a53cf43 100644 --- a/fs/kernfs/dir.c +++ b/fs/kernfs/dir.c @@ -44,28 +44,122 @@ static int kernfs_name_locked(struct kernfs_node *kn, char *buf, size_t buflen) return strlcpy(buf, kn->parent ? kn->name : "/", buflen); } -static char * __must_check kernfs_path_locked(struct kernfs_node *kn, char *buf, - size_t buflen) +/* kernfs_node_depth - compute depth from @from to @to */ +static size_t kernfs_depth(struct kernfs_node *from, struct kernfs_node *to) { - char *p = buf + buflen; - int len; + size_t depth = 0; - *--p = '\0'; + while (to->parent && to != from) { + depth++; + to = to->parent; + } + return depth; +} - do { - len = strlen(kn->name); - if (p - buf < len + 1) { - buf[0] = '\0'; - p = NULL; - break; - } - p -= len; - memcpy(p, kn->name, len); - *--p = '/'; - kn = kn->parent; - } while (kn && kn->parent); +static struct kernfs_node *kernfs_common_ancestor(struct kernfs_node *a, + struct kernfs_node *b) +{ + size_t da, db; + struct kernfs_root *ra = kernfs_root(a), *rb = kernfs_root(b); - return p; + if (ra != rb) + return NULL; + + da = kernfs_depth(ra->kn, a); + db = kernfs_depth(rb->kn, b); + + while (da > db) { + a = a->parent; + da--; + } + while (db > da) { + b = b->parent; + db--; + } + + /* worst case b and a will be the same at root */ + while (b != a) { + b = b->parent; + a = a->parent; + } + + return a; +} + +/** + * kernfs_path_from_node_locked - find a pseudo-absolute path to @kn_to, + * where kn_from is treated as root of the path. + * @kn_from: kernfs node which should be treated as root for the path + * @kn_to: kernfs node to which path is needed + * @buf: buffer to copy the path into + * @buflen: size of @buf + * + * We need to handle couple of scenarios here: + * [1] when @kn_from is an ancestor of @kn_to at some level + * kn_from: /n1/n2/n3 + * kn_to: /n1/n2/n3/n4/n5 + * result: /n4/n5 + * + * [2] when @kn_from is on a different hierarchy and we need to find common + * ancestor between @kn_from and @kn_to. + * kn_from: /n1/n2/n3/n4 + * kn_to: /n1/n2/n5 + * result: /../../n5 + * OR + * kn_from: /n1/n2/n3/n4/n5 [depth=5] + * kn_to: /n1/n2/n3 [depth=3] + * result: /../.. + * + * return value: length of the string. If greater than buflen, + * then contents of buf are undefined. On error, -1 is returned. + */ +static int kernfs_path_from_node_locked(struct kernfs_node *kn_to, + struct kernfs_node *kn_from, + char *buf, size_t buflen) +{ + struct kernfs_node *kn, *common; + const char parent_str[] = "/.."; + size_t depth_from, depth_to, len = 0, nlen = 0; + char *p; + int i; + + if (!kn_from) + kn_from = kernfs_root(kn_to)->kn; + + if (kn_from == kn_to) + return strlcpy(buf, "/", buflen); + + common = kernfs_common_ancestor(kn_from, kn_to); + if (WARN_ON(!common)) + return -1; + + depth_to = kernfs_depth(common, kn_to); + depth_from = kernfs_depth(common, kn_from); + + if (buf) + buf[0] = '\0'; + + for (i = 0; i < depth_from; i++) + len += strlcpy(buf + len, parent_str, + len < buflen ? 
buflen - len : 0); + + /* Calculate how many bytes we need for the rest */ + for (kn = kn_to; kn != common; kn = kn->parent) + nlen += strlen(kn->name) + 1; + + if (len + nlen >= buflen) + return len + nlen; + + p = buf + len + nlen; + *p = '\0'; + for (kn = kn_to; kn != common; kn = kn->parent) { + nlen = strlen(kn->name); + p -= nlen; + memcpy(p, kn->name, nlen); + *(--p) = '/'; + } + + return len + nlen; } /** @@ -114,6 +208,34 @@ size_t kernfs_path_len(struct kernfs_node *kn) return len; } +/** + * kernfs_path_from_node - build path of node @to relative to @from. + * @from: parent kernfs_node relative to which we need to build the path + * @to: kernfs_node of interest + * @buf: buffer to copy @to's path into + * @buflen: size of @buf + * + * Builds @to's path relative to @from in @buf. @from and @to must + * be on the same kernfs-root. If @from is not parent of @to, then a relative + * path (which includes '..'s) as needed to reach from @from to @to is + * returned. + * + * If @buf isn't long enough, the return value will be greater than @buflen + * and @buf contents are undefined. + */ +int kernfs_path_from_node(struct kernfs_node *to, struct kernfs_node *from, + char *buf, size_t buflen) +{ + unsigned long flags; + int ret; + + spin_lock_irqsave(&kernfs_rename_lock, flags); + ret = kernfs_path_from_node_locked(to, from, buf, buflen); + spin_unlock_irqrestore(&kernfs_rename_lock, flags); + return ret; +} +EXPORT_SYMBOL_GPL(kernfs_path_from_node); + /** * kernfs_path - build full path of a given node * @kn: kernfs_node of interest @@ -127,13 +249,12 @@ size_t kernfs_path_len(struct kernfs_node *kn) */ char *kernfs_path(struct kernfs_node *kn, char *buf, size_t buflen) { - unsigned long flags; - char *p; + int ret; - spin_lock_irqsave(&kernfs_rename_lock, flags); - p = kernfs_path_locked(kn, buf, buflen); - spin_unlock_irqrestore(&kernfs_rename_lock, flags); - return p; + ret = kernfs_path_from_node(kn, NULL, buf, buflen); + if (ret < 0 || ret >= buflen) + return NULL; + return buf; } EXPORT_SYMBOL_GPL(kernfs_path); @@ -164,17 +285,25 @@ void pr_cont_kernfs_name(struct kernfs_node *kn) void pr_cont_kernfs_path(struct kernfs_node *kn) { unsigned long flags; - char *p; + int sz; spin_lock_irqsave(&kernfs_rename_lock, flags); - p = kernfs_path_locked(kn, kernfs_pr_cont_buf, - sizeof(kernfs_pr_cont_buf)); - if (p) - pr_cont("%s", p); - else - pr_cont(""); + sz = kernfs_path_from_node_locked(kn, NULL, kernfs_pr_cont_buf, + sizeof(kernfs_pr_cont_buf)); + if (sz < 0) { + pr_cont("(error)"); + goto out; + } + + if (sz >= sizeof(kernfs_pr_cont_buf)) { + pr_cont("(name too long)"); + goto out; + } + + pr_cont("%s", kernfs_pr_cont_buf); +out: spin_unlock_irqrestore(&kernfs_rename_lock, flags); } diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h index af51df35d749..716bfdede5f5 100644 --- a/include/linux/kernfs.h +++ b/include/linux/kernfs.h @@ -267,8 +267,9 @@ static inline bool kernfs_ns_enabled(struct kernfs_node *kn) int kernfs_name(struct kernfs_node *kn, char *buf, size_t buflen); size_t kernfs_path_len(struct kernfs_node *kn); -char * __must_check kernfs_path(struct kernfs_node *kn, char *buf, - size_t buflen); +int kernfs_path_from_node(struct kernfs_node *root_kn, struct kernfs_node *kn, + char *buf, size_t buflen); +char *kernfs_path(struct kernfs_node *kn, char *buf, size_t buflen); void pr_cont_kernfs_name(struct kernfs_node *kn); void pr_cont_kernfs_path(struct kernfs_node *kn); struct kernfs_node *kernfs_get_parent(struct kernfs_node *kn); @@ -338,8 +339,8 @@ static inline 
int kernfs_name(struct kernfs_node *kn, char *buf, size_t buflen) static inline size_t kernfs_path_len(struct kernfs_node *kn) { return 0; } -static inline char * __must_check kernfs_path(struct kernfs_node *kn, char *buf, - size_t buflen) +static inline char *kernfs_path(struct kernfs_node *kn, char *buf, + size_t buflen) { return NULL; } static inline void pr_cont_kernfs_name(struct kernfs_node *kn) { } -- cgit v1.2.3 From a79a908fd2b080977b45bf103184b81c9d11ad07 Mon Sep 17 00:00:00 2001 From: Aditya Kali Date: Fri, 29 Jan 2016 02:54:06 -0600 Subject: cgroup: introduce cgroup namespaces Introduce the ability to create new cgroup namespace. The newly created cgroup namespace remembers the cgroup of the process at the point of creation of the cgroup namespace (referred as cgroupns-root). The main purpose of cgroup namespace is to virtualize the contents of /proc/self/cgroup file. Processes inside a cgroup namespace are only able to see paths relative to their namespace root (unless they are moved outside of their cgroupns-root, at which point they will see a relative path from their cgroupns-root). For a correctly setup container this enables container-tools (like libcontainer, lxc, lmctfy, etc.) to create completely virtualized containers without leaking system level cgroup hierarchy to the task. This patch only implements the 'unshare' part of the cgroupns. Signed-off-by: Aditya Kali Signed-off-by: Serge Hallyn Signed-off-by: Tejun Heo --- fs/proc/namespaces.c | 3 + include/linux/cgroup.h | 49 ++++++++++++++ include/linux/nsproxy.h | 2 + include/linux/proc_ns.h | 4 ++ kernel/cgroup.c | 173 +++++++++++++++++++++++++++++++++++++++++++++++- kernel/cpuset.c | 8 +-- kernel/fork.c | 2 +- kernel/nsproxy.c | 19 +++++- 8 files changed, 250 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c index 276f12431dbf..72cb26f85d58 100644 --- a/fs/proc/namespaces.c +++ b/fs/proc/namespaces.c @@ -28,6 +28,9 @@ static const struct proc_ns_operations *ns_entries[] = { &userns_operations, #endif &mntns_operations, +#ifdef CONFIG_CGROUPS + &cgroupns_operations, +#endif }; static const char *proc_ns_get_link(struct dentry *dentry, diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 2162dca88dc0..a20320c666fd 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -17,6 +17,11 @@ #include #include #include +#include +#include +#include +#include +#include #include @@ -611,4 +616,48 @@ static inline void cgroup_sk_free(struct sock_cgroup_data *skcd) {} #endif /* CONFIG_CGROUP_DATA */ +struct cgroup_namespace { + atomic_t count; + struct ns_common ns; + struct user_namespace *user_ns; + struct css_set *root_cset; +}; + +extern struct cgroup_namespace init_cgroup_ns; + +#ifdef CONFIG_CGROUPS + +void free_cgroup_ns(struct cgroup_namespace *ns); + +struct cgroup_namespace *copy_cgroup_ns(unsigned long flags, + struct user_namespace *user_ns, + struct cgroup_namespace *old_ns); + +char *cgroup_path_ns(struct cgroup *cgrp, char *buf, size_t buflen, + struct cgroup_namespace *ns); + +#else /* !CONFIG_CGROUPS */ + +static inline void free_cgroup_ns(struct cgroup_namespace *ns) { } +static inline struct cgroup_namespace * +copy_cgroup_ns(unsigned long flags, struct user_namespace *user_ns, + struct cgroup_namespace *old_ns) +{ + return old_ns; +} + +#endif /* !CONFIG_CGROUPS */ + +static inline void get_cgroup_ns(struct cgroup_namespace *ns) +{ + if (ns) + atomic_inc(&ns->count); +} + +static inline void put_cgroup_ns(struct 
cgroup_namespace *ns) +{ + if (ns && atomic_dec_and_test(&ns->count)) + free_cgroup_ns(ns); +} + #endif /* _LINUX_CGROUP_H */ diff --git a/include/linux/nsproxy.h b/include/linux/nsproxy.h index 35fa08fd7739..ac0d65bef5d0 100644 --- a/include/linux/nsproxy.h +++ b/include/linux/nsproxy.h @@ -8,6 +8,7 @@ struct mnt_namespace; struct uts_namespace; struct ipc_namespace; struct pid_namespace; +struct cgroup_namespace; struct fs_struct; /* @@ -33,6 +34,7 @@ struct nsproxy { struct mnt_namespace *mnt_ns; struct pid_namespace *pid_ns_for_children; struct net *net_ns; + struct cgroup_namespace *cgroup_ns; }; extern struct nsproxy init_nsproxy; diff --git a/include/linux/proc_ns.h b/include/linux/proc_ns.h index 42dfc615dbf8..de0e7719d4c5 100644 --- a/include/linux/proc_ns.h +++ b/include/linux/proc_ns.h @@ -9,6 +9,8 @@ struct pid_namespace; struct nsproxy; struct path; +struct task_struct; +struct inode; struct proc_ns_operations { const char *name; @@ -24,6 +26,7 @@ extern const struct proc_ns_operations ipcns_operations; extern const struct proc_ns_operations pidns_operations; extern const struct proc_ns_operations userns_operations; extern const struct proc_ns_operations mntns_operations; +extern const struct proc_ns_operations cgroupns_operations; /* * We always define these enumerators @@ -34,6 +37,7 @@ enum { PROC_UTS_INIT_INO = 0xEFFFFFFEU, PROC_USER_INIT_INO = 0xEFFFFFFDU, PROC_PID_INIT_INO = 0xEFFFFFFCU, + PROC_CGROUP_INIT_INO = 0xEFFFFFFBU, }; #ifdef CONFIG_PROC_FS diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 7ad61915967f..b001c5d36bec 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -59,6 +59,9 @@ #include #include #include +#include +#include +#include #include /* @@ -212,6 +215,15 @@ static unsigned long have_fork_callback __read_mostly; static unsigned long have_exit_callback __read_mostly; static unsigned long have_free_callback __read_mostly; +/* cgroup namespace for init task */ +struct cgroup_namespace init_cgroup_ns = { + .count = { .counter = 2, }, + .user_ns = &init_user_ns, + .ns.ops = &cgroupns_operations, + .ns.inum = PROC_CGROUP_INIT_INO, + .root_cset = &init_css_set, +}; + /* Ditto for the can_fork callback. 
*/ static unsigned long have_canfork_callback __read_mostly; @@ -2177,6 +2189,35 @@ static struct file_system_type cgroup2_fs_type = { .kill_sb = cgroup_kill_sb, }; +static char *cgroup_path_ns_locked(struct cgroup *cgrp, char *buf, size_t buflen, + struct cgroup_namespace *ns) +{ + struct cgroup *root = cset_cgroup_from_root(ns->root_cset, cgrp->root); + int ret; + + ret = kernfs_path_from_node(cgrp->kn, root->kn, buf, buflen); + if (ret < 0 || ret >= buflen) + return NULL; + return buf; +} + +char *cgroup_path_ns(struct cgroup *cgrp, char *buf, size_t buflen, + struct cgroup_namespace *ns) +{ + char *ret; + + mutex_lock(&cgroup_mutex); + spin_lock_bh(&css_set_lock); + + ret = cgroup_path_ns_locked(cgrp, buf, buflen, ns); + + spin_unlock_bh(&css_set_lock); + mutex_unlock(&cgroup_mutex); + + return ret; +} +EXPORT_SYMBOL_GPL(cgroup_path_ns); + /** * task_cgroup_path - cgroup path of a task in the first cgroup hierarchy * @task: target task @@ -2204,7 +2245,7 @@ char *task_cgroup_path(struct task_struct *task, char *buf, size_t buflen) if (root) { cgrp = task_cgroup_from_root(task, root); - path = cgroup_path(cgrp, buf, buflen); + path = cgroup_path_ns_locked(cgrp, buf, buflen, &init_cgroup_ns); } else { /* if no hierarchy exists, everyone is in "/" */ if (strlcpy(buf, "/", buflen) < buflen) @@ -5297,6 +5338,8 @@ int __init cgroup_init(void) BUG_ON(cgroup_init_cftypes(NULL, cgroup_dfl_base_files)); BUG_ON(cgroup_init_cftypes(NULL, cgroup_legacy_base_files)); + get_user_ns(init_cgroup_ns.user_ns); + mutex_lock(&cgroup_mutex); /* Add init_css_set to the hash table */ @@ -5438,7 +5481,8 @@ int proc_cgroup_show(struct seq_file *m, struct pid_namespace *ns, * " (deleted)" is appended to the cgroup path. */ if (cgroup_on_dfl(cgrp) || !(tsk->flags & PF_EXITING)) { - path = cgroup_path(cgrp, buf, PATH_MAX); + path = cgroup_path_ns_locked(cgrp, buf, PATH_MAX, + current->nsproxy->cgroup_ns); if (!path) { retval = -ENAMETOOLONG; goto out_unlock; @@ -5720,7 +5764,9 @@ static void cgroup_release_agent(struct work_struct *work) if (!pathbuf || !agentbuf) goto out; - path = cgroup_path(cgrp, pathbuf, PATH_MAX); + spin_lock_bh(&css_set_lock); + path = cgroup_path_ns_locked(cgrp, pathbuf, PATH_MAX, &init_cgroup_ns); + spin_unlock_bh(&css_set_lock); if (!path) goto out; @@ -5931,6 +5977,127 @@ void cgroup_sk_free(struct sock_cgroup_data *skcd) #endif /* CONFIG_SOCK_CGROUP_DATA */ +/* cgroup namespaces */ + +static struct cgroup_namespace *alloc_cgroup_ns(void) +{ + struct cgroup_namespace *new_ns; + int ret; + + new_ns = kzalloc(sizeof(struct cgroup_namespace), GFP_KERNEL); + if (!new_ns) + return ERR_PTR(-ENOMEM); + ret = ns_alloc_inum(&new_ns->ns); + if (ret) { + kfree(new_ns); + return ERR_PTR(ret); + } + atomic_set(&new_ns->count, 1); + new_ns->ns.ops = &cgroupns_operations; + return new_ns; +} + +void free_cgroup_ns(struct cgroup_namespace *ns) +{ + put_css_set(ns->root_cset); + put_user_ns(ns->user_ns); + ns_free_inum(&ns->ns); + kfree(ns); +} +EXPORT_SYMBOL(free_cgroup_ns); + +struct cgroup_namespace *copy_cgroup_ns(unsigned long flags, + struct user_namespace *user_ns, + struct cgroup_namespace *old_ns) +{ + struct cgroup_namespace *new_ns = NULL; + struct css_set *cset = NULL; + int err; + + BUG_ON(!old_ns); + + if (!(flags & CLONE_NEWCGROUP)) { + get_cgroup_ns(old_ns); + return old_ns; + } + + /* Allow only sysadmin to create cgroup namespace. 
*/ + err = -EPERM; + if (!ns_capable(user_ns, CAP_SYS_ADMIN)) + goto err_out; + + mutex_lock(&cgroup_mutex); + spin_lock_bh(&css_set_lock); + + cset = task_css_set(current); + get_css_set(cset); + + spin_unlock_bh(&css_set_lock); + mutex_unlock(&cgroup_mutex); + + err = -ENOMEM; + new_ns = alloc_cgroup_ns(); + if (!new_ns) + goto err_out; + + new_ns->user_ns = get_user_ns(user_ns); + new_ns->root_cset = cset; + + return new_ns; + +err_out: + if (cset) + put_css_set(cset); + kfree(new_ns); + return ERR_PTR(err); +} + +static inline struct cgroup_namespace *to_cg_ns(struct ns_common *ns) +{ + return container_of(ns, struct cgroup_namespace, ns); +} + +static int cgroupns_install(struct nsproxy *nsproxy, void *ns) +{ + pr_info("setns not supported for cgroup namespace"); + return -EINVAL; +} + +static struct ns_common *cgroupns_get(struct task_struct *task) +{ + struct cgroup_namespace *ns = NULL; + struct nsproxy *nsproxy; + + task_lock(task); + nsproxy = task->nsproxy; + if (nsproxy) { + ns = nsproxy->cgroup_ns; + get_cgroup_ns(ns); + } + task_unlock(task); + + return ns ? &ns->ns : NULL; +} + +static void cgroupns_put(struct ns_common *ns) +{ + put_cgroup_ns(to_cg_ns(ns)); +} + +const struct proc_ns_operations cgroupns_operations = { + .name = "cgroup", + .type = CLONE_NEWCGROUP, + .get = cgroupns_get, + .put = cgroupns_put, + .install = cgroupns_install, +}; + +static __init int cgroup_namespaces_init(void) +{ + return 0; +} +subsys_initcall(cgroup_namespaces_init); + #ifdef CONFIG_CGROUP_DEBUG static struct cgroup_subsys_state * debug_css_alloc(struct cgroup_subsys_state *parent_css) diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 41989ab4db57..d393125b228c 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -2714,10 +2714,10 @@ int proc_cpuset_show(struct seq_file *m, struct pid_namespace *ns, goto out; retval = -ENAMETOOLONG; - rcu_read_lock(); - css = task_css(tsk, cpuset_cgrp_id); - p = cgroup_path(css->cgroup, buf, PATH_MAX); - rcu_read_unlock(); + css = task_get_css(tsk, cpuset_cgrp_id); + p = cgroup_path_ns(css->cgroup, buf, PATH_MAX, + current->nsproxy->cgroup_ns); + css_put(css); if (!p) goto out_free; seq_puts(m, p); diff --git a/kernel/fork.c b/kernel/fork.c index 2e391c754ae7..6611a6267949 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1884,7 +1884,7 @@ static int check_unshare_flags(unsigned long unshare_flags) if (unshare_flags & ~(CLONE_THREAD|CLONE_FS|CLONE_NEWNS|CLONE_SIGHAND| CLONE_VM|CLONE_FILES|CLONE_SYSVSEM| CLONE_NEWUTS|CLONE_NEWIPC|CLONE_NEWNET| - CLONE_NEWUSER|CLONE_NEWPID)) + CLONE_NEWUSER|CLONE_NEWPID|CLONE_NEWCGROUP)) return -EINVAL; /* * Not implemented, but pretend it works if there is nothing diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c index 49746c81ad8d..782102e59eed 100644 --- a/kernel/nsproxy.c +++ b/kernel/nsproxy.c @@ -25,6 +25,7 @@ #include #include #include +#include static struct kmem_cache *nsproxy_cachep; @@ -39,6 +40,9 @@ struct nsproxy init_nsproxy = { #ifdef CONFIG_NET .net_ns = &init_net, #endif +#ifdef CONFIG_CGROUPS + .cgroup_ns = &init_cgroup_ns, +#endif }; static inline struct nsproxy *create_nsproxy(void) @@ -92,6 +96,13 @@ static struct nsproxy *create_new_namespaces(unsigned long flags, goto out_pid; } + new_nsp->cgroup_ns = copy_cgroup_ns(flags, user_ns, + tsk->nsproxy->cgroup_ns); + if (IS_ERR(new_nsp->cgroup_ns)) { + err = PTR_ERR(new_nsp->cgroup_ns); + goto out_cgroup; + } + new_nsp->net_ns = copy_net_ns(flags, user_ns, tsk->nsproxy->net_ns); if (IS_ERR(new_nsp->net_ns)) { err = PTR_ERR(new_nsp->net_ns); @@ -101,6 
+112,8 @@ static struct nsproxy *create_new_namespaces(unsigned long flags, return new_nsp; out_net: + put_cgroup_ns(new_nsp->cgroup_ns); +out_cgroup: if (new_nsp->pid_ns_for_children) put_pid_ns(new_nsp->pid_ns_for_children); out_pid: @@ -128,7 +141,8 @@ int copy_namespaces(unsigned long flags, struct task_struct *tsk) struct nsproxy *new_ns; if (likely(!(flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC | - CLONE_NEWPID | CLONE_NEWNET)))) { + CLONE_NEWPID | CLONE_NEWNET | + CLONE_NEWCGROUP)))) { get_nsproxy(old_ns); return 0; } @@ -165,6 +179,7 @@ void free_nsproxy(struct nsproxy *ns) put_ipc_ns(ns->ipc_ns); if (ns->pid_ns_for_children) put_pid_ns(ns->pid_ns_for_children); + put_cgroup_ns(ns->cgroup_ns); put_net(ns->net_ns); kmem_cache_free(nsproxy_cachep, ns); } @@ -180,7 +195,7 @@ int unshare_nsproxy_namespaces(unsigned long unshare_flags, int err = 0; if (!(unshare_flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC | - CLONE_NEWNET | CLONE_NEWPID))) + CLONE_NEWNET | CLONE_NEWPID | CLONE_NEWCGROUP))) return 0; user_ns = new_cred ? new_cred->user_ns : current_user_ns(); -- cgit v1.2.3 From fb3c8315650f89a1993fb3ae3e74e9c7e4a1c9c0 Mon Sep 17 00:00:00 2001 From: Aditya Kali Date: Fri, 29 Jan 2016 02:54:08 -0600 Subject: kernfs: define kernfs_node_dentry Add a new kernfs API to look up the dentry for a particular kernfs path. Signed-off-by: Aditya Kali Signed-off-by: Serge E. Hallyn Acked-by: Greg Kroah-Hartman Signed-off-by: Tejun Heo --- fs/kernfs/mount.c | 69 ++++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/kernfs.h | 2 ++ 2 files changed, 71 insertions(+) (limited to 'include/linux') diff --git a/fs/kernfs/mount.c b/fs/kernfs/mount.c index 8eaf417187f1..b67dbccdaf88 100644 --- a/fs/kernfs/mount.c +++ b/fs/kernfs/mount.c @@ -14,6 +14,7 @@ #include #include #include +#include #include "kernfs-internal.h" @@ -62,6 +63,74 @@ struct kernfs_root *kernfs_root_from_sb(struct super_block *sb) return NULL; } +/* + * find the next ancestor in the path down to @child, where @parent was the + * ancestor whose descendant we want to find. + * + * Say the path is /a/b/c/d. @child is d, @parent is NULL. We return the root + * node. If @parent is b, then we return the node for c. + * Passing in d as @parent is not ok.
+ */ +static struct kernfs_node *find_next_ancestor(struct kernfs_node *child, + struct kernfs_node *parent) +{ + if (child == parent) { + pr_crit_once("BUG in find_next_ancestor: called with parent == child"); + return NULL; + } + + while (child->parent != parent) { + if (!child->parent) + return NULL; + child = child->parent; + } + + return child; +} + +/** + * kernfs_node_dentry - get a dentry for the given kernfs_node + * @kn: kernfs_node for which a dentry is needed + * @sb: the kernfs super_block + */ +struct dentry *kernfs_node_dentry(struct kernfs_node *kn, + struct super_block *sb) +{ + struct dentry *dentry; + struct kernfs_node *knparent = NULL; + + BUG_ON(sb->s_op != &kernfs_sops); + + dentry = dget(sb->s_root); + + /* Check if this is the root kernfs_node */ + if (!kn->parent) + return dentry; + + knparent = find_next_ancestor(kn, NULL); + if (WARN_ON(!knparent)) + return ERR_PTR(-EINVAL); + + do { + struct dentry *dtmp; + struct kernfs_node *kntmp; + + if (kn == knparent) + return dentry; + kntmp = find_next_ancestor(kn, knparent); + if (WARN_ON(!kntmp)) + return ERR_PTR(-EINVAL); + mutex_lock(&d_inode(dentry)->i_mutex); + dtmp = lookup_one_len(kntmp->name, dentry, strlen(kntmp->name)); + mutex_unlock(&d_inode(dentry)->i_mutex); + dput(dentry); + if (IS_ERR(dtmp)) + return dtmp; + knparent = kntmp; + dentry = dtmp; + } while (true); +} + static int kernfs_fill_super(struct super_block *sb, unsigned long magic) { struct kernfs_super_info *info = kernfs_info(sb); diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h index 716bfdede5f5..c06c44242f39 100644 --- a/include/linux/kernfs.h +++ b/include/linux/kernfs.h @@ -284,6 +284,8 @@ struct kernfs_node *kernfs_node_from_dentry(struct dentry *dentry); struct kernfs_root *kernfs_root_from_sb(struct super_block *sb); struct inode *kernfs_get_inode(struct super_block *sb, struct kernfs_node *kn); +struct dentry *kernfs_node_dentry(struct kernfs_node *kn, + struct super_block *sb); struct kernfs_root *kernfs_create_root(struct kernfs_syscall_ops *scops, unsigned int flags, void *priv); void kernfs_destroy_root(struct kernfs_root *root); -- cgit v1.2.3 From d4ab4286276fcd6c155bafdf4422b712068d2516 Mon Sep 17 00:00:00 2001 From: "Keller, Jacob E" Date: Mon, 8 Feb 2016 16:05:03 -0800 Subject: ethtool: correctly ensure {GS}CHANNELS doesn't conflict with {GS}RXFH Ethernet drivers implementing both {GS}RXFH and {GS}CHANNELS ethtool ops incorrectly allow SCHANNELS when it would conflict with the settings from SRXFH. This occurs because it is not possible for drivers to understand whether their Rx flow indirection table has been configured or is in the default state. In addition, drivers currently behave in various ways when increasing the number of Rx channels. Some drivers will always destroy the Rx flow indirection table when this occurs, whether it has been set by the user or not. Other drivers will attempt to preserve the table even if the user has never modified it from the default driver settings. Neither of these situations is desirable because it leads to unexpected behavior or loss of user configuration. The correct behavior is to simply return -EINVAL when SCHANNELS would conflict with the current Rx flow table settings. However, it should only do so if the current settings were modified by the user. If we required that the new settings never conflict with the current (default) Rx flow settings, we would force users to first reduce their Rx flow settings and then reduce the number of Rx channels.
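In essence, the check scans the indirection table for the highest Rx ring index still referenced and rejects any channel count that no longer covers it. A simplified restatement (the real logic lives in the ethtool_get_max_rxfh_channel() and ethtool_set_channels() hunks of the diff below; the demo_ name is invented here):

#include <errno.h>
#include <stdint.h>

/* Return 0 if new_rx_rings (rx_count + combined_count after SCHANNELS)
 * still covers every ring the user-set RSS indirection table points at. */
static int demo_check_rxfh_vs_channels(const uint32_t *indir,
				       uint32_t indir_size,
				       uint32_t new_rx_rings)
{
	uint32_t i, max_in_use = 0;

	for (i = 0; i < indir_size; i++)
		if (indir[i] > max_in_use)
			max_in_use = indir[i];

	/* entries are zero-based ring indices: index N needs N + 1 rings */
	return new_rx_rings > max_in_use ? 0 : -EINVAL;
}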
This patch proposes a solution implemented in net/core/ethtool.c which ensures that all drivers behave correctly. It checks whether the RXFH table has been configured to non-default settings, and stores this information in a private netdev flag. When the number of channels is requested to change, it first ensures that the current Rx flow table is not going to assign flows to now disabled channels. Signed-off-by: Jacob Keller Signed-off-by: David S. Miller --- include/linux/netdevice.h | 8 +++++++ net/core/ethtool.c | 55 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 63 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 219f53c30cb3..0499569c256d 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1291,6 +1291,7 @@ struct net_device_ops { * @IFF_OPENVSWITCH: device is a Open vSwitch master * @IFF_L3MDEV_SLAVE: device is enslaved to an L3 master device * @IFF_TEAM: device is a team device + * @IFF_RXFH_CONFIGURED: device has had Rx Flow indirection table configured */ enum netdev_priv_flags { IFF_802_1Q_VLAN = 1<<0, @@ -1318,6 +1319,7 @@ enum netdev_priv_flags { IFF_OPENVSWITCH = 1<<22, IFF_L3MDEV_SLAVE = 1<<23, IFF_TEAM = 1<<24, + IFF_RXFH_CONFIGURED = 1<<25, }; #define IFF_802_1Q_VLAN IFF_802_1Q_VLAN @@ -1345,6 +1347,7 @@ enum netdev_priv_flags { #define IFF_OPENVSWITCH IFF_OPENVSWITCH #define IFF_L3MDEV_SLAVE IFF_L3MDEV_SLAVE #define IFF_TEAM IFF_TEAM +#define IFF_RXFH_CONFIGURED IFF_RXFH_CONFIGURED /** * struct net_device - The DEVICE structure. @@ -4048,6 +4051,11 @@ static inline bool netif_is_lag_port(const struct net_device *dev) return netif_is_bond_slave(dev) || netif_is_team_port(dev); } +static inline bool netif_is_rxfh_configured(const struct net_device *dev) +{ + return dev->priv_flags & IFF_RXFH_CONFIGURED; +} + /* This device needs to keep skb dst for qdisc enqueue or ndo_start_xmit() */ static inline void netif_keep_dst(struct net_device *dev) { diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 453c803f1c87..379bdc59b1c8 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -642,6 +642,37 @@ void netdev_rss_key_fill(void *buffer, size_t len) } EXPORT_SYMBOL(netdev_rss_key_fill); +static int ethtool_get_max_rxfh_channel(struct net_device *dev, u32 *max) +{ + u32 dev_size, current_max = 0; + u32 *indir; + int ret; + + if (!dev->ethtool_ops->get_rxfh_indir_size || + !dev->ethtool_ops->get_rxfh) + return -EOPNOTSUPP; + dev_size = dev->ethtool_ops->get_rxfh_indir_size(dev); + if (dev_size == 0) + return -EOPNOTSUPP; + + indir = kcalloc(dev_size, sizeof(indir[0]), GFP_USER); + if (!indir) + return -ENOMEM; + + ret = dev->ethtool_ops->get_rxfh(dev, indir, NULL, NULL); + if (ret) + goto out; + + while (dev_size--) + current_max = max(current_max, indir[dev_size]); + + *max = current_max; + +out: + kfree(indir); + return ret; +} + static noinline_for_stack int ethtool_get_rxfh_indir(struct net_device *dev, void __user *useraddr) { @@ -738,6 +769,14 @@ static noinline_for_stack int ethtool_set_rxfh_indir(struct net_device *dev, } ret = ops->set_rxfh(dev, indir, NULL, ETH_RSS_HASH_NO_CHANGE); + if (ret) + goto out; + + /* indicate whether rxfh was set to default */ + if (user_size == 0) + dev->priv_flags &= ~IFF_RXFH_CONFIGURED; + else + dev->priv_flags |= IFF_RXFH_CONFIGURED; out: kfree(indir); @@ -897,6 +936,14 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev, } ret = ops->set_rxfh(dev, indir, hkey, rxfh.hfunc); + if (ret) + goto out; + + /* 
indicate whether rxfh was set to default */ + if (rxfh.indir_size == 0) + dev->priv_flags &= ~IFF_RXFH_CONFIGURED; + else if (rxfh.indir_size != ETH_RXFH_INDIR_NO_CHANGE) + dev->priv_flags |= IFF_RXFH_CONFIGURED; out: kfree(rss_config); @@ -1228,6 +1275,7 @@ static noinline_for_stack int ethtool_set_channels(struct net_device *dev, void __user *useraddr) { struct ethtool_channels channels; + u32 max_rx_in_use = 0; if (!dev->ethtool_ops->set_channels) return -EOPNOTSUPP; @@ -1235,6 +1283,13 @@ static noinline_for_stack int ethtool_set_channels(struct net_device *dev, if (copy_from_user(&channels, useraddr, sizeof(channels))) return -EFAULT; + /* ensure the new Rx count fits within the configured Rx flow + * indirection table settings */ + if (netif_is_rxfh_configured(dev) && + !ethtool_get_max_rxfh_channel(dev, &max_rx_in_use) && + (channels.combined_count + channels.rx_count) <= max_rx_in_use) + return -EINVAL; + return dev->ethtool_ops->set_channels(dev, &channels); } -- cgit v1.2.3 From 92016ba5c1d71fbe4e9952df518b5386f2a0556b Mon Sep 17 00:00:00 2001 From: Jake Oshins Date: Tue, 16 Feb 2016 21:56:21 +0000 Subject: PCI: Add fwnode_handle to x86 pci_sysdata Add an fwnode_handle to the x86 struct pci_sysdata, which will be used to locate an IRQ domain associated with a root PCI bus. [bhelgaas: changelog] Signed-off-by: Jake Oshins Signed-off-by: Bjorn Helgaas --- arch/x86/include/asm/pci.h | 15 +++++++++++++++ include/linux/pci.h | 4 ++++ 2 files changed, 19 insertions(+) (limited to 'include/linux') diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h index 462594320d39..6fc3c7c554b2 100644 --- a/arch/x86/include/asm/pci.h +++ b/arch/x86/include/asm/pci.h @@ -20,6 +20,9 @@ struct pci_sysdata { #ifdef CONFIG_X86_64 void *iommu; /* IOMMU private data */ #endif +#ifdef CONFIG_PCI_MSI_IRQ_DOMAIN + void *fwnode; /* IRQ domain for MSI assignment */ +#endif }; extern int pci_routeirq; @@ -32,6 +35,7 @@ extern int noioapicreroute; static inline int pci_domain_nr(struct pci_bus *bus) { struct pci_sysdata *sd = bus->sysdata; + return sd->domain; } @@ -41,6 +45,17 @@ static inline int pci_proc_domain(struct pci_bus *bus) } #endif +#ifdef CONFIG_PCI_MSI_IRQ_DOMAIN +static inline void *_pci_root_bus_fwnode(struct pci_bus *bus) +{ + struct pci_sysdata *sd = bus->sysdata; + + return sd->fwnode; +} + +#define pci_root_bus_fwnode _pci_root_bus_fwnode +#endif + /* Can be used to override the logic in pci_scan_bus for skipping already-configured bus numbers - to be used for buggy BIOSes or architectures with incomplete PCI setup by the loader */ diff --git a/include/linux/pci.h b/include/linux/pci.h index 27df4a6585da..1bb44af6c9f0 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1515,6 +1515,10 @@ static inline int pci_get_new_domain_nr(void) { return -ENOSYS; } #include +#ifndef pci_root_bus_fwnode +#define pci_root_bus_fwnode(bus) NULL +#endif + /* these helpers provide future and backwards compatibility * for accessing popular PCI BAR info */ #define pci_resource_start(dev, bar) ((dev)->resource[(bar)].start) -- cgit v1.2.3 From e1ab7f39d7e0dbfbdefe148be3ae4ee121e47ecc Mon Sep 17 00:00:00 2001 From: Boqun Feng Date: Tue, 15 Dec 2015 22:24:14 +0800 Subject: atomics: Allow architectures to define their own __atomic_op_* helpers Some architectures may have their special barriers for acquire, release and fence semantics, so that general memory barriers(smp_mb__*_atomic()) in the default __atomic_op_*() may be too strong, so allow architectures to define their own helpers which 
can overwrite the default helpers. Signed-off-by: Boqun Feng Signed-off-by: Michael Ellerman --- include/linux/atomic.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/atomic.h b/include/linux/atomic.h index 301de78d65f7..5f3ee5a60a81 100644 --- a/include/linux/atomic.h +++ b/include/linux/atomic.h @@ -34,20 +34,29 @@ * The idea here is to build acquire/release variants by adding explicit * barriers on top of the relaxed variant. In the case where the relaxed * variant is already fully ordered, no additional barriers are needed. + * + * Besides, if an arch has a special barrier for acquire/release, it could + * implement its own __atomic_op_* and use the same framework for building + * variants */ +#ifndef __atomic_op_acquire #define __atomic_op_acquire(op, args...) \ ({ \ typeof(op##_relaxed(args)) __ret = op##_relaxed(args); \ smp_mb__after_atomic(); \ __ret; \ }) +#endif +#ifndef __atomic_op_release #define __atomic_op_release(op, args...) \ ({ \ smp_mb__before_atomic(); \ op##_relaxed(args); \ }) +#endif +#ifndef __atomic_op_fence #define __atomic_op_fence(op, args...) \ ({ \ typeof(op##_relaxed(args)) __ret; \ @@ -56,6 +65,7 @@ smp_mb__after_atomic(); \ __ret; \ }) +#endif /* atomic_add_return_relaxed */ #ifndef atomic_add_return_relaxed -- cgit v1.2.3 From e4c6734eaab90695db0ea8456307790cb0c1ccb5 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Tue, 16 Feb 2016 21:16:15 -0800 Subject: net: rework ndo tc op to consume additional qdisc handle parameter The ndo_setup_tc() op was added to support drivers offloading tx qdiscs however only support for mqprio was ever added. So we only ever added support for passing the number of traffic classes to the driver. This patch generalizes the ndo_setup_tc op so that a handle can be provided to indicate if the offload is for ingress or egress or potentially even child qdiscs. CC: Murali Karicheri CC: Shradha Shah CC: Or Gerlitz CC: Ariel Elior CC: Jeff Kirsher CC: Bruce Allan CC: Jesse Brandeburg CC: Don Skidmore Signed-off-by: John Fastabend Acked-by: Jiri Pirko Acked-by: Jamal Hadi Salim Signed-off-by: David S. 
Miller --- drivers/net/ethernet/amd/xgbe/xgbe-drv.c | 5 ++++- drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c | 7 +++++++ drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h | 1 + drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c | 2 +- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 5 ++++- drivers/net/ethernet/intel/fm10k/fm10k_netdev.c | 10 +++++++++- drivers/net/ethernet/intel/i40e/i40e.h | 2 +- drivers/net/ethernet/intel/i40e/i40e_fcoe.c | 2 +- drivers/net/ethernet/intel/i40e/i40e_main.c | 17 ++++++++++++----- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 11 ++++++++++- drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 12 ++++++++++-- drivers/net/ethernet/sfc/efx.h | 2 +- drivers/net/ethernet/sfc/tx.c | 5 ++++- drivers/net/ethernet/ti/netcp_core.c | 5 ++++- include/linux/netdevice.h | 3 ++- net/sched/sch_mqprio.c | 5 +++-- 16 files changed, 74 insertions(+), 20 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c index 8a9b493566c9..9955cae3cabc 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c @@ -1626,12 +1626,15 @@ static void xgbe_poll_controller(struct net_device *netdev) } #endif /* End CONFIG_NET_POLL_CONTROLLER */ -static int xgbe_setup_tc(struct net_device *netdev, u8 tc) +static int xgbe_setup_tc(struct net_device *netdev, u32 handle, u8 tc) { struct xgbe_prv_data *pdata = netdev_priv(netdev); unsigned int offset, queue; u8 i; + if (handle != TC_H_ROOT) + return -EINVAL; + if (tc && (tc != pdata->hw_feat.tc_cnt)) return -EINVAL; diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c index 9e42bcaf9917..b262cba34dfa 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c @@ -4272,6 +4272,13 @@ int bnx2x_setup_tc(struct net_device *dev, u8 num_tc) return 0; } +int __bnx2x_setup_tc(struct net_device *dev, u32 handle, u8 num_tc) +{ + if (handle != TC_H_ROOT) + return -EINVAL; + return bnx2x_setup_tc(dev, num_tc); +} + /* called with rtnl_lock */ int bnx2x_change_mac_addr(struct net_device *dev, void *p) { diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h index 37369865ca6d..60a4109dcdeb 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h @@ -486,6 +486,7 @@ netdev_tx_t bnx2x_start_xmit(struct sk_buff *skb, struct net_device *dev); /* setup_tc callback */ int bnx2x_setup_tc(struct net_device *dev, u8 num_tc); +int __bnx2x_setup_tc(struct net_device *dev, u32 handle, u8 num_tc); int bnx2x_get_vf_config(struct net_device *dev, int vf, struct ifla_vf_info *ivi); diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c index c5845252c920..81fc51c4ec2b 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c @@ -13061,7 +13061,7 @@ static const struct net_device_ops bnx2x_netdev_ops = { #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = poll_bnx2x, #endif - .ndo_setup_tc = bnx2x_setup_tc, + .ndo_setup_tc = __bnx2x_setup_tc, #ifdef CONFIG_BNX2X_SRIOV .ndo_set_vf_mac = bnx2x_set_vf_mac, .ndo_set_vf_vlan = bnx2x_set_vf_vlan, diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 5dc89e527e7d..ff08faf44ee5 100644 --- 
a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -5370,10 +5370,13 @@ static int bnxt_change_mtu(struct net_device *dev, int new_mtu) return 0; } -static int bnxt_setup_tc(struct net_device *dev, u8 tc) +static int bnxt_setup_tc(struct net_device *dev, u32 handle, u8 tc) { struct bnxt *bp = netdev_priv(dev); + if (handle != TC_H_ROOT) + return -EINVAL; + if (tc > bp->max_tc) { netdev_err(dev, "too many traffic classes requested: %d Max supported is %d\n", tc, bp->max_tc); diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c b/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c index 662569d5b7c0..12701a492325 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c @@ -1204,6 +1204,14 @@ err_queueing_scheme: return err; } +static int __fm10k_setup_tc(struct net_device *dev, u32 handle, u8 tc) +{ + if (handle != TC_H_ROOT) + return -EINVAL; + + return fm10k_setup_tc(dev, tc); +} + static int fm10k_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd) { switch (cmd) { @@ -1386,7 +1394,7 @@ static const struct net_device_ops fm10k_netdev_ops = { .ndo_vlan_rx_kill_vid = fm10k_vlan_rx_kill_vid, .ndo_set_rx_mode = fm10k_set_rx_mode, .ndo_get_stats64 = fm10k_get_stats64, - .ndo_setup_tc = fm10k_setup_tc, + .ndo_setup_tc = __fm10k_setup_tc, .ndo_set_vf_mac = fm10k_ndo_set_vf_mac, .ndo_set_vf_vlan = fm10k_ndo_set_vf_vlan, .ndo_set_vf_rate = fm10k_ndo_set_vf_bw, diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h index 53ed3bdd8363..ef9ca075d5e5 100644 --- a/drivers/net/ethernet/intel/i40e/i40e.h +++ b/drivers/net/ethernet/intel/i40e/i40e.h @@ -788,7 +788,7 @@ struct i40e_mac_filter *i40e_find_mac(struct i40e_vsi *vsi, u8 *macaddr, bool is_vf, bool is_netdev); #ifdef I40E_FCOE int i40e_close(struct net_device *netdev); -int i40e_setup_tc(struct net_device *netdev, u8 tc); +int __i40e_setup_tc(struct net_device *netdev, u32 handle, u8 tc); void i40e_netpoll(struct net_device *netdev); int i40e_fcoe_enable(struct net_device *netdev); int i40e_fcoe_disable(struct net_device *netdev); diff --git a/drivers/net/ethernet/intel/i40e/i40e_fcoe.c b/drivers/net/ethernet/intel/i40e/i40e_fcoe.c index 579a46ca82df..7c66ce416ec7 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_fcoe.c +++ b/drivers/net/ethernet/intel/i40e/i40e_fcoe.c @@ -1457,7 +1457,7 @@ static const struct net_device_ops i40e_fcoe_netdev_ops = { .ndo_tx_timeout = i40e_tx_timeout, .ndo_vlan_rx_add_vid = i40e_vlan_rx_add_vid, .ndo_vlan_rx_kill_vid = i40e_vlan_rx_kill_vid, - .ndo_setup_tc = i40e_setup_tc, + .ndo_setup_tc = __i40e_setup_tc, #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = i40e_netpoll, diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 320b0491abd9..abcb6c152186 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -5253,11 +5253,7 @@ void i40e_down(struct i40e_vsi *vsi) * @netdev: net device to configure * @tc: number of traffic classes to enable **/ -#ifdef I40E_FCOE -int i40e_setup_tc(struct net_device *netdev, u8 tc) -#else static int i40e_setup_tc(struct net_device *netdev, u8 tc) -#endif { struct i40e_netdev_priv *np = netdev_priv(netdev); struct i40e_vsi *vsi = np->vsi; @@ -5310,6 +5306,17 @@ exit: return ret; } +#ifdef I40E_FCOE +int __i40e_setup_tc(struct net_device *netdev, u32 handle, u8 tc) +#else +static int __i40e_setup_tc(struct net_device 
*netdev, u32 handle, u8 tc) +#endif +{ + if (handle != TC_H_ROOT) + return -EINVAL; + return i40e_setup_tc(netdev, tc); +} + /** * i40e_open - Called when a network interface is made active * @netdev: network interface device structure @@ -8951,7 +8958,7 @@ static const struct net_device_ops i40e_netdev_ops = { #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = i40e_netpoll, #endif - .ndo_setup_tc = i40e_setup_tc, + .ndo_setup_tc = __i40e_setup_tc, #ifdef I40E_FCOE .ndo_fcoe_enable = i40e_fcoe_enable, .ndo_fcoe_disable = i40e_fcoe_disable, diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 0c701b8438b6..1ba714efd78c 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -8200,6 +8200,15 @@ int ixgbe_setup_tc(struct net_device *dev, u8 tc) return 0; } +int __ixgbe_setup_tc(struct net_device *dev, u32 handle, u8 tc) +{ + /* Only support egress tc setup for now */ + if (handle != TC_H_ROOT) + return -EINVAL; + + return ixgbe_setup_tc(dev, tc); +} + #ifdef CONFIG_PCI_IOV void ixgbe_sriov_reinit(struct ixgbe_adapter *adapter) { @@ -8658,7 +8667,7 @@ static const struct net_device_ops ixgbe_netdev_ops = { .ndo_get_vf_config = ixgbe_ndo_get_vf_config, .ndo_get_stats64 = ixgbe_get_stats64, #ifdef CONFIG_IXGBE_DCB - .ndo_setup_tc = ixgbe_setup_tc, + .ndo_setup_tc = __ixgbe_setup_tc, #endif #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = ixgbe_netpoll, diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index 0c7e3f69a73b..d5c6c16b9457 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -69,6 +69,14 @@ int mlx4_en_setup_tc(struct net_device *dev, u8 up) return 0; } +static int __mlx4_en_setup_tc(struct net_device *dev, u32 handle, u8 up) +{ + if (handle != TC_H_ROOT) + return -EINVAL; + + return mlx4_en_setup_tc(dev, up); +} + #ifdef CONFIG_RFS_ACCEL struct mlx4_en_filter { @@ -2466,7 +2474,7 @@ static const struct net_device_ops mlx4_netdev_ops = { #endif .ndo_set_features = mlx4_en_set_features, .ndo_fix_features = mlx4_en_fix_features, - .ndo_setup_tc = mlx4_en_setup_tc, + .ndo_setup_tc = __mlx4_en_setup_tc, #ifdef CONFIG_RFS_ACCEL .ndo_rx_flow_steer = mlx4_en_filter_rfs, #endif @@ -2504,7 +2512,7 @@ static const struct net_device_ops mlx4_netdev_ops_master = { #endif .ndo_set_features = mlx4_en_set_features, .ndo_fix_features = mlx4_en_fix_features, - .ndo_setup_tc = mlx4_en_setup_tc, + .ndo_setup_tc = __mlx4_en_setup_tc, #ifdef CONFIG_RFS_ACCEL .ndo_rx_flow_steer = mlx4_en_filter_rfs, #endif diff --git a/drivers/net/ethernet/sfc/efx.h b/drivers/net/ethernet/sfc/efx.h index 10827476bc0b..7815fa09b15d 100644 --- a/drivers/net/ethernet/sfc/efx.h +++ b/drivers/net/ethernet/sfc/efx.h @@ -32,7 +32,7 @@ netdev_tx_t efx_hard_start_xmit(struct sk_buff *skb, struct net_device *net_dev); netdev_tx_t efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb); void efx_xmit_done(struct efx_tx_queue *tx_queue, unsigned int index); -int efx_setup_tc(struct net_device *net_dev, u8 num_tc); +int efx_setup_tc(struct net_device *net_dev, u32 handle, u8 num_tc); unsigned int efx_tx_max_skb_descs(struct efx_nic *efx); extern unsigned int efx_piobuf_size; extern bool efx_separate_tx_channels; diff --git a/drivers/net/ethernet/sfc/tx.c b/drivers/net/ethernet/sfc/tx.c index f7a0ec1bca97..8f1d53e2aca7 100644 --- a/drivers/net/ethernet/sfc/tx.c +++ 
b/drivers/net/ethernet/sfc/tx.c @@ -562,7 +562,7 @@ void efx_init_tx_queue_core_txq(struct efx_tx_queue *tx_queue) efx->n_tx_channels : 0)); } -int efx_setup_tc(struct net_device *net_dev, u8 num_tc) +int efx_setup_tc(struct net_device *net_dev, u32 handle, u8 num_tc) { struct efx_nic *efx = netdev_priv(net_dev); struct efx_channel *channel; @@ -570,6 +570,9 @@ int efx_setup_tc(struct net_device *net_dev, u8 num_tc) unsigned tc; int rc; + if (handle != TC_H_ROOT) + return -EINVAL; + if (efx_nic_rev(efx) < EFX_REV_FALCON_B0 || num_tc > EFX_MAX_TX_TC) return -EINVAL; diff --git a/drivers/net/ethernet/ti/netcp_core.c b/drivers/net/ethernet/ti/netcp_core.c index c61d66d38634..40cde814608b 100644 --- a/drivers/net/ethernet/ti/netcp_core.c +++ b/drivers/net/ethernet/ti/netcp_core.c @@ -1835,13 +1835,16 @@ static u16 netcp_select_queue(struct net_device *dev, struct sk_buff *skb, return 0; } -static int netcp_setup_tc(struct net_device *dev, u8 num_tc) +static int netcp_setup_tc(struct net_device *dev, u32 handle, u8 num_tc) { int i; /* setup tc must be called under rtnl lock */ ASSERT_RTNL(); + if (handle != TC_H_ROOT) + return -EINVAL; + /* Sanity-check the number of traffic classes requested */ if ((dev->real_num_tx_queues <= 1) || (dev->real_num_tx_queues < num_tc)) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 0499569c256d..48928b6f9cb6 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -51,6 +51,7 @@ #include #include #include +#include struct netpoll_info; struct device; @@ -1150,7 +1151,7 @@ struct net_device_ops { int (*ndo_set_vf_rss_query_en)( struct net_device *dev, int vf, bool setting); - int (*ndo_setup_tc)(struct net_device *dev, u8 tc); + int (*ndo_setup_tc)(struct net_device *dev, u32 handle, u8 tc); #if IS_ENABLED(CONFIG_FCOE) int (*ndo_fcoe_enable)(struct net_device *dev); int (*ndo_fcoe_disable)(struct net_device *dev); diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c index ad70ecf57ce7..f5a0e8a4dbd7 100644 --- a/net/sched/sch_mqprio.c +++ b/net/sched/sch_mqprio.c @@ -39,7 +39,7 @@ static void mqprio_destroy(struct Qdisc *sch) } if (priv->hw_owned && dev->netdev_ops->ndo_setup_tc) - dev->netdev_ops->ndo_setup_tc(dev, 0); + dev->netdev_ops->ndo_setup_tc(dev, sch->handle, 0); else netdev_set_num_tc(dev, 0); } @@ -141,7 +141,8 @@ static int mqprio_init(struct Qdisc *sch, struct nlattr *opt) */ if (qopt->hw) { priv->hw_owned = 1; - err = dev->netdev_ops->ndo_setup_tc(dev, qopt->num_tc); + err = dev->netdev_ops->ndo_setup_tc(dev, sch->handle, + qopt->num_tc); if (err) goto err; } else { -- cgit v1.2.3 From 16e5cc647173a97e33b3e3ba81f73eb455561794 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Tue, 16 Feb 2016 21:16:43 -0800 Subject: net: rework setup_tc ndo op to consume general tc operand This patch updates setup_tc so we can pass additional parameters into the ndo op in a generic way. To do this we provide structured union and type flag. This lets each classifier and qdisc provide its own set of attributes without having to add new ndo ops or grow the signature of the callback. Signed-off-by: John Fastabend Acked-by: Jiri Pirko Acked-by: Jamal Hadi Salim Signed-off-by: David S. 
Miller --- drivers/net/ethernet/amd/xgbe/xgbe-drv.c | 9 ++++++--- drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c | 7 ++++--- drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h | 3 ++- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 8 ++++++-- drivers/net/ethernet/intel/fm10k/fm10k_netdev.c | 7 ++++--- drivers/net/ethernet/intel/i40e/i40e.h | 3 ++- drivers/net/ethernet/intel/i40e/i40e_main.c | 10 ++++++---- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 7 ++++--- drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 7 ++++--- drivers/net/ethernet/sfc/efx.h | 3 ++- drivers/net/ethernet/sfc/tx.c | 9 ++++++--- drivers/net/ethernet/ti/netcp_core.c | 13 +++++++------ include/linux/netdevice.h | 20 +++++++++++++++++++- net/sched/sch_mqprio.c | 9 ++++++--- 14 files changed, 78 insertions(+), 37 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c index 9955cae3cabc..cfd3f7efda1c 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c @@ -1626,15 +1626,18 @@ static void xgbe_poll_controller(struct net_device *netdev) } #endif /* End CONFIG_NET_POLL_CONTROLLER */ -static int xgbe_setup_tc(struct net_device *netdev, u32 handle, u8 tc) +static int xgbe_setup_tc(struct net_device *netdev, u32 handle, __be16 proto, + struct tc_to_netdev *tc_to_netdev) { struct xgbe_prv_data *pdata = netdev_priv(netdev); unsigned int offset, queue; - u8 i; + u8 i, tc; - if (handle != TC_H_ROOT) + if (handle != TC_H_ROOT || tc_to_netdev->type != TC_SETUP_MQPRIO) return -EINVAL; + tc = tc_to_netdev->tc; + if (tc && (tc != pdata->hw_feat.tc_cnt)) return -EINVAL; diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c index b262cba34dfa..45843d150868 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c @@ -4272,11 +4272,12 @@ int bnx2x_setup_tc(struct net_device *dev, u8 num_tc) return 0; } -int __bnx2x_setup_tc(struct net_device *dev, u32 handle, u8 num_tc) +int __bnx2x_setup_tc(struct net_device *dev, u32 handle, __be16 proto, + struct tc_to_netdev *tc) { - if (handle != TC_H_ROOT) + if (handle != TC_H_ROOT || tc->type != TC_SETUP_MQPRIO) return -EINVAL; - return bnx2x_setup_tc(dev, num_tc); + return bnx2x_setup_tc(dev, tc->tc); } /* called with rtnl_lock */ diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h index 60a4109dcdeb..0e68fadecfdb 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h @@ -486,7 +486,8 @@ netdev_tx_t bnx2x_start_xmit(struct sk_buff *skb, struct net_device *dev); /* setup_tc callback */ int bnx2x_setup_tc(struct net_device *dev, u8 num_tc); -int __bnx2x_setup_tc(struct net_device *dev, u32 handle, u8 num_tc); +int __bnx2x_setup_tc(struct net_device *dev, u32 handle, __be16 proto, + struct tc_to_netdev *tc); int bnx2x_get_vf_config(struct net_device *dev, int vf, struct ifla_vf_info *ivi); diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index ff08faf44ee5..169920aa39f3 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -5370,13 +5370,17 @@ static int bnxt_change_mtu(struct net_device *dev, int new_mtu) return 0; } -static int bnxt_setup_tc(struct net_device *dev, u32 handle, u8 tc) +static int bnxt_setup_tc(struct net_device *dev, u32 handle, 
__be16 proto, + struct tc_to_netdev *ntc) { struct bnxt *bp = netdev_priv(dev); + u8 tc; - if (handle != TC_H_ROOT) + if (handle != TC_H_ROOT || ntc->type != TC_SETUP_MQPRIO) return -EINVAL; + tc = ntc->tc; + if (tc > bp->max_tc) { netdev_err(dev, "too many traffic classes requested: %d Max supported is %d\n", tc, bp->max_tc); diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c b/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c index 12701a492325..dc1a82148ff0 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c @@ -1204,12 +1204,13 @@ err_queueing_scheme: return err; } -static int __fm10k_setup_tc(struct net_device *dev, u32 handle, u8 tc) +static int __fm10k_setup_tc(struct net_device *dev, u32 handle, __be16 proto, + struct tc_to_netdev *tc) { - if (handle != TC_H_ROOT) + if (handle != TC_H_ROOT || tc->type != TC_SETUP_MQPRIO) return -EINVAL; - return fm10k_setup_tc(dev, tc); + return fm10k_setup_tc(dev, tc->tc); } static int fm10k_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd) diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h index ef9ca075d5e5..933c4b3d92c8 100644 --- a/drivers/net/ethernet/intel/i40e/i40e.h +++ b/drivers/net/ethernet/intel/i40e/i40e.h @@ -788,7 +788,8 @@ struct i40e_mac_filter *i40e_find_mac(struct i40e_vsi *vsi, u8 *macaddr, bool is_vf, bool is_netdev); #ifdef I40E_FCOE int i40e_close(struct net_device *netdev); -int __i40e_setup_tc(struct net_device *netdev, u32 handle, u8 tc); +int __i40e_setup_tc(struct net_device *netdev, u32 handle, __be16 proto, + struct tc_to_netdev *tc); void i40e_netpoll(struct net_device *netdev); int i40e_fcoe_enable(struct net_device *netdev); int i40e_fcoe_disable(struct net_device *netdev); diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index abcb6c152186..257d16207976 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -5307,14 +5307,16 @@ exit: } #ifdef I40E_FCOE -int __i40e_setup_tc(struct net_device *netdev, u32 handle, u8 tc) +int __i40e_setup_tc(struct net_device *netdev, u32 handle, __be16 proto, + struct tc_to_netdev *tc) #else -static int __i40e_setup_tc(struct net_device *netdev, u32 handle, u8 tc) +static int __i40e_setup_tc(struct net_device *netdev, u32 handle, __be16 proto, + struct tc_to_netdev *tc) #endif { - if (handle != TC_H_ROOT) + if (handle != TC_H_ROOT || tc->type != TC_SETUP_MQPRIO) return -EINVAL; - return i40e_setup_tc(netdev, tc); + return i40e_setup_tc(netdev, tc->tc); } /** diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 1ba714efd78c..dca2298f4c36 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -8200,13 +8200,14 @@ int ixgbe_setup_tc(struct net_device *dev, u8 tc) return 0; } -int __ixgbe_setup_tc(struct net_device *dev, u32 handle, u8 tc) +int __ixgbe_setup_tc(struct net_device *dev, u32 handle, __be16 proto, + struct tc_to_netdev *tc) { /* Only support egress tc setup for now */ - if (handle != TC_H_ROOT) + if (handle != TC_H_ROOT || tc->type != TC_SETUP_MQPRIO) return -EINVAL; - return ixgbe_setup_tc(dev, tc); + return ixgbe_setup_tc(dev, tc->tc); } #ifdef CONFIG_PCI_IOV diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index d5c6c16b9457..01d6a9695586 100644 --- 
a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -69,12 +69,13 @@ int mlx4_en_setup_tc(struct net_device *dev, u8 up) return 0; } -static int __mlx4_en_setup_tc(struct net_device *dev, u32 handle, u8 up) +static int __mlx4_en_setup_tc(struct net_device *dev, u32 handle, __be16 proto, + struct tc_to_netdev *tc) { - if (handle != TC_H_ROOT) + if (handle != TC_H_ROOT || tc->type != TC_SETUP_MQPRIO) return -EINVAL; - return mlx4_en_setup_tc(dev, up); + return mlx4_en_setup_tc(dev, tc->tc); } #ifdef CONFIG_RFS_ACCEL
diff --git a/drivers/net/ethernet/sfc/efx.h b/drivers/net/ethernet/sfc/efx.h index 7815fa09b15d..5e3f93f04e62 100644 --- a/drivers/net/ethernet/sfc/efx.h +++ b/drivers/net/ethernet/sfc/efx.h @@ -32,7 +32,8 @@ netdev_tx_t efx_hard_start_xmit(struct sk_buff *skb, struct net_device *net_dev); netdev_tx_t efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb); void efx_xmit_done(struct efx_tx_queue *tx_queue, unsigned int index); -int efx_setup_tc(struct net_device *net_dev, u32 handle, u8 num_tc); +int efx_setup_tc(struct net_device *net_dev, u32 handle, __be16 proto, + struct tc_to_netdev *tc); unsigned int efx_tx_max_skb_descs(struct efx_nic *efx); extern unsigned int efx_piobuf_size; extern bool efx_separate_tx_channels;
diff --git a/drivers/net/ethernet/sfc/tx.c b/drivers/net/ethernet/sfc/tx.c index 8f1d53e2aca7..2cdb5718ed66 100644 --- a/drivers/net/ethernet/sfc/tx.c +++ b/drivers/net/ethernet/sfc/tx.c @@ -562,17 +562,20 @@ void efx_init_tx_queue_core_txq(struct efx_tx_queue *tx_queue) efx->n_tx_channels : 0)); } -int efx_setup_tc(struct net_device *net_dev, u32 handle, u8 num_tc) +int efx_setup_tc(struct net_device *net_dev, u32 handle, __be16 proto, + struct tc_to_netdev *ntc) { struct efx_nic *efx = netdev_priv(net_dev); struct efx_channel *channel; struct efx_tx_queue *tx_queue; - unsigned tc; + unsigned tc, num_tc; int rc; - if (handle != TC_H_ROOT) + if (handle != TC_H_ROOT || ntc->type != TC_SETUP_MQPRIO) return -EINVAL; + num_tc = ntc->tc; + if (efx_nic_rev(efx) < EFX_REV_FALCON_B0 || num_tc > EFX_MAX_TX_TC) return -EINVAL;
diff --git a/drivers/net/ethernet/ti/netcp_core.c b/drivers/net/ethernet/ti/netcp_core.c index 40cde814608b..8586a2034019 100644 --- a/drivers/net/ethernet/ti/netcp_core.c +++ b/drivers/net/ethernet/ti/netcp_core.c @@ -1835,25 +1835,26 @@ static u16 netcp_select_queue(struct net_device *dev, struct sk_buff *skb, return 0; } -static int netcp_setup_tc(struct net_device *dev, u32 handle, u8 num_tc) +static int netcp_setup_tc(struct net_device *dev, u32 handle, __be16 proto, + struct tc_to_netdev *tc) { int i; /* setup tc must be called under rtnl lock */ ASSERT_RTNL(); - if (handle != TC_H_ROOT) + if (handle != TC_H_ROOT || tc->type != TC_SETUP_MQPRIO) return -EINVAL; /* Sanity-check the number of traffic classes requested */ if ((dev->real_num_tx_queues <= 1) || - (dev->real_num_tx_queues < num_tc)) + (dev->real_num_tx_queues < tc->tc)) return -EINVAL; /* Configure traffic class to queue mappings */ - if (num_tc) { - netdev_set_num_tc(dev, num_tc); - for (i = 0; i < num_tc; i++) + if (tc->tc) { + netdev_set_num_tc(dev, tc->tc); + for (i = 0; i < tc->tc; i++) netdev_set_tc_queue(dev, i, 1, i); } else { netdev_reset_tc(dev);
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 48928b6f9cb6..e396060f815f 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -779,6 +779,21 @@ static inline bool netdev_phys_item_id_same(struct netdev_phys_item_id *a, 
typedef u16 (*select_queue_fallback_t)(struct net_device *dev, struct sk_buff *skb); +/* This structure holds attributes of qdisc and classifiers + * that are being passed to the netdevice through the setup_tc op. + */ +enum { + TC_SETUP_MQPRIO, +}; + +struct tc_to_netdev { + unsigned int type; + union { + u8 tc; + }; +}; + + /* * This structure defines the management hooks for network devices. * The following hooks can be defined; unless noted otherwise, they are @@ -1151,7 +1166,10 @@ struct net_device_ops { int (*ndo_set_vf_rss_query_en)( struct net_device *dev, int vf, bool setting); - int (*ndo_setup_tc)(struct net_device *dev, u32 handle, u8 tc); + int (*ndo_setup_tc)(struct net_device *dev, + u32 handle, + __be16 protocol, + struct tc_to_netdev *tc); #if IS_ENABLED(CONFIG_FCOE) int (*ndo_fcoe_enable)(struct net_device *dev); int (*ndo_fcoe_disable)(struct net_device *dev);
diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c index f5a0e8a4dbd7..f9947d1f4952 100644 --- a/net/sched/sch_mqprio.c +++ b/net/sched/sch_mqprio.c @@ -28,6 +28,7 @@ static void mqprio_destroy(struct Qdisc *sch) { struct net_device *dev = qdisc_dev(sch); struct mqprio_sched *priv = qdisc_priv(sch); + struct tc_to_netdev tc = {.type = TC_SETUP_MQPRIO}; unsigned int ntx; if (priv->qdiscs) { @@ -39,7 +40,7 @@ static void mqprio_destroy(struct Qdisc *sch) } if (priv->hw_owned && dev->netdev_ops->ndo_setup_tc) - dev->netdev_ops->ndo_setup_tc(dev, sch->handle, 0); + dev->netdev_ops->ndo_setup_tc(dev, sch->handle, 0, &tc); else netdev_set_num_tc(dev, 0); } @@ -140,9 +141,11 @@ static int mqprio_init(struct Qdisc *sch, struct nlattr *opt) * supplied and verified mapping */ if (qopt->hw) { + struct tc_to_netdev tc = {.type = TC_SETUP_MQPRIO, + .tc = qopt->num_tc}; + priv->hw_owned = 1; - err = dev->netdev_ops->ndo_setup_tc(dev, sch->handle, - qopt->num_tc); + err = dev->netdev_ops->ndo_setup_tc(dev, sch->handle, 0, &tc); if (err) goto err; } else { -- cgit v1.2.3
From a1b7c5fd7fe98f51fbbc393ee1fc4c1cdb2f0119 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Tue, 16 Feb 2016 21:17:09 -0800 Subject: net: sched: add cls_u32 offload hooks for netdevs This patch allows netdev drivers to consume cls_u32 offloads via the ndo_setup_tc ndo op. This aligns with how network drivers have been doing qdisc offloads for mqprio.
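As a hedged sketch of how a driver might dispatch on the new classifier type (the foo_* helpers are hypothetical; only the TC_CLSU32_* commands and the struct fields used below come from this patch):

static int foo_setup_tc(struct net_device *dev, u32 handle, __be16 proto,
			struct tc_to_netdev *tc)
{
	if (tc->type != TC_SETUP_CLSU32)
		return -EINVAL;

	switch (tc->cls_u32->command) {
	case TC_CLSU32_NEW_KNODE:
	case TC_CLSU32_REPLACE_KNODE:
		/* program the val/mask match described by knode.sel */
		return foo_add_u32_filter(dev, &tc->cls_u32->knode);
	case TC_CLSU32_DELETE_KNODE:
		return foo_del_u32_filter(dev, tc->cls_u32->knode.handle);
	default:
		/* hash-node commands are left unhandled in this sketch */
		return -EOPNOTSUPP;
	}
}

Signed-off-by: John Fastabend Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- include/linux/netdevice.h | 6 ++- include/net/pkt_cls.h | 34 ++++++++++++++++ net/sched/cls_u32.c | 99 ++++++++++++++++++++++++++++++++++++++++++++++- 3 files changed, 136 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index e396060f815f..47671ce04ac4 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -779,17 +779,21 @@ static inline bool netdev_phys_item_id_same(struct netdev_phys_item_id *a, typedef u16 (*select_queue_fallback_t)(struct net_device *dev, struct sk_buff *skb); -/* This structure holds attributes of qdisc and classifiers +/* These structures hold the attributes of qdisc and classifiers * that are being passed to the netdevice through the setup_tc op. 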
*/ enum { TC_SETUP_MQPRIO, + TC_SETUP_CLSU32, }; +struct tc_cls_u32_offload; + struct tc_to_netdev { unsigned int type; union { u8 tc; + struct tc_cls_u32_offload *cls_u32; }; }; diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index bc49967e1a68..59789ca6e2c8 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -358,4 +358,38 @@ tcf_match_indev(struct sk_buff *skb, int ifindex) } #endif /* CONFIG_NET_CLS_IND */ +struct tc_cls_u32_knode { + struct tcf_exts *exts; + u8 fshift; + u32 handle; + u32 val; + u32 mask; + u32 link_handle; + struct tc_u32_sel *sel; +}; + +struct tc_cls_u32_hnode { + u32 handle; + u32 prio; + unsigned int divisor; +}; + +enum tc_clsu32_command { + TC_CLSU32_NEW_KNODE, + TC_CLSU32_REPLACE_KNODE, + TC_CLSU32_DELETE_KNODE, + TC_CLSU32_NEW_HNODE, + TC_CLSU32_REPLACE_HNODE, + TC_CLSU32_DELETE_HNODE, +}; + +struct tc_cls_u32_offload { + /* knode values */ + enum tc_clsu32_command command; + union { + struct tc_cls_u32_knode knode; + struct tc_cls_u32_hnode hnode; + }; +}; + #endif diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index 4fbb67430ce4..d54bc942ea87 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -43,6 +43,7 @@ #include #include #include +#include struct tc_u_knode { struct tc_u_knode __rcu *next; @@ -424,6 +425,93 @@ static int u32_delete_key(struct tcf_proto *tp, struct tc_u_knode *key) return 0; } +static void u32_remove_hw_knode(struct tcf_proto *tp, u32 handle) +{ + struct net_device *dev = tp->q->dev_queue->dev; + struct tc_cls_u32_offload u32_offload = {0}; + struct tc_to_netdev offload; + + offload.type = TC_SETUP_CLSU32; + offload.cls_u32 = &u32_offload; + + if (dev->netdev_ops->ndo_setup_tc) { + offload.cls_u32->command = TC_CLSU32_DELETE_KNODE; + offload.cls_u32->knode.handle = handle; + dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, + tp->protocol, &offload); + } +} + +static void u32_replace_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h) +{ + struct net_device *dev = tp->q->dev_queue->dev; + struct tc_cls_u32_offload u32_offload = {0}; + struct tc_to_netdev offload; + + offload.type = TC_SETUP_CLSU32; + offload.cls_u32 = &u32_offload; + + if (dev->netdev_ops->ndo_setup_tc) { + offload.cls_u32->command = TC_CLSU32_NEW_HNODE; + offload.cls_u32->hnode.divisor = h->divisor; + offload.cls_u32->hnode.handle = h->handle; + offload.cls_u32->hnode.prio = h->prio; + + dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, + tp->protocol, &offload); + } +} + +static void u32_clear_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h) +{ + struct net_device *dev = tp->q->dev_queue->dev; + struct tc_cls_u32_offload u32_offload = {0}; + struct tc_to_netdev offload; + + offload.type = TC_SETUP_CLSU32; + offload.cls_u32 = &u32_offload; + + if (dev->netdev_ops->ndo_setup_tc) { + offload.cls_u32->command = TC_CLSU32_DELETE_HNODE; + offload.cls_u32->hnode.divisor = h->divisor; + offload.cls_u32->hnode.handle = h->handle; + offload.cls_u32->hnode.prio = h->prio; + + dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, + tp->protocol, &offload); + } +} + +static void u32_replace_hw_knode(struct tcf_proto *tp, struct tc_u_knode *n) +{ + struct net_device *dev = tp->q->dev_queue->dev; + struct tc_cls_u32_offload u32_offload = {0}; + struct tc_to_netdev offload; + + offload.type = TC_SETUP_CLSU32; + offload.cls_u32 = &u32_offload; + + if (dev->netdev_ops->ndo_setup_tc) { + offload.cls_u32->command = TC_CLSU32_REPLACE_KNODE; + offload.cls_u32->knode.handle = n->handle; + offload.cls_u32->knode.fshift = n->fshift; +#ifdef 
CONFIG_CLS_U32_MARK + offload.cls_u32->knode.val = n->val; + offload.cls_u32->knode.mask = n->mask; +#else + offload.cls_u32->knode.val = 0; + offload.cls_u32->knode.mask = 0; +#endif + offload.cls_u32->knode.sel = &n->sel; + offload.cls_u32->knode.exts = &n->exts; + if (n->ht_down) + offload.cls_u32->knode.link_handle = n->ht_down->handle; + + dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, + tp->protocol, &offload); + } +} + static void u32_clear_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht) { struct tc_u_knode *n; @@ -434,6 +522,7 @@ static void u32_clear_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht) RCU_INIT_POINTER(ht->ht[h], rtnl_dereference(n->next)); tcf_unbind_filter(tp, &n->res); + u32_remove_hw_knode(tp, n->handle); call_rcu(&n->rcu, u32_delete_key_freepf_rcu); } } @@ -454,6 +543,7 @@ static int u32_destroy_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht) phn; hn = &phn->next, phn = rtnl_dereference(*hn)) { if (phn == ht) { + u32_clear_hw_hnode(tp, ht); RCU_INIT_POINTER(*hn, ht->next); kfree_rcu(ht, rcu); return 0; @@ -540,8 +630,10 @@ static int u32_delete(struct tcf_proto *tp, unsigned long arg) if (ht == NULL) return 0; - if (TC_U32_KEY(ht->handle)) + if (TC_U32_KEY(ht->handle)) { + u32_remove_hw_knode(tp, ht->handle); return u32_delete_key(tp, (struct tc_u_knode *)ht); + } if (root_ht == ht) return -EINVAL; @@ -769,6 +861,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb, u32_replace_knode(tp, tp_c, new); tcf_unbind_filter(tp, &n->res); call_rcu(&n->rcu, u32_delete_key_rcu); + u32_replace_hw_knode(tp, new); return 0; } @@ -795,6 +888,8 @@ static int u32_change(struct net *net, struct sk_buff *in_skb, RCU_INIT_POINTER(ht->next, tp_c->hlist); rcu_assign_pointer(tp_c->hlist, ht); *arg = (unsigned long)ht; + + u32_replace_hw_hnode(tp, ht); return 0; } @@ -877,7 +972,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb, RCU_INIT_POINTER(n->next, pins); rcu_assign_pointer(*ins, n); - + u32_replace_hw_knode(tp, n); *arg = (unsigned long)n; return 0; } -- cgit v1.2.3
From 1c78c64e9c6f43a490427d55cd2d213b7c6795c1 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Tue, 16 Feb 2016 21:17:37 -0800 Subject: net: add tc offload feature flag It's useful to turn off the qdisc offload feature at a per device level. This gives us a big hammer to enable/disable offloading. More fine-grained control (i.e. per rule) may be supported later.
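A sketch of how a driver could honour the flag before programming hardware (foo_setup_tc and foo_do_setup_tc are hypothetical, not code from this patch):

static int foo_setup_tc(struct net_device *dev, u32 handle, __be16 proto,
			struct tc_to_netdev *tc)
{
	/* Refuse tc offloads once the user has cleared the feature,
	 * e.g. via "ethtool -K <dev> hw-tc-offload off".
	 */
	if (!(dev->features & NETIF_F_HW_TC))
		return -EOPNOTSUPP;

	return foo_do_setup_tc(dev, handle, proto, tc);
}

The "hw-tc-offload" string added below is the ethtool name of the feature, so the big hammer is simply "ethtool -K eth0 hw-tc-offload on|off".

Signed-off-by: John Fastabend Acked-by: Jiri Pirko Acked-by: Jamal Hadi Salim Signed-off-by: David S.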
Miller --- include/linux/netdev_features.h | 3 +++ net/core/ethtool.c | 1 + 2 files changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h index d9654f0eecb3..a734bf43d190 100644 --- a/include/linux/netdev_features.h +++ b/include/linux/netdev_features.h @@ -67,6 +67,8 @@ enum { NETIF_F_HW_L2FW_DOFFLOAD_BIT, /* Allow L2 Forwarding in Hardware */ NETIF_F_BUSY_POLL_BIT, /* Busy poll */ + NETIF_F_HW_TC_BIT, /* Offload TC infrastructure */ + /* * Add your fresh new feature above and remember to update * netdev_features_strings[] in net/core/ethtool.c and maybe @@ -124,6 +126,7 @@ enum { #define NETIF_F_HW_VLAN_STAG_TX __NETIF_F(HW_VLAN_STAG_TX) #define NETIF_F_HW_L2FW_DOFFLOAD __NETIF_F(HW_L2FW_DOFFLOAD) #define NETIF_F_BUSY_POLL __NETIF_F(BUSY_POLL) +#define NETIF_F_HW_TC __NETIF_F(HW_TC) #define for_each_netdev_feature(mask_addr, bit) \ for_each_set_bit(bit, (unsigned long *)mask_addr, NETDEV_FEATURE_COUNT) diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 65f907aea777..c2d3118b1395 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -98,6 +98,7 @@ static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN] [NETIF_F_RXALL_BIT] = "rx-all", [NETIF_F_HW_L2FW_DOFFLOAD_BIT] = "l2-fwd-offload", [NETIF_F_BUSY_POLL_BIT] = "busy-poll", + [NETIF_F_HW_TC_BIT] = "hw-tc-offload", }; static const char -- cgit v1.2.3 From af719c18075f76f83e363bd13a97937b2e9f3f7d Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Wed, 30 Sep 2015 13:02:01 +0200 Subject: clk: at91: pmc: drop at91_pmc_base at91_pmc_base is not used anymore, remove it along with at91_pmc_read and at91_pmc_write. Signed-off-by: Alexandre Belloni Acked-by: Boris Brezillon Acked-by: Stephen Boyd --- drivers/clk/at91/pmc.c | 7 ------- include/linux/clk/at91_pmc.h | 12 ------------ 2 files changed, 19 deletions(-) (limited to 'include/linux') diff --git a/drivers/clk/at91/pmc.c b/drivers/clk/at91/pmc.c index 361ea0c1d3c9..dce285549de2 100644 --- a/drivers/clk/at91/pmc.c +++ b/drivers/clk/at91/pmc.c @@ -12,7 +12,6 @@ #include #include #include -#include #include #include @@ -29,9 +28,6 @@ struct at91_pmc { const struct at91_pmc_caps *caps; }; -void __iomem *at91_pmc_base; -EXPORT_SYMBOL_GPL(at91_pmc_base); - int of_at91_get_clk_range(struct device_node *np, const char *propname, struct clk_range *range) { @@ -108,11 +104,8 @@ static void __init of_at91_pmc_setup(struct device_node *np, const struct at91_pmc_caps *caps) { struct at91_pmc *pmc; - void __iomem *regbase = of_iomap(np, 0); struct regmap *regmap; - at91_pmc_base = regbase; - regmap = syscon_node_to_regmap(np); if (IS_ERR(regmap)) panic("Could not retrieve syscon regmap"); diff --git a/include/linux/clk/at91_pmc.h b/include/linux/clk/at91_pmc.h index 1e6932222e11..17f413bbbedf 100644 --- a/include/linux/clk/at91_pmc.h +++ b/include/linux/clk/at91_pmc.h @@ -16,18 +16,6 @@ #ifndef AT91_PMC_H #define AT91_PMC_H -#ifndef __ASSEMBLY__ -extern void __iomem *at91_pmc_base; - -#define at91_pmc_read(field) \ - readl_relaxed(at91_pmc_base + field) - -#define at91_pmc_write(field, value) \ - writel_relaxed(value, at91_pmc_base + field) -#else -.extern at91_pmc_base -#endif - #define AT91_PMC_SCER 0x00 /* System Clock Enable Register */ #define AT91_PMC_SCDR 0x04 /* System Clock Disable Register */ -- cgit v1.2.3 From fc48b7a6148af974b49db145812a8b060324a503 Mon Sep 17 00:00:00 2001 From: Yuval Mintz Date: Mon, 15 Feb 2016 13:22:35 -0500 Subject: qed/qede: use 8.7.3.0 FW. 
This patch moves the qed* driver into utilizing the 8.7.3.0 FW. This new FW is required for a lot of new SW features, including: - Vlan filtering offload - Encapsulation offload support - HW ingress aggregations As well as paving the way for the possibility of adding storage protocols in the future. V2: - Fix kbuild test robot error/warnings. Signed-off-by: Yuval Mintz Signed-off-by: Sudarsana Reddy Kalluru Signed-off-by: Manish Chopra Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed.h | 43 +- drivers/net/ethernet/qlogic/qed/qed_cxt.c | 3 +- drivers/net/ethernet/qlogic/qed/qed_dev.c | 88 +- drivers/net/ethernet/qlogic/qed/qed_hsi.h | 2690 +++++++++----------- .../net/ethernet/qlogic/qed/qed_init_fw_funcs.c | 22 +- drivers/net/ethernet/qlogic/qed/qed_init_ops.c | 155 +- drivers/net/ethernet/qlogic/qed/qed_l2.c | 13 +- drivers/net/ethernet/qlogic/qed/qed_main.c | 2 +- drivers/net/ethernet/qlogic/qed/qed_mcp.c | 37 +- drivers/net/ethernet/qlogic/qed/qed_sp.h | 2 +- drivers/net/ethernet/qlogic/qed/qed_sp_commands.c | 17 +- drivers/net/ethernet/qlogic/qede/qede.h | 8 +- drivers/net/ethernet/qlogic/qede/qede_ethtool.c | 6 +- drivers/net/ethernet/qlogic/qede/qede_main.c | 264 +- include/linux/qed/common_hsi.h | 36 +- include/linux/qed/eth_common.h | 171 +- include/linux/qed/qed_if.h | 8 +- 17 files changed, 1795 insertions(+), 1770 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/qlogic/qed/qed.h b/drivers/net/ethernet/qlogic/qed/qed.h index 1292c360390c..d34da638b5d5 100644 --- a/drivers/net/ethernet/qlogic/qed/qed.h +++ b/drivers/net/ethernet/qlogic/qed/qed.h @@ -70,8 +70,8 @@ struct qed_sb_sp_info; struct qed_mcp_info; struct qed_rt_data { - u32 init_val; - bool b_valid; + u32 *init_val; + bool *b_valid; }; /* The PCI personality is not quite synonymous to protocol ID: @@ -120,6 +120,10 @@ enum QED_PORT_MODE { QED_PORT_MODE_DE_1X25G }; +enum qed_dev_cap { + QED_DEV_CAP_ETH, +}; + struct qed_hw_info { /* PCI personality */ enum qed_pci_personality personality; @@ -151,6 +155,7 @@ struct qed_hw_info { u32 port_mode; u32 hw_mode; + unsigned long device_capabilities; }; struct qed_hw_cid_data { @@ -267,7 +272,7 @@ struct qed_hwfn { struct qed_hw_info hw_info; /* rt_array (for init-tool) */ - struct qed_rt_data *rt_data; + struct qed_rt_data rt_data; /* SPQ */ struct qed_spq *p_spq; @@ -350,9 +355,20 @@ struct qed_dev { char name[NAME_SIZE]; u8 type; -#define QED_DEV_TYPE_BB_A0 (0 << 0) -#define QED_DEV_TYPE_MASK (0x3) -#define QED_DEV_TYPE_SHIFT (0) +#define QED_DEV_TYPE_BB (0 << 0) +#define QED_DEV_TYPE_AH BIT(0) +/* Translate type/revision combo into the proper conditions */ +#define QED_IS_BB(dev) ((dev)->type == QED_DEV_TYPE_BB) +#define QED_IS_BB_A0(dev) (QED_IS_BB(dev) && \ + CHIP_REV_IS_A0(dev)) +#define QED_IS_BB_B0(dev) (QED_IS_BB(dev) && \ + CHIP_REV_IS_B0(dev)) + +#define QED_GET_TYPE(dev) (QED_IS_BB_A0(dev) ? CHIP_BB_A0 : \ + QED_IS_BB_B0(dev) ? 
CHIP_BB_B0 : CHIP_K2) + + u16 vendor_id; + u16 device_id; u16 chip_num; #define CHIP_NUM_MASK 0xffff @@ -361,6 +377,8 @@ struct qed_dev { u16 chip_rev; #define CHIP_REV_MASK 0xf #define CHIP_REV_SHIFT 12 +#define CHIP_REV_IS_A0(_cdev) (!(_cdev)->chip_rev) +#define CHIP_REV_IS_B0(_cdev) ((_cdev)->chip_rev == 1) u16 chip_metal; #define CHIP_METAL_MASK 0xff @@ -375,10 +393,10 @@ struct qed_dev { u8 num_funcs_in_port; u8 path_id; - enum mf_mode mf_mode; -#define IS_MF(_p_hwfn) (((_p_hwfn)->cdev)->mf_mode != SF) -#define IS_MF_SI(_p_hwfn) (((_p_hwfn)->cdev)->mf_mode == MF_NPAR) -#define IS_MF_SD(_p_hwfn) (((_p_hwfn)->cdev)->mf_mode == MF_OVLAN) + enum qed_mf_mode mf_mode; +#define IS_MF_DEFAULT(_p_hwfn) (((_p_hwfn)->cdev)->mf_mode == QED_MF_DEFAULT) +#define IS_MF_SI(_p_hwfn) (((_p_hwfn)->cdev)->mf_mode == QED_MF_NPAR) +#define IS_MF_SD(_p_hwfn) (((_p_hwfn)->cdev)->mf_mode == QED_MF_OVLAN) int pcie_width; int pcie_speed; @@ -441,11 +459,6 @@ struct qed_dev { const struct firmware *firmware; }; -#define QED_GET_TYPE(dev) (((dev)->type & QED_DEV_TYPE_MASK) >> \ - QED_DEV_TYPE_SHIFT) -#define QED_IS_BB_A0(dev) (QED_GET_TYPE(dev) == QED_DEV_TYPE_BB_A0) -#define QED_IS_BB(dev) (QED_IS_BB_A0(dev)) - #define NUM_OF_SBS(dev) MAX_SB_PER_PATH_BB #define NUM_OF_ENG_PFS(dev) MAX_NUM_PFS_BB diff --git a/drivers/net/ethernet/qlogic/qed/qed_cxt.c b/drivers/net/ethernet/qlogic/qed/qed_cxt.c index 7ccdb46c6764..d3f7a0215e7e 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_cxt.c +++ b/drivers/net/ethernet/qlogic/qed/qed_cxt.c @@ -581,7 +581,8 @@ void qed_qm_init_pf(struct qed_hwfn *p_hwfn) params.num_pf_cids = iids.cids; params.start_pq = qm_info->start_pq; params.num_pf_pqs = qm_info->num_pqs; - params.start_vport = qm_info->num_vports; + params.start_vport = qm_info->start_vport; + params.num_vports = qm_info->num_vports; params.pf_wfq = qm_info->pf_wfq; params.pf_rl = qm_info->pf_rl; params.pq_params = qm_info->qm_pq_params; diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev.c b/drivers/net/ethernet/qlogic/qed/qed_dev.c index 817bbd5476ff..bc17ed2c9cac 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_dev.c +++ b/drivers/net/ethernet/qlogic/qed/qed_dev.c @@ -341,11 +341,6 @@ void qed_resc_setup(struct qed_dev *cdev) } } -#define FINAL_CLEANUP_CMD_OFFSET (0) -#define FINAL_CLEANUP_CMD (0x1) -#define FINAL_CLEANUP_VALID_OFFSET (6) -#define FINAL_CLEANUP_VFPF_ID_SHIFT (7) -#define FINAL_CLEANUP_COMP (0x2) #define FINAL_CLEANUP_POLL_CNT (100) #define FINAL_CLEANUP_POLL_TIME (10) int qed_final_cleanup(struct qed_hwfn *p_hwfn, @@ -355,12 +350,14 @@ int qed_final_cleanup(struct qed_hwfn *p_hwfn, u32 command = 0, addr, count = FINAL_CLEANUP_POLL_CNT; int rc = -EBUSY; - addr = GTT_BAR0_MAP_REG_USDM_RAM + USTORM_FLR_FINAL_ACK_OFFSET; + addr = GTT_BAR0_MAP_REG_USDM_RAM + + USTORM_FLR_FINAL_ACK_OFFSET(p_hwfn->rel_pf_id); - command |= FINAL_CLEANUP_CMD << FINAL_CLEANUP_CMD_OFFSET; - command |= 1 << FINAL_CLEANUP_VALID_OFFSET; - command |= id << FINAL_CLEANUP_VFPF_ID_SHIFT; - command |= FINAL_CLEANUP_COMP << SDM_OP_GEN_COMP_TYPE_SHIFT; + command |= X_FINAL_CLEANUP_AGG_INT << + SDM_AGG_INT_COMP_PARAMS_AGG_INT_INDEX_SHIFT; + command |= 1 << SDM_AGG_INT_COMP_PARAMS_AGG_VECTOR_ENABLE_SHIFT; + command |= id << SDM_AGG_INT_COMP_PARAMS_AGG_VECTOR_BIT_SHIFT; + command |= SDM_COMP_TYPE_AGG_INT << SDM_OP_GEN_COMP_TYPE_SHIFT; /* Make sure notification is not set before initiating final cleanup */ if (REG_RD(p_hwfn, addr)) { @@ -415,18 +412,16 @@ static void qed_calc_hw_mode(struct qed_hwfn *p_hwfn) } switch (p_hwfn->cdev->mf_mode) { 
- case SF: - hw_mode |= 1 << MODE_SF; + case QED_MF_DEFAULT: + case QED_MF_NPAR: + hw_mode |= 1 << MODE_MF_SI; break; - case MF_OVLAN: + case QED_MF_OVLAN: hw_mode |= 1 << MODE_MF_SD; break; - case MF_NPAR: - hw_mode |= 1 << MODE_MF_SI; - break; default: - DP_NOTICE(p_hwfn, "Unsupported MF mode, init as SF\n"); - hw_mode |= 1 << MODE_SF; + DP_NOTICE(p_hwfn, "Unsupported MF mode, init as DEFAULT\n"); + hw_mode |= 1 << MODE_MF_SI; } hw_mode |= 1 << MODE_ASIC; @@ -1018,8 +1013,7 @@ static void qed_hw_get_resc(struct qed_hwfn *p_hwfn) u32 *resc_num = p_hwfn->hw_info.resc_num; int num_funcs, i; - num_funcs = IS_MF(p_hwfn) ? MAX_NUM_PFS_BB - : p_hwfn->cdev->num_ports_in_engines; + num_funcs = MAX_NUM_PFS_BB; resc_num[QED_SB] = min_t(u32, (MAX_SB_PER_PATH_BB / num_funcs), @@ -1071,7 +1065,7 @@ static int qed_hw_get_nvm_info(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) { u32 nvm_cfg1_offset, mf_mode, addr, generic_cont0, core_cfg; - u32 port_cfg_addr, link_temp, val, nvm_cfg_addr; + u32 port_cfg_addr, link_temp, nvm_cfg_addr, device_capabilities; struct qed_mcp_link_params *link; /* Read global nvm_cfg address */ @@ -1134,21 +1128,6 @@ static int qed_hw_get_nvm_info(struct qed_hwfn *p_hwfn, break; } - addr = MCP_REG_SCRATCH + nvm_cfg1_offset + - offsetof(struct nvm_cfg1, func[MCP_PF_ID(p_hwfn)]) + - offsetof(struct nvm_cfg1_func, device_id); - val = qed_rd(p_hwfn, p_ptt, addr); - - if (IS_MF(p_hwfn)) { - p_hwfn->hw_info.device_id = - (val & NVM_CFG1_FUNC_MF_VENDOR_DEVICE_ID_MASK) >> - NVM_CFG1_FUNC_MF_VENDOR_DEVICE_ID_OFFSET; - } else { - p_hwfn->hw_info.device_id = - (val & NVM_CFG1_FUNC_VENDOR_DEVICE_ID_MASK) >> - NVM_CFG1_FUNC_VENDOR_DEVICE_ID_OFFSET; - } - /* Read default link configuration */ link = &p_hwfn->mcp_info->link_input; port_cfg_addr = MCP_REG_SCRATCH + nvm_cfg1_offset + @@ -1220,18 +1199,28 @@ static int qed_hw_get_nvm_info(struct qed_hwfn *p_hwfn, switch (mf_mode) { case NVM_CFG1_GLOB_MF_MODE_MF_ALLOWED: - p_hwfn->cdev->mf_mode = MF_OVLAN; + p_hwfn->cdev->mf_mode = QED_MF_OVLAN; break; case NVM_CFG1_GLOB_MF_MODE_NPAR1_0: - p_hwfn->cdev->mf_mode = MF_NPAR; + p_hwfn->cdev->mf_mode = QED_MF_NPAR; break; - case NVM_CFG1_GLOB_MF_MODE_FORCED_SF: - p_hwfn->cdev->mf_mode = SF; + case NVM_CFG1_GLOB_MF_MODE_DEFAULT: + p_hwfn->cdev->mf_mode = QED_MF_DEFAULT; break; } DP_INFO(p_hwfn, "Multi function mode is %08x\n", p_hwfn->cdev->mf_mode); + /* Read Multi-function information from shmem */ + addr = MCP_REG_SCRATCH + nvm_cfg1_offset + + offsetof(struct nvm_cfg1, glob) + + offsetof(struct nvm_cfg1_glob, device_capabilities); + + device_capabilities = qed_rd(p_hwfn, p_ptt, addr); + if (device_capabilities & NVM_CFG1_GLOB_DEVICE_CAPABILITIES_ETHERNET) + __set_bit(QED_DEV_CAP_ETH, + &p_hwfn->hw_info.device_capabilities); + return qed_mcp_fill_shmem_func_info(p_hwfn, p_ptt); } @@ -1293,29 +1282,36 @@ qed_get_hw_info(struct qed_hwfn *p_hwfn, static void qed_get_dev_info(struct qed_dev *cdev) { + struct qed_hwfn *p_hwfn = QED_LEADING_HWFN(cdev); u32 tmp; - cdev->chip_num = (u16)qed_rd(cdev->hwfns, cdev->hwfns[0].p_main_ptt, + /* Read Vendor Id / Device Id */ + pci_read_config_word(cdev->pdev, PCI_VENDOR_ID, + &cdev->vendor_id); + pci_read_config_word(cdev->pdev, PCI_DEVICE_ID, + &cdev->device_id); + cdev->chip_num = (u16)qed_rd(p_hwfn, p_hwfn->p_main_ptt, MISCS_REG_CHIP_NUM); - cdev->chip_rev = (u16)qed_rd(cdev->hwfns, cdev->hwfns[0].p_main_ptt, + cdev->chip_rev = (u16)qed_rd(p_hwfn, p_hwfn->p_main_ptt, MISCS_REG_CHIP_REV); MASK_FIELD(CHIP_REV, cdev->chip_rev); + cdev->type = QED_DEV_TYPE_BB; /* 
Learn number of HW-functions */ - tmp = qed_rd(cdev->hwfns, cdev->hwfns[0].p_main_ptt, + tmp = qed_rd(p_hwfn, p_hwfn->p_main_ptt, MISCS_REG_CMT_ENABLED_FOR_PAIR); - if (tmp & (1 << cdev->hwfns[0].rel_pf_id)) { + if (tmp & (1 << p_hwfn->rel_pf_id)) { DP_NOTICE(cdev->hwfns, "device in CMT mode\n"); cdev->num_hwfns = 2; } else { cdev->num_hwfns = 1; } - cdev->chip_bond_id = qed_rd(cdev->hwfns, cdev->hwfns[0].p_main_ptt, + cdev->chip_bond_id = qed_rd(p_hwfn, p_hwfn->p_main_ptt, MISCS_REG_CHIP_TEST_REG) >> 4; MASK_FIELD(CHIP_BOND_ID, cdev->chip_bond_id); - cdev->chip_metal = (u16)qed_rd(cdev->hwfns, cdev->hwfns[0].p_main_ptt, + cdev->chip_metal = (u16)qed_rd(p_hwfn, p_hwfn->p_main_ptt, MISCS_REG_CHIP_METAL); MASK_FIELD(CHIP_METAL, cdev->chip_metal); diff --git a/drivers/net/ethernet/qlogic/qed/qed_hsi.h b/drivers/net/ethernet/qlogic/qed/qed_hsi.h index 264e954675d1..49bbf696a16d 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_hsi.h +++ b/drivers/net/ethernet/qlogic/qed/qed_hsi.h @@ -34,6 +34,8 @@ enum common_event_opcode { COMMON_EVENT_RESERVED3, COMMON_EVENT_RESERVED4, COMMON_EVENT_RESERVED5, + COMMON_EVENT_RESERVED6, + COMMON_EVENT_EMPTY, MAX_COMMON_EVENT_OPCODE }; @@ -45,6 +47,7 @@ enum common_ramrod_cmd_id { COMMON_RAMROD_RESERVED, COMMON_RAMROD_RESERVED2, COMMON_RAMROD_RESERVED3, + COMMON_RAMROD_EMPTY, MAX_COMMON_RAMROD_CMD_ID }; @@ -331,6 +334,179 @@ struct xstorm_core_conn_ag_ctx { __le16 word15 /* word15 */; }; +struct tstorm_core_conn_ag_ctx { + u8 byte0 /* cdu_validation */; + u8 byte1 /* state */; + u8 flags0; +#define TSTORM_CORE_CONN_AG_CTX_BIT0_MASK 0x1 /* exist_in_qm0 */ +#define TSTORM_CORE_CONN_AG_CTX_BIT0_SHIFT 0 +#define TSTORM_CORE_CONN_AG_CTX_BIT1_MASK 0x1 /* exist_in_qm1 */ +#define TSTORM_CORE_CONN_AG_CTX_BIT1_SHIFT 1 +#define TSTORM_CORE_CONN_AG_CTX_BIT2_MASK 0x1 /* bit2 */ +#define TSTORM_CORE_CONN_AG_CTX_BIT2_SHIFT 2 +#define TSTORM_CORE_CONN_AG_CTX_BIT3_MASK 0x1 /* bit3 */ +#define TSTORM_CORE_CONN_AG_CTX_BIT3_SHIFT 3 +#define TSTORM_CORE_CONN_AG_CTX_BIT4_MASK 0x1 /* bit4 */ +#define TSTORM_CORE_CONN_AG_CTX_BIT4_SHIFT 4 +#define TSTORM_CORE_CONN_AG_CTX_BIT5_MASK 0x1 /* bit5 */ +#define TSTORM_CORE_CONN_AG_CTX_BIT5_SHIFT 5 +#define TSTORM_CORE_CONN_AG_CTX_CF0_MASK 0x3 /* timer0cf */ +#define TSTORM_CORE_CONN_AG_CTX_CF0_SHIFT 6 + u8 flags1; +#define TSTORM_CORE_CONN_AG_CTX_CF1_MASK 0x3 /* timer1cf */ +#define TSTORM_CORE_CONN_AG_CTX_CF1_SHIFT 0 +#define TSTORM_CORE_CONN_AG_CTX_CF2_MASK 0x3 /* timer2cf */ +#define TSTORM_CORE_CONN_AG_CTX_CF2_SHIFT 2 +#define TSTORM_CORE_CONN_AG_CTX_CF3_MASK 0x3 /* timer_stop_all */ +#define TSTORM_CORE_CONN_AG_CTX_CF3_SHIFT 4 +#define TSTORM_CORE_CONN_AG_CTX_CF4_MASK 0x3 /* cf4 */ +#define TSTORM_CORE_CONN_AG_CTX_CF4_SHIFT 6 + u8 flags2; +#define TSTORM_CORE_CONN_AG_CTX_CF5_MASK 0x3 /* cf5 */ +#define TSTORM_CORE_CONN_AG_CTX_CF5_SHIFT 0 +#define TSTORM_CORE_CONN_AG_CTX_CF6_MASK 0x3 /* cf6 */ +#define TSTORM_CORE_CONN_AG_CTX_CF6_SHIFT 2 +#define TSTORM_CORE_CONN_AG_CTX_CF7_MASK 0x3 /* cf7 */ +#define TSTORM_CORE_CONN_AG_CTX_CF7_SHIFT 4 +#define TSTORM_CORE_CONN_AG_CTX_CF8_MASK 0x3 /* cf8 */ +#define TSTORM_CORE_CONN_AG_CTX_CF8_SHIFT 6 + u8 flags3; +#define TSTORM_CORE_CONN_AG_CTX_CF9_MASK 0x3 /* cf9 */ +#define TSTORM_CORE_CONN_AG_CTX_CF9_SHIFT 0 +#define TSTORM_CORE_CONN_AG_CTX_CF10_MASK 0x3 /* cf10 */ +#define TSTORM_CORE_CONN_AG_CTX_CF10_SHIFT 2 +#define TSTORM_CORE_CONN_AG_CTX_CF0EN_MASK 0x1 /* cf0en */ +#define TSTORM_CORE_CONN_AG_CTX_CF0EN_SHIFT 4 +#define TSTORM_CORE_CONN_AG_CTX_CF1EN_MASK 0x1 /* cf1en */ +#define 
TSTORM_CORE_CONN_AG_CTX_CF1EN_SHIFT 5 +#define TSTORM_CORE_CONN_AG_CTX_CF2EN_MASK 0x1 /* cf2en */ +#define TSTORM_CORE_CONN_AG_CTX_CF2EN_SHIFT 6 +#define TSTORM_CORE_CONN_AG_CTX_CF3EN_MASK 0x1 /* cf3en */ +#define TSTORM_CORE_CONN_AG_CTX_CF3EN_SHIFT 7 + u8 flags4; +#define TSTORM_CORE_CONN_AG_CTX_CF4EN_MASK 0x1 /* cf4en */ +#define TSTORM_CORE_CONN_AG_CTX_CF4EN_SHIFT 0 +#define TSTORM_CORE_CONN_AG_CTX_CF5EN_MASK 0x1 /* cf5en */ +#define TSTORM_CORE_CONN_AG_CTX_CF5EN_SHIFT 1 +#define TSTORM_CORE_CONN_AG_CTX_CF6EN_MASK 0x1 /* cf6en */ +#define TSTORM_CORE_CONN_AG_CTX_CF6EN_SHIFT 2 +#define TSTORM_CORE_CONN_AG_CTX_CF7EN_MASK 0x1 /* cf7en */ +#define TSTORM_CORE_CONN_AG_CTX_CF7EN_SHIFT 3 +#define TSTORM_CORE_CONN_AG_CTX_CF8EN_MASK 0x1 /* cf8en */ +#define TSTORM_CORE_CONN_AG_CTX_CF8EN_SHIFT 4 +#define TSTORM_CORE_CONN_AG_CTX_CF9EN_MASK 0x1 /* cf9en */ +#define TSTORM_CORE_CONN_AG_CTX_CF9EN_SHIFT 5 +#define TSTORM_CORE_CONN_AG_CTX_CF10EN_MASK 0x1 /* cf10en */ +#define TSTORM_CORE_CONN_AG_CTX_CF10EN_SHIFT 6 +#define TSTORM_CORE_CONN_AG_CTX_RULE0EN_MASK 0x1 /* rule0en */ +#define TSTORM_CORE_CONN_AG_CTX_RULE0EN_SHIFT 7 + u8 flags5; +#define TSTORM_CORE_CONN_AG_CTX_RULE1EN_MASK 0x1 /* rule1en */ +#define TSTORM_CORE_CONN_AG_CTX_RULE1EN_SHIFT 0 +#define TSTORM_CORE_CONN_AG_CTX_RULE2EN_MASK 0x1 /* rule2en */ +#define TSTORM_CORE_CONN_AG_CTX_RULE2EN_SHIFT 1 +#define TSTORM_CORE_CONN_AG_CTX_RULE3EN_MASK 0x1 /* rule3en */ +#define TSTORM_CORE_CONN_AG_CTX_RULE3EN_SHIFT 2 +#define TSTORM_CORE_CONN_AG_CTX_RULE4EN_MASK 0x1 /* rule4en */ +#define TSTORM_CORE_CONN_AG_CTX_RULE4EN_SHIFT 3 +#define TSTORM_CORE_CONN_AG_CTX_RULE5EN_MASK 0x1 /* rule5en */ +#define TSTORM_CORE_CONN_AG_CTX_RULE5EN_SHIFT 4 +#define TSTORM_CORE_CONN_AG_CTX_RULE6EN_MASK 0x1 /* rule6en */ +#define TSTORM_CORE_CONN_AG_CTX_RULE6EN_SHIFT 5 +#define TSTORM_CORE_CONN_AG_CTX_RULE7EN_MASK 0x1 /* rule7en */ +#define TSTORM_CORE_CONN_AG_CTX_RULE7EN_SHIFT 6 +#define TSTORM_CORE_CONN_AG_CTX_RULE8EN_MASK 0x1 /* rule8en */ +#define TSTORM_CORE_CONN_AG_CTX_RULE8EN_SHIFT 7 + __le32 reg0 /* reg0 */; + __le32 reg1 /* reg1 */; + __le32 reg2 /* reg2 */; + __le32 reg3 /* reg3 */; + __le32 reg4 /* reg4 */; + __le32 reg5 /* reg5 */; + __le32 reg6 /* reg6 */; + __le32 reg7 /* reg7 */; + __le32 reg8 /* reg8 */; + u8 byte2 /* byte2 */; + u8 byte3 /* byte3 */; + __le16 word0 /* word0 */; + u8 byte4 /* byte4 */; + u8 byte5 /* byte5 */; + __le16 word1 /* word1 */; + __le16 word2 /* conn_dpi */; + __le16 word3 /* word3 */; + __le32 reg9 /* reg9 */; + __le32 reg10 /* reg10 */; +}; + +struct ustorm_core_conn_ag_ctx { + u8 reserved /* cdu_validation */; + u8 byte1 /* state */; + u8 flags0; +#define USTORM_CORE_CONN_AG_CTX_BIT0_MASK 0x1 /* exist_in_qm0 */ +#define USTORM_CORE_CONN_AG_CTX_BIT0_SHIFT 0 +#define USTORM_CORE_CONN_AG_CTX_BIT1_MASK 0x1 /* exist_in_qm1 */ +#define USTORM_CORE_CONN_AG_CTX_BIT1_SHIFT 1 +#define USTORM_CORE_CONN_AG_CTX_CF0_MASK 0x3 /* timer0cf */ +#define USTORM_CORE_CONN_AG_CTX_CF0_SHIFT 2 +#define USTORM_CORE_CONN_AG_CTX_CF1_MASK 0x3 /* timer1cf */ +#define USTORM_CORE_CONN_AG_CTX_CF1_SHIFT 4 +#define USTORM_CORE_CONN_AG_CTX_CF2_MASK 0x3 /* timer2cf */ +#define USTORM_CORE_CONN_AG_CTX_CF2_SHIFT 6 + u8 flags1; +#define USTORM_CORE_CONN_AG_CTX_CF3_MASK 0x3 /* timer_stop_all */ +#define USTORM_CORE_CONN_AG_CTX_CF3_SHIFT 0 +#define USTORM_CORE_CONN_AG_CTX_CF4_MASK 0x3 /* cf4 */ +#define USTORM_CORE_CONN_AG_CTX_CF4_SHIFT 2 +#define USTORM_CORE_CONN_AG_CTX_CF5_MASK 0x3 /* cf5 */ +#define USTORM_CORE_CONN_AG_CTX_CF5_SHIFT 4 +#define 
USTORM_CORE_CONN_AG_CTX_CF6_MASK 0x3 /* cf6 */ +#define USTORM_CORE_CONN_AG_CTX_CF6_SHIFT 6 + u8 flags2; +#define USTORM_CORE_CONN_AG_CTX_CF0EN_MASK 0x1 /* cf0en */ +#define USTORM_CORE_CONN_AG_CTX_CF0EN_SHIFT 0 +#define USTORM_CORE_CONN_AG_CTX_CF1EN_MASK 0x1 /* cf1en */ +#define USTORM_CORE_CONN_AG_CTX_CF1EN_SHIFT 1 +#define USTORM_CORE_CONN_AG_CTX_CF2EN_MASK 0x1 /* cf2en */ +#define USTORM_CORE_CONN_AG_CTX_CF2EN_SHIFT 2 +#define USTORM_CORE_CONN_AG_CTX_CF3EN_MASK 0x1 /* cf3en */ +#define USTORM_CORE_CONN_AG_CTX_CF3EN_SHIFT 3 +#define USTORM_CORE_CONN_AG_CTX_CF4EN_MASK 0x1 /* cf4en */ +#define USTORM_CORE_CONN_AG_CTX_CF4EN_SHIFT 4 +#define USTORM_CORE_CONN_AG_CTX_CF5EN_MASK 0x1 /* cf5en */ +#define USTORM_CORE_CONN_AG_CTX_CF5EN_SHIFT 5 +#define USTORM_CORE_CONN_AG_CTX_CF6EN_MASK 0x1 /* cf6en */ +#define USTORM_CORE_CONN_AG_CTX_CF6EN_SHIFT 6 +#define USTORM_CORE_CONN_AG_CTX_RULE0EN_MASK 0x1 /* rule0en */ +#define USTORM_CORE_CONN_AG_CTX_RULE0EN_SHIFT 7 + u8 flags3; +#define USTORM_CORE_CONN_AG_CTX_RULE1EN_MASK 0x1 /* rule1en */ +#define USTORM_CORE_CONN_AG_CTX_RULE1EN_SHIFT 0 +#define USTORM_CORE_CONN_AG_CTX_RULE2EN_MASK 0x1 /* rule2en */ +#define USTORM_CORE_CONN_AG_CTX_RULE2EN_SHIFT 1 +#define USTORM_CORE_CONN_AG_CTX_RULE3EN_MASK 0x1 /* rule3en */ +#define USTORM_CORE_CONN_AG_CTX_RULE3EN_SHIFT 2 +#define USTORM_CORE_CONN_AG_CTX_RULE4EN_MASK 0x1 /* rule4en */ +#define USTORM_CORE_CONN_AG_CTX_RULE4EN_SHIFT 3 +#define USTORM_CORE_CONN_AG_CTX_RULE5EN_MASK 0x1 /* rule5en */ +#define USTORM_CORE_CONN_AG_CTX_RULE5EN_SHIFT 4 +#define USTORM_CORE_CONN_AG_CTX_RULE6EN_MASK 0x1 /* rule6en */ +#define USTORM_CORE_CONN_AG_CTX_RULE6EN_SHIFT 5 +#define USTORM_CORE_CONN_AG_CTX_RULE7EN_MASK 0x1 /* rule7en */ +#define USTORM_CORE_CONN_AG_CTX_RULE7EN_SHIFT 6 +#define USTORM_CORE_CONN_AG_CTX_RULE8EN_MASK 0x1 /* rule8en */ +#define USTORM_CORE_CONN_AG_CTX_RULE8EN_SHIFT 7 + u8 byte2 /* byte2 */; + u8 byte3 /* byte3 */; + __le16 word0 /* conn_dpi */; + __le16 word1 /* word1 */; + __le32 rx_producers /* reg0 */; + __le32 reg1 /* reg1 */; + __le32 reg2 /* reg2 */; + __le32 reg3 /* reg3 */; + __le16 word2 /* word2 */; + __le16 word3 /* word3 */; +}; + /* The core storm context for the Mstorm */ struct mstorm_core_conn_st_ctx { __le32 reserved[24]; @@ -349,8 +525,9 @@ struct core_conn_context { struct regpair pstorm_st_padding[2]; struct xstorm_core_conn_st_ctx xstorm_st_context; struct xstorm_core_conn_ag_ctx xstorm_ag_context; + struct tstorm_core_conn_ag_ctx tstorm_ag_context; + struct ustorm_core_conn_ag_ctx ustorm_ag_context; struct mstorm_core_conn_st_ctx mstorm_st_context; - struct regpair mstorm_st_padding[2]; struct ustorm_core_conn_st_ctx ustorm_st_context; struct regpair ustorm_st_padding[2] /* padding */; }; @@ -397,10 +574,12 @@ union event_ring_element { }; enum personality_type { + BAD_PERSONALITY_TYP, PERSONALITY_RESERVED, PERSONALITY_RESERVED2, PERSONALITY_RDMA_AND_ETH /* Roce or Iwarp */, PERSONALITY_RESERVED3, + PERSONALITY_CORE, PERSONALITY_ETH /* Ethernet */, PERSONALITY_RESERVED4, MAX_PERSONALITY_TYPE @@ -570,7 +749,7 @@ enum block_addr { GRCBASE_NWM = 0x800000, GRCBASE_NWS = 0x700000, GRCBASE_MS = 0x6a0000, - GRCBASE_PHY_PCIE = 0x618000, + GRCBASE_PHY_PCIE = 0x620000, GRCBASE_MISC_AEU = 0x8000, GRCBASE_BAR0_MAP = 0x1c00000, MAX_BLOCK_ADDR @@ -795,13 +974,13 @@ enum init_modes { MODE_RESERVED3, MODE_RESERVED4, MODE_RESERVED5, + MODE_RESERVED6, MODE_SF, MODE_MF_SD, MODE_MF_SI, MODE_PORTS_PER_ENG_1, MODE_PORTS_PER_ENG_2, MODE_PORTS_PER_ENG_4, - MODE_40G, MODE_100G, MODE_EAGLE_ENG1_WORKAROUND, 
MAX_INIT_MODES @@ -816,43 +995,6 @@ enum init_phases { MAX_INIT_PHASES }; -struct mstorm_core_conn_ag_ctx { - u8 byte0 /* cdu_validation */; - u8 byte1 /* state */; - u8 flags0; -#define MSTORM_CORE_CONN_AG_CTX_BIT0_MASK 0x1 /* exist_in_qm0 */ -#define MSTORM_CORE_CONN_AG_CTX_BIT0_SHIFT 0 -#define MSTORM_CORE_CONN_AG_CTX_BIT1_MASK 0x1 /* exist_in_qm1 */ -#define MSTORM_CORE_CONN_AG_CTX_BIT1_SHIFT 1 -#define MSTORM_CORE_CONN_AG_CTX_CF0_MASK 0x3 /* cf0 */ -#define MSTORM_CORE_CONN_AG_CTX_CF0_SHIFT 2 -#define MSTORM_CORE_CONN_AG_CTX_CF1_MASK 0x3 /* cf1 */ -#define MSTORM_CORE_CONN_AG_CTX_CF1_SHIFT 4 -#define MSTORM_CORE_CONN_AG_CTX_CF2_MASK 0x3 /* cf2 */ -#define MSTORM_CORE_CONN_AG_CTX_CF2_SHIFT 6 - u8 flags1; -#define MSTORM_CORE_CONN_AG_CTX_CF0EN_MASK 0x1 /* cf0en */ -#define MSTORM_CORE_CONN_AG_CTX_CF0EN_SHIFT 0 -#define MSTORM_CORE_CONN_AG_CTX_CF1EN_MASK 0x1 /* cf1en */ -#define MSTORM_CORE_CONN_AG_CTX_CF1EN_SHIFT 1 -#define MSTORM_CORE_CONN_AG_CTX_CF2EN_MASK 0x1 /* cf2en */ -#define MSTORM_CORE_CONN_AG_CTX_CF2EN_SHIFT 2 -#define MSTORM_CORE_CONN_AG_CTX_RULE0EN_MASK 0x1 /* rule0en */ -#define MSTORM_CORE_CONN_AG_CTX_RULE0EN_SHIFT 3 -#define MSTORM_CORE_CONN_AG_CTX_RULE1EN_MASK 0x1 /* rule1en */ -#define MSTORM_CORE_CONN_AG_CTX_RULE1EN_SHIFT 4 -#define MSTORM_CORE_CONN_AG_CTX_RULE2EN_MASK 0x1 /* rule2en */ -#define MSTORM_CORE_CONN_AG_CTX_RULE2EN_SHIFT 5 -#define MSTORM_CORE_CONN_AG_CTX_RULE3EN_MASK 0x1 /* rule3en */ -#define MSTORM_CORE_CONN_AG_CTX_RULE3EN_SHIFT 6 -#define MSTORM_CORE_CONN_AG_CTX_RULE4EN_MASK 0x1 /* rule4en */ -#define MSTORM_CORE_CONN_AG_CTX_RULE4EN_SHIFT 7 - __le16 word0 /* word0 */; - __le16 word1 /* word1 */; - __le32 reg0 /* reg0 */; - __le32 reg1 /* reg1 */; -}; - /* per encapsulation type enabling flags */ struct prs_reg_encapsulation_type_en { u8 flags; @@ -927,250 +1069,44 @@ struct qm_rf_opportunistic_mask { }; /* QM hardware structure of QM map memory */ -struct qm_rf_pq_map { - u32 reg; -#define QM_RF_PQ_MAP_PQ_VALID_MASK 0x1 /* PQ active */ -#define QM_RF_PQ_MAP_PQ_VALID_SHIFT 0 -#define QM_RF_PQ_MAP_RL_ID_MASK 0xFF /* RL ID */ -#define QM_RF_PQ_MAP_RL_ID_SHIFT 1 -#define QM_RF_PQ_MAP_VP_PQ_ID_MASK 0x1FF -#define QM_RF_PQ_MAP_VP_PQ_ID_SHIFT 9 -#define QM_RF_PQ_MAP_VOQ_MASK 0x1F /* VOQ */ -#define QM_RF_PQ_MAP_VOQ_SHIFT 18 -#define QM_RF_PQ_MAP_WRR_WEIGHT_GROUP_MASK 0x3 /* WRR weight */ -#define QM_RF_PQ_MAP_WRR_WEIGHT_GROUP_SHIFT 23 -#define QM_RF_PQ_MAP_RL_VALID_MASK 0x1 /* RL active */ -#define QM_RF_PQ_MAP_RL_VALID_SHIFT 25 -#define QM_RF_PQ_MAP_RESERVED_MASK 0x3F -#define QM_RF_PQ_MAP_RESERVED_SHIFT 26 -}; - -/* SDM operation gen command (generate aggregative interrupt) */ -struct sdm_op_gen { - __le32 command; -#define SDM_OP_GEN_COMP_PARAM_MASK 0xFFFF /* completion parameters 0-15 */ -#define SDM_OP_GEN_COMP_PARAM_SHIFT 0 -#define SDM_OP_GEN_COMP_TYPE_MASK 0xF /* completion type 16-19 */ -#define SDM_OP_GEN_COMP_TYPE_SHIFT 16 -#define SDM_OP_GEN_RESERVED_MASK 0xFFF /* reserved 20-31 */ -#define SDM_OP_GEN_RESERVED_SHIFT 20 -}; - -struct tstorm_core_conn_ag_ctx { - u8 byte0 /* cdu_validation */; - u8 byte1 /* state */; - u8 flags0; -#define TSTORM_CORE_CONN_AG_CTX_BIT0_MASK 0x1 /* exist_in_qm0 */ -#define TSTORM_CORE_CONN_AG_CTX_BIT0_SHIFT 0 -#define TSTORM_CORE_CONN_AG_CTX_BIT1_MASK 0x1 /* exist_in_qm1 */ -#define TSTORM_CORE_CONN_AG_CTX_BIT1_SHIFT 1 -#define TSTORM_CORE_CONN_AG_CTX_BIT2_MASK 0x1 /* bit2 */ -#define TSTORM_CORE_CONN_AG_CTX_BIT2_SHIFT 2 -#define TSTORM_CORE_CONN_AG_CTX_BIT3_MASK 0x1 /* bit3 */ -#define TSTORM_CORE_CONN_AG_CTX_BIT3_SHIFT 
3 -#define TSTORM_CORE_CONN_AG_CTX_BIT4_MASK 0x1 /* bit4 */ -#define TSTORM_CORE_CONN_AG_CTX_BIT4_SHIFT 4 -#define TSTORM_CORE_CONN_AG_CTX_BIT5_MASK 0x1 /* bit5 */ -#define TSTORM_CORE_CONN_AG_CTX_BIT5_SHIFT 5 -#define TSTORM_CORE_CONN_AG_CTX_CF0_MASK 0x3 /* timer0cf */ -#define TSTORM_CORE_CONN_AG_CTX_CF0_SHIFT 6 - u8 flags1; -#define TSTORM_CORE_CONN_AG_CTX_CF1_MASK 0x3 /* timer1cf */ -#define TSTORM_CORE_CONN_AG_CTX_CF1_SHIFT 0 -#define TSTORM_CORE_CONN_AG_CTX_CF2_MASK 0x3 /* timer2cf */ -#define TSTORM_CORE_CONN_AG_CTX_CF2_SHIFT 2 -#define TSTORM_CORE_CONN_AG_CTX_CF3_MASK 0x3 /* timer_stop_all */ -#define TSTORM_CORE_CONN_AG_CTX_CF3_SHIFT 4 -#define TSTORM_CORE_CONN_AG_CTX_CF4_MASK 0x3 /* cf4 */ -#define TSTORM_CORE_CONN_AG_CTX_CF4_SHIFT 6 - u8 flags2; -#define TSTORM_CORE_CONN_AG_CTX_CF5_MASK 0x3 /* cf5 */ -#define TSTORM_CORE_CONN_AG_CTX_CF5_SHIFT 0 -#define TSTORM_CORE_CONN_AG_CTX_CF6_MASK 0x3 /* cf6 */ -#define TSTORM_CORE_CONN_AG_CTX_CF6_SHIFT 2 -#define TSTORM_CORE_CONN_AG_CTX_CF7_MASK 0x3 /* cf7 */ -#define TSTORM_CORE_CONN_AG_CTX_CF7_SHIFT 4 -#define TSTORM_CORE_CONN_AG_CTX_CF8_MASK 0x3 /* cf8 */ -#define TSTORM_CORE_CONN_AG_CTX_CF8_SHIFT 6 - u8 flags3; -#define TSTORM_CORE_CONN_AG_CTX_CF9_MASK 0x3 /* cf9 */ -#define TSTORM_CORE_CONN_AG_CTX_CF9_SHIFT 0 -#define TSTORM_CORE_CONN_AG_CTX_CF10_MASK 0x3 /* cf10 */ -#define TSTORM_CORE_CONN_AG_CTX_CF10_SHIFT 2 -#define TSTORM_CORE_CONN_AG_CTX_CF0EN_MASK 0x1 /* cf0en */ -#define TSTORM_CORE_CONN_AG_CTX_CF0EN_SHIFT 4 -#define TSTORM_CORE_CONN_AG_CTX_CF1EN_MASK 0x1 /* cf1en */ -#define TSTORM_CORE_CONN_AG_CTX_CF1EN_SHIFT 5 -#define TSTORM_CORE_CONN_AG_CTX_CF2EN_MASK 0x1 /* cf2en */ -#define TSTORM_CORE_CONN_AG_CTX_CF2EN_SHIFT 6 -#define TSTORM_CORE_CONN_AG_CTX_CF3EN_MASK 0x1 /* cf3en */ -#define TSTORM_CORE_CONN_AG_CTX_CF3EN_SHIFT 7 - u8 flags4; -#define TSTORM_CORE_CONN_AG_CTX_CF4EN_MASK 0x1 /* cf4en */ -#define TSTORM_CORE_CONN_AG_CTX_CF4EN_SHIFT 0 -#define TSTORM_CORE_CONN_AG_CTX_CF5EN_MASK 0x1 /* cf5en */ -#define TSTORM_CORE_CONN_AG_CTX_CF5EN_SHIFT 1 -#define TSTORM_CORE_CONN_AG_CTX_CF6EN_MASK 0x1 /* cf6en */ -#define TSTORM_CORE_CONN_AG_CTX_CF6EN_SHIFT 2 -#define TSTORM_CORE_CONN_AG_CTX_CF7EN_MASK 0x1 /* cf7en */ -#define TSTORM_CORE_CONN_AG_CTX_CF7EN_SHIFT 3 -#define TSTORM_CORE_CONN_AG_CTX_CF8EN_MASK 0x1 /* cf8en */ -#define TSTORM_CORE_CONN_AG_CTX_CF8EN_SHIFT 4 -#define TSTORM_CORE_CONN_AG_CTX_CF9EN_MASK 0x1 /* cf9en */ -#define TSTORM_CORE_CONN_AG_CTX_CF9EN_SHIFT 5 -#define TSTORM_CORE_CONN_AG_CTX_CF10EN_MASK 0x1 /* cf10en */ -#define TSTORM_CORE_CONN_AG_CTX_CF10EN_SHIFT 6 -#define TSTORM_CORE_CONN_AG_CTX_RULE0EN_MASK 0x1 /* rule0en */ -#define TSTORM_CORE_CONN_AG_CTX_RULE0EN_SHIFT 7 - u8 flags5; -#define TSTORM_CORE_CONN_AG_CTX_RULE1EN_MASK 0x1 /* rule1en */ -#define TSTORM_CORE_CONN_AG_CTX_RULE1EN_SHIFT 0 -#define TSTORM_CORE_CONN_AG_CTX_RULE2EN_MASK 0x1 /* rule2en */ -#define TSTORM_CORE_CONN_AG_CTX_RULE2EN_SHIFT 1 -#define TSTORM_CORE_CONN_AG_CTX_RULE3EN_MASK 0x1 /* rule3en */ -#define TSTORM_CORE_CONN_AG_CTX_RULE3EN_SHIFT 2 -#define TSTORM_CORE_CONN_AG_CTX_RULE4EN_MASK 0x1 /* rule4en */ -#define TSTORM_CORE_CONN_AG_CTX_RULE4EN_SHIFT 3 -#define TSTORM_CORE_CONN_AG_CTX_RULE5EN_MASK 0x1 /* rule5en */ -#define TSTORM_CORE_CONN_AG_CTX_RULE5EN_SHIFT 4 -#define TSTORM_CORE_CONN_AG_CTX_RULE6EN_MASK 0x1 /* rule6en */ -#define TSTORM_CORE_CONN_AG_CTX_RULE6EN_SHIFT 5 -#define TSTORM_CORE_CONN_AG_CTX_RULE7EN_MASK 0x1 /* rule7en */ -#define TSTORM_CORE_CONN_AG_CTX_RULE7EN_SHIFT 6 -#define TSTORM_CORE_CONN_AG_CTX_RULE8EN_MASK 0x1 /* 
rule8en */ -#define TSTORM_CORE_CONN_AG_CTX_RULE8EN_SHIFT 7 - __le32 reg0 /* reg0 */; - __le32 reg1 /* reg1 */; - __le32 reg2 /* reg2 */; - __le32 reg3 /* reg3 */; - __le32 reg4 /* reg4 */; - __le32 reg5 /* reg5 */; - __le32 reg6 /* reg6 */; - __le32 reg7 /* reg7 */; - __le32 reg8 /* reg8 */; - u8 byte2 /* byte2 */; - u8 byte3 /* byte3 */; - __le16 word0 /* word0 */; - u8 byte4 /* byte4 */; - u8 byte5 /* byte5 */; - __le16 word1 /* word1 */; - __le16 word2 /* conn_dpi */; - __le16 word3 /* word3 */; - __le32 reg9 /* reg9 */; - __le32 reg10 /* reg10 */; +struct qm_rf_pq_map { + u32 reg; +#define QM_RF_PQ_MAP_PQ_VALID_MASK 0x1 /* PQ active */ +#define QM_RF_PQ_MAP_PQ_VALID_SHIFT 0 +#define QM_RF_PQ_MAP_RL_ID_MASK 0xFF /* RL ID */ +#define QM_RF_PQ_MAP_RL_ID_SHIFT 1 +#define QM_RF_PQ_MAP_VP_PQ_ID_MASK 0x1FF +#define QM_RF_PQ_MAP_VP_PQ_ID_SHIFT 9 +#define QM_RF_PQ_MAP_VOQ_MASK 0x1F /* VOQ */ +#define QM_RF_PQ_MAP_VOQ_SHIFT 18 +#define QM_RF_PQ_MAP_WRR_WEIGHT_GROUP_MASK 0x3 /* WRR weight */ +#define QM_RF_PQ_MAP_WRR_WEIGHT_GROUP_SHIFT 23 +#define QM_RF_PQ_MAP_RL_VALID_MASK 0x1 /* RL active */ +#define QM_RF_PQ_MAP_RL_VALID_SHIFT 25 +#define QM_RF_PQ_MAP_RESERVED_MASK 0x3F +#define QM_RF_PQ_MAP_RESERVED_SHIFT 26 }; -struct ustorm_core_conn_ag_ctx { - u8 reserved /* cdu_validation */; - u8 byte1 /* state */; - u8 flags0; -#define USTORM_CORE_CONN_AG_CTX_BIT0_MASK 0x1 /* exist_in_qm0 */ -#define USTORM_CORE_CONN_AG_CTX_BIT0_SHIFT 0 -#define USTORM_CORE_CONN_AG_CTX_BIT1_MASK 0x1 /* exist_in_qm1 */ -#define USTORM_CORE_CONN_AG_CTX_BIT1_SHIFT 1 -#define USTORM_CORE_CONN_AG_CTX_CF0_MASK 0x3 /* timer0cf */ -#define USTORM_CORE_CONN_AG_CTX_CF0_SHIFT 2 -#define USTORM_CORE_CONN_AG_CTX_CF1_MASK 0x3 /* timer1cf */ -#define USTORM_CORE_CONN_AG_CTX_CF1_SHIFT 4 -#define USTORM_CORE_CONN_AG_CTX_CF2_MASK 0x3 /* timer2cf */ -#define USTORM_CORE_CONN_AG_CTX_CF2_SHIFT 6 - u8 flags1; -#define USTORM_CORE_CONN_AG_CTX_CF3_MASK 0x3 /* timer_stop_all */ -#define USTORM_CORE_CONN_AG_CTX_CF3_SHIFT 0 -#define USTORM_CORE_CONN_AG_CTX_CF4_MASK 0x3 /* cf4 */ -#define USTORM_CORE_CONN_AG_CTX_CF4_SHIFT 2 -#define USTORM_CORE_CONN_AG_CTX_CF5_MASK 0x3 /* cf5 */ -#define USTORM_CORE_CONN_AG_CTX_CF5_SHIFT 4 -#define USTORM_CORE_CONN_AG_CTX_CF6_MASK 0x3 /* cf6 */ -#define USTORM_CORE_CONN_AG_CTX_CF6_SHIFT 6 - u8 flags2; -#define USTORM_CORE_CONN_AG_CTX_CF0EN_MASK 0x1 /* cf0en */ -#define USTORM_CORE_CONN_AG_CTX_CF0EN_SHIFT 0 -#define USTORM_CORE_CONN_AG_CTX_CF1EN_MASK 0x1 /* cf1en */ -#define USTORM_CORE_CONN_AG_CTX_CF1EN_SHIFT 1 -#define USTORM_CORE_CONN_AG_CTX_CF2EN_MASK 0x1 /* cf2en */ -#define USTORM_CORE_CONN_AG_CTX_CF2EN_SHIFT 2 -#define USTORM_CORE_CONN_AG_CTX_CF3EN_MASK 0x1 /* cf3en */ -#define USTORM_CORE_CONN_AG_CTX_CF3EN_SHIFT 3 -#define USTORM_CORE_CONN_AG_CTX_CF4EN_MASK 0x1 /* cf4en */ -#define USTORM_CORE_CONN_AG_CTX_CF4EN_SHIFT 4 -#define USTORM_CORE_CONN_AG_CTX_CF5EN_MASK 0x1 /* cf5en */ -#define USTORM_CORE_CONN_AG_CTX_CF5EN_SHIFT 5 -#define USTORM_CORE_CONN_AG_CTX_CF6EN_MASK 0x1 /* cf6en */ -#define USTORM_CORE_CONN_AG_CTX_CF6EN_SHIFT 6 -#define USTORM_CORE_CONN_AG_CTX_RULE0EN_MASK 0x1 /* rule0en */ -#define USTORM_CORE_CONN_AG_CTX_RULE0EN_SHIFT 7 - u8 flags3; -#define USTORM_CORE_CONN_AG_CTX_RULE1EN_MASK 0x1 /* rule1en */ -#define USTORM_CORE_CONN_AG_CTX_RULE1EN_SHIFT 0 -#define USTORM_CORE_CONN_AG_CTX_RULE2EN_MASK 0x1 /* rule2en */ -#define USTORM_CORE_CONN_AG_CTX_RULE2EN_SHIFT 1 -#define USTORM_CORE_CONN_AG_CTX_RULE3EN_MASK 0x1 /* rule3en */ -#define USTORM_CORE_CONN_AG_CTX_RULE3EN_SHIFT 2 -#define 
USTORM_CORE_CONN_AG_CTX_RULE4EN_MASK 0x1 /* rule4en */ -#define USTORM_CORE_CONN_AG_CTX_RULE4EN_SHIFT 3 -#define USTORM_CORE_CONN_AG_CTX_RULE5EN_MASK 0x1 /* rule5en */ -#define USTORM_CORE_CONN_AG_CTX_RULE5EN_SHIFT 4 -#define USTORM_CORE_CONN_AG_CTX_RULE6EN_MASK 0x1 /* rule6en */ -#define USTORM_CORE_CONN_AG_CTX_RULE6EN_SHIFT 5 -#define USTORM_CORE_CONN_AG_CTX_RULE7EN_MASK 0x1 /* rule7en */ -#define USTORM_CORE_CONN_AG_CTX_RULE7EN_SHIFT 6 -#define USTORM_CORE_CONN_AG_CTX_RULE8EN_MASK 0x1 /* rule8en */ -#define USTORM_CORE_CONN_AG_CTX_RULE8EN_SHIFT 7 - u8 byte2 /* byte2 */; - u8 byte3 /* byte3 */; - __le16 word0 /* conn_dpi */; - __le16 word1 /* word1 */; - __le32 rx_producers /* reg0 */; - __le32 reg1 /* reg1 */; - __le32 reg2 /* reg2 */; - __le32 reg3 /* reg3 */; - __le16 word2 /* word2 */; - __le16 word3 /* word3 */; +/* Completion params for aggregated interrupt completion */ +struct sdm_agg_int_comp_params { + __le16 params; +#define SDM_AGG_INT_COMP_PARAMS_AGG_INT_INDEX_MASK 0x3F +#define SDM_AGG_INT_COMP_PARAMS_AGG_INT_INDEX_SHIFT 0 +#define SDM_AGG_INT_COMP_PARAMS_AGG_VECTOR_ENABLE_MASK 0x1 +#define SDM_AGG_INT_COMP_PARAMS_AGG_VECTOR_ENABLE_SHIFT 6 +#define SDM_AGG_INT_COMP_PARAMS_AGG_VECTOR_BIT_MASK 0x1FF +#define SDM_AGG_INT_COMP_PARAMS_AGG_VECTOR_BIT_SHIFT 7 }; -struct ystorm_core_conn_ag_ctx { - u8 byte0 /* cdu_validation */; - u8 byte1 /* state */; - u8 flags0; -#define YSTORM_CORE_CONN_AG_CTX_BIT0_MASK 0x1 /* exist_in_qm0 */ -#define YSTORM_CORE_CONN_AG_CTX_BIT0_SHIFT 0 -#define YSTORM_CORE_CONN_AG_CTX_BIT1_MASK 0x1 /* exist_in_qm1 */ -#define YSTORM_CORE_CONN_AG_CTX_BIT1_SHIFT 1 -#define YSTORM_CORE_CONN_AG_CTX_CF0_MASK 0x3 /* cf0 */ -#define YSTORM_CORE_CONN_AG_CTX_CF0_SHIFT 2 -#define YSTORM_CORE_CONN_AG_CTX_CF1_MASK 0x3 /* cf1 */ -#define YSTORM_CORE_CONN_AG_CTX_CF1_SHIFT 4 -#define YSTORM_CORE_CONN_AG_CTX_CF2_MASK 0x3 /* cf2 */ -#define YSTORM_CORE_CONN_AG_CTX_CF2_SHIFT 6 - u8 flags1; -#define YSTORM_CORE_CONN_AG_CTX_CF0EN_MASK 0x1 /* cf0en */ -#define YSTORM_CORE_CONN_AG_CTX_CF0EN_SHIFT 0 -#define YSTORM_CORE_CONN_AG_CTX_CF1EN_MASK 0x1 /* cf1en */ -#define YSTORM_CORE_CONN_AG_CTX_CF1EN_SHIFT 1 -#define YSTORM_CORE_CONN_AG_CTX_CF2EN_MASK 0x1 /* cf2en */ -#define YSTORM_CORE_CONN_AG_CTX_CF2EN_SHIFT 2 -#define YSTORM_CORE_CONN_AG_CTX_RULE0EN_MASK 0x1 /* rule0en */ -#define YSTORM_CORE_CONN_AG_CTX_RULE0EN_SHIFT 3 -#define YSTORM_CORE_CONN_AG_CTX_RULE1EN_MASK 0x1 /* rule1en */ -#define YSTORM_CORE_CONN_AG_CTX_RULE1EN_SHIFT 4 -#define YSTORM_CORE_CONN_AG_CTX_RULE2EN_MASK 0x1 /* rule2en */ -#define YSTORM_CORE_CONN_AG_CTX_RULE2EN_SHIFT 5 -#define YSTORM_CORE_CONN_AG_CTX_RULE3EN_MASK 0x1 /* rule3en */ -#define YSTORM_CORE_CONN_AG_CTX_RULE3EN_SHIFT 6 -#define YSTORM_CORE_CONN_AG_CTX_RULE4EN_MASK 0x1 /* rule4en */ -#define YSTORM_CORE_CONN_AG_CTX_RULE4EN_SHIFT 7 - u8 byte2 /* byte2 */; - u8 byte3 /* byte3 */; - __le16 word0 /* word0 */; - __le32 reg0 /* reg0 */; - __le32 reg1 /* reg1 */; - __le16 word1 /* word1 */; - __le16 word2 /* word2 */; - __le16 word3 /* word3 */; - __le16 word4 /* word4 */; - __le32 reg2 /* reg2 */; - __le32 reg3 /* reg3 */; +/* SDM operation gen command (generate aggregative interrupt) */ +struct sdm_op_gen { + __le32 command; +#define SDM_OP_GEN_COMP_PARAM_MASK 0xFFFF /* completion parameters 0-15 */ +#define SDM_OP_GEN_COMP_PARAM_SHIFT 0 +#define SDM_OP_GEN_COMP_TYPE_MASK 0xF /* completion type 16-19 */ +#define SDM_OP_GEN_COMP_TYPE_SHIFT 16 +#define SDM_OP_GEN_RESERVED_MASK 0xFFF /* reserved 20-31 */ +#define SDM_OP_GEN_RESERVED_SHIFT 20 }; 
/*********************************** Init ************************************/ @@ -1274,13 +1210,6 @@ enum chip_ids { MAX_CHIP_IDS }; -enum idle_chk_severity_types { - IDLE_CHK_SEVERITY_ERROR /* idle check failure should cause an error */, - IDLE_CHK_SEVERITY_ERROR_NO_TRAFFIC, - IDLE_CHK_SEVERITY_WARNING, - MAX_IDLE_CHK_SEVERITY_TYPES -}; - struct init_array_raw_hdr { __le32 data; #define INIT_ARRAY_RAW_HDR_TYPE_MASK 0xF @@ -1340,14 +1269,6 @@ struct init_callback_op { __le16 block_id /* Blocks ID */; }; -/* init comparison types */ -enum init_comparison_types { - INIT_COMPARISON_EQ /* init value is included in the init command */, - INIT_COMPARISON_OR /* init value is all zeros */, - INIT_COMPARISON_AND /* init value is an array of values */, - MAX_INIT_COMPARISON_TYPES -}; - /* init operation: delay */ struct init_delay_op { __le32 op_data; @@ -1444,12 +1365,10 @@ struct init_read_op { __le32 op_data; #define INIT_READ_OP_OP_MASK 0xF #define INIT_READ_OP_OP_SHIFT 0 -#define INIT_READ_OP_POLL_COMP_MASK 0x7 -#define INIT_READ_OP_POLL_COMP_SHIFT 4 +#define INIT_READ_OP_POLL_TYPE_MASK 0xF +#define INIT_READ_OP_POLL_TYPE_SHIFT 4 #define INIT_READ_OP_RESERVED_MASK 0x1 -#define INIT_READ_OP_RESERVED_SHIFT 7 -#define INIT_READ_OP_POLL_MASK 0x1 -#define INIT_READ_OP_POLL_SHIFT 8 +#define INIT_READ_OP_RESERVED_SHIFT 8 #define INIT_READ_OP_ADDRESS_MASK 0x7FFFFF #define INIT_READ_OP_ADDRESS_SHIFT 9 __le32 expected_val; @@ -1477,6 +1396,14 @@ enum init_op_types { MAX_INIT_OP_TYPES }; +enum init_poll_types { + INIT_POLL_NONE /* No polling */, + INIT_POLL_EQ /* init value is included in the init command */, + INIT_POLL_OR /* init value is all zeros */, + INIT_POLL_AND /* init value is an array of values */, + MAX_INIT_POLL_TYPES +}; + /* init source types */ enum init_source_types { INIT_SRC_INLINE /* init value is included in the init command */, @@ -1677,175 +1604,213 @@ bool qed_send_qm_stop_cmd(struct qed_hwfn *p_hwfn, u16 num_pqs); /* Ystorm flow control mode. 
Use enum fw_flow_ctrl_mode */ -#define YSTORM_FLOW_CONTROL_MODE_OFFSET (IRO[0].base) -#define YSTORM_FLOW_CONTROL_MODE_SIZE (IRO[0].size) +#define YSTORM_FLOW_CONTROL_MODE_OFFSET (IRO[0].base) +#define YSTORM_FLOW_CONTROL_MODE_SIZE (IRO[0].size) /* Tstorm port statistics */ -#define TSTORM_PORT_STAT_OFFSET(port_id) (IRO[1].base + \ - ((port_id) * \ - IRO[1].m1)) -#define TSTORM_PORT_STAT_SIZE (IRO[1].size) +#define TSTORM_PORT_STAT_OFFSET(port_id) (IRO[1].base + ((port_id) * IRO[1].m1)) +#define TSTORM_PORT_STAT_SIZE (IRO[1].size) +/* Tstorm ll2 port statistics */ +#define TSTORM_LL2_PORT_STAT_OFFSET(port_id) \ + (IRO[2].base + ((port_id) * IRO[2].m1)) +#define TSTORM_LL2_PORT_STAT_SIZE (IRO[2].size) /* Ustorm VF-PF Channel ready flag */ -#define USTORM_VF_PF_CHANNEL_READY_OFFSET(vf_id) (IRO[2].base + \ - ((vf_id) * \ - IRO[2].m1)) -#define USTORM_VF_PF_CHANNEL_READY_SIZE (IRO[2].size) +#define USTORM_VF_PF_CHANNEL_READY_OFFSET(vf_id) \ + (IRO[3].base + ((vf_id) * IRO[3].m1)) +#define USTORM_VF_PF_CHANNEL_READY_SIZE (IRO[3].size) /* Ustorm Final flr cleanup ack */ -#define USTORM_FLR_FINAL_ACK_OFFSET (IRO[3].base) -#define USTORM_FLR_FINAL_ACK_SIZE (IRO[3].size) +#define USTORM_FLR_FINAL_ACK_OFFSET(pf_id) (IRO[4].base + ((pf_id) * IRO[4].m1)) +#define USTORM_FLR_FINAL_ACK_SIZE (IRO[4].size) /* Ustorm Event ring consumer */ -#define USTORM_EQE_CONS_OFFSET(pf_id) (IRO[4].base + \ - ((pf_id) * \ - IRO[4].m1)) -#define USTORM_EQE_CONS_SIZE (IRO[4].size) -/* Ustorm Completion ring consumer */ -#define USTORM_CQ_CONS_OFFSET(global_queue_id) (IRO[5].base + \ - ((global_queue_id) * \ - IRO[5].m1)) -#define USTORM_CQ_CONS_SIZE (IRO[5].size) +#define USTORM_EQE_CONS_OFFSET(pf_id) (IRO[5].base + ((pf_id) * IRO[5].m1)) +#define USTORM_EQE_CONS_SIZE (IRO[5].size) +/* Ustorm Common Queue ring consumer */ +#define USTORM_COMMON_QUEUE_CONS_OFFSET(global_queue_id) \ + (IRO[6].base + ((global_queue_id) * IRO[6].m1)) +#define USTORM_COMMON_QUEUE_CONS_SIZE (IRO[6].size) /* Xstorm Integration Test Data */ -#define XSTORM_INTEG_TEST_DATA_OFFSET (IRO[6].base) -#define XSTORM_INTEG_TEST_DATA_SIZE (IRO[6].size) +#define XSTORM_INTEG_TEST_DATA_OFFSET (IRO[7].base) +#define XSTORM_INTEG_TEST_DATA_SIZE (IRO[7].size) /* Ystorm Integration Test Data */ -#define YSTORM_INTEG_TEST_DATA_OFFSET (IRO[7].base) -#define YSTORM_INTEG_TEST_DATA_SIZE (IRO[7].size) +#define YSTORM_INTEG_TEST_DATA_OFFSET (IRO[8].base) +#define YSTORM_INTEG_TEST_DATA_SIZE (IRO[8].size) /* Pstorm Integration Test Data */ -#define PSTORM_INTEG_TEST_DATA_OFFSET (IRO[8].base) -#define PSTORM_INTEG_TEST_DATA_SIZE (IRO[8].size) +#define PSTORM_INTEG_TEST_DATA_OFFSET (IRO[9].base) +#define PSTORM_INTEG_TEST_DATA_SIZE (IRO[9].size) /* Tstorm Integration Test Data */ -#define TSTORM_INTEG_TEST_DATA_OFFSET (IRO[9].base) -#define TSTORM_INTEG_TEST_DATA_SIZE (IRO[9].size) +#define TSTORM_INTEG_TEST_DATA_OFFSET (IRO[10].base) +#define TSTORM_INTEG_TEST_DATA_SIZE (IRO[10].size) /* Mstorm Integration Test Data */ -#define MSTORM_INTEG_TEST_DATA_OFFSET (IRO[10].base) -#define MSTORM_INTEG_TEST_DATA_SIZE (IRO[10].size) +#define MSTORM_INTEG_TEST_DATA_OFFSET (IRO[11].base) +#define MSTORM_INTEG_TEST_DATA_SIZE (IRO[11].size) /* Ustorm Integration Test Data */ -#define USTORM_INTEG_TEST_DATA_OFFSET (IRO[11].base) -#define USTORM_INTEG_TEST_DATA_SIZE (IRO[11].size) +#define USTORM_INTEG_TEST_DATA_OFFSET (IRO[12].base) +#define USTORM_INTEG_TEST_DATA_SIZE (IRO[12].size) /* Tstorm producers */ -#define TSTORM_LL2_RX_PRODS_OFFSET(core_rx_queue_id) (IRO[12].base + \ - 
((core_rx_queue_id) * \ - IRO[12].m1)) -#define TSTORM_LL2_RX_PRODS_SIZE (IRO[12].size) -/* Tstorm LiteL2 queue statistics */ -#define CORE_LL2_TSTORM_PER_QUEUE_STAT_OFFSET(core_rx_q_id) (IRO[13].base + \ - ((core_rx_q_id) * \ - IRO[13].m1)) -#define CORE_LL2_TSTORM_PER_QUEUE_STAT_SIZE (IRO[13].size) +#define TSTORM_LL2_RX_PRODS_OFFSET(core_rx_queue_id) \ + (IRO[13].base + ((core_rx_queue_id) * IRO[13].m1)) +#define TSTORM_LL2_RX_PRODS_SIZE (IRO[13].size) +/* Tstorm LightL2 queue statistics */ +#define CORE_LL2_TSTORM_PER_QUEUE_STAT_OFFSET(core_rx_queue_id) \ + (IRO[14].base + ((core_rx_queue_id) * IRO[14].m1)) +#define CORE_LL2_TSTORM_PER_QUEUE_STAT_SIZE (IRO[14].size) /* Ustorm LiteL2 queue statistics */ -#define CORE_LL2_USTORM_PER_QUEUE_STAT_OFFSET(core_rx_q_id) (IRO[14].base + \ - ((core_rx_q_id) * \ - IRO[14].m1)) -#define CORE_LL2_USTORM_PER_QUEUE_STAT_SIZE (IRO[14].size) +#define CORE_LL2_USTORM_PER_QUEUE_STAT_OFFSET(core_rx_queue_id) \ + (IRO[15].base + ((core_rx_queue_id) * IRO[15].m1)) +#define CORE_LL2_USTORM_PER_QUEUE_STAT_SIZE (IRO[15].size) /* Pstorm LiteL2 queue statistics */ -#define CORE_LL2_PSTORM_PER_QUEUE_STAT_OFFSET(core_txst_id) (IRO[15].base + \ - ((core_txst_id) * \ - IRO[15].m1)) -#define CORE_LL2_PSTORM_PER_QUEUE_STAT_SIZE (IRO[15].size) +#define CORE_LL2_PSTORM_PER_QUEUE_STAT_OFFSET(core_tx_stats_id) \ + (IRO[16].base + ((core_tx_stats_id) * IRO[16].m1)) +#define CORE_LL2_PSTORM_PER_QUEUE_STAT_SIZE (IRO[16].size) /* Mstorm queue statistics */ -#define MSTORM_QUEUE_STAT_OFFSET(stat_counter_id) (IRO[16].base + \ - ((stat_counter_id) * \ - IRO[16].m1)) -#define MSTORM_QUEUE_STAT_SIZE (IRO[16].size) +#define MSTORM_QUEUE_STAT_OFFSET(stat_counter_id) \ + (IRO[17].base + ((stat_counter_id) * IRO[17].m1)) +#define MSTORM_QUEUE_STAT_SIZE (IRO[17].size) /* Mstorm producers */ -#define MSTORM_PRODS_OFFSET(queue_id) (IRO[17].base + \ - ((queue_id) * \ - IRO[17].m1)) -#define MSTORM_PRODS_SIZE (IRO[17].size) +#define MSTORM_PRODS_OFFSET(queue_id) (IRO[18].base + ((queue_id) * IRO[18].m1)) +#define MSTORM_PRODS_SIZE (IRO[18].size) /* TPA agregation timeout in us resolution (on ASIC) */ -#define MSTORM_TPA_TIMEOUT_US_OFFSET (IRO[18].base) -#define MSTORM_TPA_TIMEOUT_US_SIZE (IRO[18].size) +#define MSTORM_TPA_TIMEOUT_US_OFFSET (IRO[19].base) +#define MSTORM_TPA_TIMEOUT_US_SIZE (IRO[19].size) /* Ustorm queue statistics */ -#define USTORM_QUEUE_STAT_OFFSET(stat_counter_id) (IRO[19].base + \ - ((stat_counter_id) * \ - IRO[19].m1)) -#define USTORM_QUEUE_STAT_SIZE (IRO[19].size) +#define USTORM_QUEUE_STAT_OFFSET(stat_counter_id) \ + (IRO[20].base + ((stat_counter_id) * IRO[20].m1)) +#define USTORM_QUEUE_STAT_SIZE (IRO[20].size) /* Ustorm queue zone */ -#define USTORM_ETH_QUEUE_ZONE_OFFSET(queue_id) (IRO[20].base + \ - ((queue_id) * \ - IRO[20].m1)) -#define USTORM_ETH_QUEUE_ZONE_SIZE (IRO[20].size) +#define USTORM_ETH_QUEUE_ZONE_OFFSET(queue_id) \ + (IRO[21].base + ((queue_id) * IRO[21].m1)) +#define USTORM_ETH_QUEUE_ZONE_SIZE (IRO[21].size) /* Pstorm queue statistics */ -#define PSTORM_QUEUE_STAT_OFFSET(stat_counter_id) (IRO[21].base + \ - ((stat_counter_id) * \ - IRO[21].m1)) -#define PSTORM_QUEUE_STAT_SIZE (IRO[21].size) +#define PSTORM_QUEUE_STAT_OFFSET(stat_counter_id) \ + (IRO[22].base + ((stat_counter_id) * IRO[22].m1)) +#define PSTORM_QUEUE_STAT_SIZE (IRO[22].size) /* Tstorm last parser message */ -#define TSTORM_ETH_PRS_INPUT_OFFSET(pf_id) (IRO[22].base + \ - ((pf_id) * \ - IRO[22].m1)) -#define TSTORM_ETH_PRS_INPUT_SIZE (IRO[22].size) +#define TSTORM_ETH_PRS_INPUT_OFFSET 
(IRO[23].base) +#define TSTORM_ETH_PRS_INPUT_SIZE (IRO[23].size) +/* Tstorm Eth limit Rx rate */ +#define ETH_RX_RATE_LIMIT_OFFSET(pf_id) (IRO[24].base + ((pf_id) * IRO[24].m1)) +#define ETH_RX_RATE_LIMIT_SIZE (IRO[24].size) /* Ystorm queue zone */ -#define YSTORM_ETH_QUEUE_ZONE_OFFSET(queue_id) (IRO[23].base + \ - ((queue_id) * \ - IRO[23].m1)) -#define YSTORM_ETH_QUEUE_ZONE_SIZE (IRO[23].size) +#define YSTORM_ETH_QUEUE_ZONE_OFFSET(queue_id) \ + (IRO[25].base + ((queue_id) * IRO[25].m1)) +#define YSTORM_ETH_QUEUE_ZONE_SIZE (IRO[25].size) /* Ystorm cqe producer */ -#define YSTORM_TOE_CQ_PROD_OFFSET(rss_id) (IRO[24].base + \ - ((rss_id) * \ - IRO[24].m1)) -#define YSTORM_TOE_CQ_PROD_SIZE (IRO[24].size) +#define YSTORM_TOE_CQ_PROD_OFFSET(rss_id) \ + (IRO[26].base + ((rss_id) * IRO[26].m1)) +#define YSTORM_TOE_CQ_PROD_SIZE (IRO[26].size) /* Ustorm cqe producer */ -#define USTORM_TOE_CQ_PROD_OFFSET(rss_id) (IRO[25].base + \ - ((rss_id) * \ - IRO[25].m1)) -#define USTORM_TOE_CQ_PROD_SIZE (IRO[25].size) +#define USTORM_TOE_CQ_PROD_OFFSET(rss_id) \ + (IRO[27].base + ((rss_id) * IRO[27].m1)) +#define USTORM_TOE_CQ_PROD_SIZE (IRO[27].size) /* Ustorm grq producer */ -#define USTORM_TOE_GRQ_PROD_OFFSET(pf_id) (IRO[26].base + \ - ((pf_id) * \ - IRO[26].m1)) -#define USTORM_TOE_GRQ_PROD_SIZE (IRO[26].size) +#define USTORM_TOE_GRQ_PROD_OFFSET(pf_id) \ + (IRO[28].base + ((pf_id) * IRO[28].m1)) +#define USTORM_TOE_GRQ_PROD_SIZE (IRO[28].size) /* Tstorm cmdq-cons of given command queue-id */ -#define TSTORM_SCSI_CMDQ_CONS_OFFSET(cmdq_queue_id) (IRO[27].base + \ - ((cmdq_queue_id) * \ - IRO[27].m1)) -#define TSTORM_SCSI_CMDQ_CONS_SIZE (IRO[27].size) +#define TSTORM_SCSI_CMDQ_CONS_OFFSET(cmdq_queue_id) \ + (IRO[29].base + ((cmdq_queue_id) * IRO[29].m1)) +#define TSTORM_SCSI_CMDQ_CONS_SIZE (IRO[29].size) /* Mstorm rq-cons of given queue-id */ -#define MSTORM_SCSI_RQ_CONS_OFFSET(rq_queue_id) (IRO[28].base + \ - ((rq_queue_id) * \ - IRO[28].m1)) -#define MSTORM_SCSI_RQ_CONS_SIZE (IRO[28].size) +#define MSTORM_SCSI_RQ_CONS_OFFSET(rq_queue_id) \ + (IRO[30].base + ((rq_queue_id) * IRO[30].m1)) +#define MSTORM_SCSI_RQ_CONS_SIZE (IRO[30].size) +/* Mstorm bdq-external-producer of given BDQ function ID, BDqueue-id */ +#define MSTORM_SCSI_BDQ_EXT_PROD_OFFSET(func_id, bdq_id) \ + (IRO[31].base + ((func_id) * IRO[31].m1) + ((bdq_id) * IRO[31].m2)) +#define MSTORM_SCSI_BDQ_EXT_PROD_SIZE (IRO[31].size) +/* Tstorm (reflects M-Storm) bdq-external-producer of given fn ID, BDqueue-id */ +#define TSTORM_SCSI_BDQ_EXT_PROD_OFFSET(func_id, bdq_id) \ + (IRO[32].base + ((func_id) * IRO[32].m1) + ((bdq_id) * IRO[32].m2)) +#define TSTORM_SCSI_BDQ_EXT_PROD_SIZE (IRO[32].size) +/* Tstorm iSCSI RX stats */ +#define TSTORM_ISCSI_RX_STATS_OFFSET(pf_id) \ + (IRO[33].base + ((pf_id) * IRO[33].m1)) +#define TSTORM_ISCSI_RX_STATS_SIZE (IRO[33].size) +/* Mstorm iSCSI RX stats */ +#define MSTORM_ISCSI_RX_STATS_OFFSET(pf_id) \ + (IRO[34].base + ((pf_id) * IRO[34].m1)) +#define MSTORM_ISCSI_RX_STATS_SIZE (IRO[34].size) +/* Ustorm iSCSI RX stats */ +#define USTORM_ISCSI_RX_STATS_OFFSET(pf_id) \ + (IRO[35].base + ((pf_id) * IRO[35].m1)) +#define USTORM_ISCSI_RX_STATS_SIZE (IRO[35].size) +/* Xstorm iSCSI TX stats */ +#define XSTORM_ISCSI_TX_STATS_OFFSET(pf_id) \ + (IRO[36].base + ((pf_id) * IRO[36].m1)) +#define XSTORM_ISCSI_TX_STATS_SIZE (IRO[36].size) +/* Ystorm iSCSI TX stats */ +#define YSTORM_ISCSI_TX_STATS_OFFSET(pf_id) \ + (IRO[37].base + ((pf_id) * IRO[37].m1)) +#define YSTORM_ISCSI_TX_STATS_SIZE (IRO[37].size) +/* Pstorm iSCSI TX stats */ 
+#define PSTORM_ISCSI_TX_STATS_OFFSET(pf_id) \ + (IRO[38].base + ((pf_id) * IRO[38].m1)) +#define PSTORM_ISCSI_TX_STATS_SIZE (IRO[38].size) +/* Tstorm FCoE RX stats */ +#define TSTORM_FCOE_RX_STATS_OFFSET(pf_id) \ + (IRO[39].base + ((pf_id) * IRO[39].m1)) +#define TSTORM_FCOE_RX_STATS_SIZE (IRO[39].size) +/* Mstorm FCoE RX stats */ +#define MSTORM_FCOE_RX_STATS_OFFSET(pf_id) \ + (IRO[40].base + ((pf_id) * IRO[40].m1)) +#define MSTORM_FCOE_RX_STATS_SIZE (IRO[40].size) +/* Pstorm FCoE TX stats */ +#define PSTORM_FCOE_TX_STATS_OFFSET(pf_id) \ + (IRO[41].base + ((pf_id) * IRO[41].m1)) +#define PSTORM_FCOE_TX_STATS_SIZE (IRO[41].size) /* Pstorm RoCE statistics */ -#define PSTORM_ROCE_STAT_OFFSET(stat_counter_id) (IRO[29].base + \ - ((stat_counter_id) * \ - IRO[29].m1)) -#define PSTORM_ROCE_STAT_SIZE (IRO[29].size) +#define PSTORM_ROCE_STAT_OFFSET(stat_counter_id) \ + (IRO[42].base + ((stat_counter_id) * IRO[42].m1)) +#define PSTORM_ROCE_STAT_SIZE (IRO[42].size) /* Tstorm RoCE statistics */ -#define TSTORM_ROCE_STAT_OFFSET(stat_counter_id) (IRO[30].base + \ - ((stat_counter_id) * \ - IRO[30].m1)) -#define TSTORM_ROCE_STAT_SIZE (IRO[30].size) - -static const struct iro iro_arr[31] = { - { 0x10, 0x0, 0x0, 0x0, 0x8 }, - { 0x4448, 0x60, 0x0, 0x0, 0x60 }, - { 0x498, 0x8, 0x0, 0x0, 0x4 }, - { 0x494, 0x0, 0x0, 0x0, 0x4 }, - { 0x10, 0x8, 0x0, 0x0, 0x2 }, - { 0x90, 0x8, 0x0, 0x0, 0x2 }, - { 0x4540, 0x0, 0x0, 0x0, 0xf8 }, - { 0x39e0, 0x0, 0x0, 0x0, 0xf8 }, - { 0x2598, 0x0, 0x0, 0x0, 0xf8 }, - { 0x4350, 0x0, 0x0, 0x0, 0xf8 }, - { 0x52d0, 0x0, 0x0, 0x0, 0xf8 }, - { 0x7a48, 0x0, 0x0, 0x0, 0xf8 }, - { 0x100, 0x8, 0x0, 0x0, 0x8 }, - { 0x5808, 0x10, 0x0, 0x0, 0x10 }, - { 0xb100, 0x30, 0x0, 0x0, 0x30 }, - { 0x95c0, 0x30, 0x0, 0x0, 0x30 }, - { 0x54f8, 0x40, 0x0, 0x0, 0x40 }, - { 0x200, 0x10, 0x0, 0x0, 0x8 }, - { 0x9e70, 0x0, 0x0, 0x0, 0x4 }, - { 0x7ca0, 0x40, 0x0, 0x0, 0x30 }, - { 0xd00, 0x8, 0x0, 0x0, 0x8 }, - { 0x2790, 0x80, 0x0, 0x0, 0x38 }, - { 0xa520, 0xf0, 0x0, 0x0, 0xf0 }, - { 0x80, 0x8, 0x0, 0x0, 0x8 }, - { 0xac0, 0x8, 0x0, 0x0, 0x8 }, - { 0x2580, 0x8, 0x0, 0x0, 0x8 }, - { 0x2500, 0x8, 0x0, 0x0, 0x8 }, - { 0x440, 0x8, 0x0, 0x0, 0x2 }, - { 0x1800, 0x8, 0x0, 0x0, 0x2 }, - { 0x27c8, 0x80, 0x0, 0x0, 0x10 }, - { 0x4710, 0x10, 0x0, 0x0, 0x10 }, +#define TSTORM_ROCE_STAT_OFFSET(stat_counter_id) \ + (IRO[43].base + ((stat_counter_id) * IRO[43].m1)) +#define TSTORM_ROCE_STAT_SIZE (IRO[43].size) + +static const struct iro iro_arr[44] = { + { 0x10, 0x0, 0x0, 0x0, 0x8 }, + { 0x47c8, 0x60, 0x0, 0x0, 0x60 }, + { 0x5e30, 0x20, 0x0, 0x0, 0x20 }, + { 0x510, 0x8, 0x0, 0x0, 0x4 }, + { 0x490, 0x8, 0x0, 0x0, 0x4 }, + { 0x10, 0x8, 0x0, 0x0, 0x2 }, + { 0x90, 0x8, 0x0, 0x0, 0x2 }, + { 0x4940, 0x0, 0x0, 0x0, 0x78 }, + { 0x3de0, 0x0, 0x0, 0x0, 0x78 }, + { 0x2998, 0x0, 0x0, 0x0, 0x78 }, + { 0x4750, 0x0, 0x0, 0x0, 0x78 }, + { 0x56d0, 0x0, 0x0, 0x0, 0x78 }, + { 0x7e50, 0x0, 0x0, 0x0, 0x78 }, + { 0x100, 0x8, 0x0, 0x0, 0x8 }, + { 0x5c10, 0x10, 0x0, 0x0, 0x10 }, + { 0xb508, 0x30, 0x0, 0x0, 0x30 }, + { 0x95c0, 0x30, 0x0, 0x0, 0x30 }, + { 0x58a0, 0x40, 0x0, 0x0, 0x40 }, + { 0x200, 0x10, 0x0, 0x0, 0x8 }, + { 0xa230, 0x0, 0x0, 0x0, 0x4 }, + { 0x8058, 0x40, 0x0, 0x0, 0x30 }, + { 0xd00, 0x8, 0x0, 0x0, 0x8 }, + { 0x2b30, 0x80, 0x0, 0x0, 0x38 }, + { 0xa808, 0x0, 0x0, 0x0, 0xf0 }, + { 0xa8f8, 0x8, 0x0, 0x0, 0x8 }, + { 0x80, 0x8, 0x0, 0x0, 0x8 }, + { 0xac0, 0x8, 0x0, 0x0, 0x8 }, + { 0x2580, 0x8, 0x0, 0x0, 0x8 }, + { 0x2500, 0x8, 0x0, 0x0, 0x8 }, + { 0x440, 0x8, 0x0, 0x0, 0x2 }, + { 0x1800, 0x8, 0x0, 0x0, 0x2 }, + { 0x1a00, 0x10, 0x8, 0x0, 0x2 }, 
+ { 0x640, 0x10, 0x8, 0x0, 0x2 }, + { 0xd9b8, 0x38, 0x0, 0x0, 0x24 }, + { 0x11048, 0x10, 0x0, 0x0, 0x8 }, + { 0x11678, 0x38, 0x0, 0x0, 0x18 }, + { 0xaec0, 0x30, 0x0, 0x0, 0x10 }, + { 0x8700, 0x28, 0x0, 0x0, 0x18 }, + { 0xec00, 0x10, 0x0, 0x0, 0x10 }, + { 0xde38, 0x40, 0x0, 0x0, 0x30 }, + { 0x121a8, 0x38, 0x0, 0x0, 0x8 }, + { 0xf068, 0x20, 0x0, 0x0, 0x20 }, + { 0x2b68, 0x80, 0x0, 0x0, 0x10 }, + { 0x4ab8, 0x10, 0x0, 0x0, 0x10 }, }; /* Runtime array offsets */ @@ -1866,426 +1831,427 @@ static const struct iro iro_arr[31] = { #define DORQ_REG_VF_MAX_ICID_6_RT_OFFSET 14 #define DORQ_REG_VF_MAX_ICID_7_RT_OFFSET 15 #define DORQ_REG_PF_WAKE_ALL_RT_OFFSET 16 -#define IGU_REG_PF_CONFIGURATION_RT_OFFSET 17 -#define IGU_REG_VF_CONFIGURATION_RT_OFFSET 18 -#define IGU_REG_ATTN_MSG_ADDR_L_RT_OFFSET 19 -#define IGU_REG_ATTN_MSG_ADDR_H_RT_OFFSET 20 -#define IGU_REG_LEADING_EDGE_LATCH_RT_OFFSET 21 -#define IGU_REG_TRAILING_EDGE_LATCH_RT_OFFSET 22 -#define CAU_REG_CQE_AGG_UNIT_SIZE_RT_OFFSET 23 -#define CAU_REG_SB_VAR_MEMORY_RT_OFFSET 760 +#define DORQ_REG_TAG1_ETHERTYPE_RT_OFFSET 17 +#define IGU_REG_PF_CONFIGURATION_RT_OFFSET 18 +#define IGU_REG_VF_CONFIGURATION_RT_OFFSET 19 +#define IGU_REG_ATTN_MSG_ADDR_L_RT_OFFSET 20 +#define IGU_REG_ATTN_MSG_ADDR_H_RT_OFFSET 21 +#define IGU_REG_LEADING_EDGE_LATCH_RT_OFFSET 22 +#define IGU_REG_TRAILING_EDGE_LATCH_RT_OFFSET 23 +#define CAU_REG_CQE_AGG_UNIT_SIZE_RT_OFFSET 24 +#define CAU_REG_SB_VAR_MEMORY_RT_OFFSET 761 #define CAU_REG_SB_VAR_MEMORY_RT_SIZE 736 -#define CAU_REG_SB_VAR_MEMORY_RT_OFFSET 760 +#define CAU_REG_SB_VAR_MEMORY_RT_OFFSET 761 #define CAU_REG_SB_VAR_MEMORY_RT_SIZE 736 -#define CAU_REG_SB_ADDR_MEMORY_RT_OFFSET 1496 +#define CAU_REG_SB_ADDR_MEMORY_RT_OFFSET 1497 #define CAU_REG_SB_ADDR_MEMORY_RT_SIZE 736 -#define CAU_REG_PI_MEMORY_RT_OFFSET 2232 +#define CAU_REG_PI_MEMORY_RT_OFFSET 2233 #define CAU_REG_PI_MEMORY_RT_SIZE 4416 -#define PRS_REG_SEARCH_RESP_INITIATOR_TYPE_RT_OFFSET 6648 -#define PRS_REG_TASK_ID_MAX_INITIATOR_PF_RT_OFFSET 6649 -#define PRS_REG_TASK_ID_MAX_INITIATOR_VF_RT_OFFSET 6650 -#define PRS_REG_TASK_ID_MAX_TARGET_PF_RT_OFFSET 6651 -#define PRS_REG_TASK_ID_MAX_TARGET_VF_RT_OFFSET 6652 -#define PRS_REG_SEARCH_TCP_RT_OFFSET 6653 -#define PRS_REG_SEARCH_FCOE_RT_OFFSET 6654 -#define PRS_REG_SEARCH_ROCE_RT_OFFSET 6655 -#define PRS_REG_ROCE_DEST_QP_MAX_VF_RT_OFFSET 6656 -#define PRS_REG_ROCE_DEST_QP_MAX_PF_RT_OFFSET 6657 -#define PRS_REG_SEARCH_OPENFLOW_RT_OFFSET 6658 -#define PRS_REG_SEARCH_NON_IP_AS_OPENFLOW_RT_OFFSET 6659 -#define PRS_REG_OPENFLOW_SUPPORT_ONLY_KNOWN_OVER_IP_RT_OFFSET 6660 -#define PRS_REG_OPENFLOW_SEARCH_KEY_MASK_RT_OFFSET 6661 -#define PRS_REG_LIGHT_L2_ETHERTYPE_EN_RT_OFFSET 6662 -#define SRC_REG_FIRSTFREE_RT_OFFSET 6663 +#define PRS_REG_SEARCH_RESP_INITIATOR_TYPE_RT_OFFSET 6649 +#define PRS_REG_TASK_ID_MAX_INITIATOR_PF_RT_OFFSET 6650 +#define PRS_REG_TASK_ID_MAX_INITIATOR_VF_RT_OFFSET 6651 +#define PRS_REG_TASK_ID_MAX_TARGET_PF_RT_OFFSET 6652 +#define PRS_REG_TASK_ID_MAX_TARGET_VF_RT_OFFSET 6653 +#define PRS_REG_SEARCH_TCP_RT_OFFSET 6654 +#define PRS_REG_SEARCH_FCOE_RT_OFFSET 6655 +#define PRS_REG_SEARCH_ROCE_RT_OFFSET 6656 +#define PRS_REG_ROCE_DEST_QP_MAX_VF_RT_OFFSET 6657 +#define PRS_REG_ROCE_DEST_QP_MAX_PF_RT_OFFSET 6658 +#define PRS_REG_SEARCH_OPENFLOW_RT_OFFSET 6659 +#define PRS_REG_SEARCH_NON_IP_AS_OPENFLOW_RT_OFFSET 6660 +#define PRS_REG_OPENFLOW_SUPPORT_ONLY_KNOWN_OVER_IP_RT_OFFSET 6661 +#define PRS_REG_OPENFLOW_SEARCH_KEY_MASK_RT_OFFSET 6662 +#define PRS_REG_TAG_ETHERTYPE_0_RT_OFFSET 6663 +#define 
PRS_REG_LIGHT_L2_ETHERTYPE_EN_RT_OFFSET 6664 +#define SRC_REG_FIRSTFREE_RT_OFFSET 6665 #define SRC_REG_FIRSTFREE_RT_SIZE 2 -#define SRC_REG_LASTFREE_RT_OFFSET 6665 +#define SRC_REG_LASTFREE_RT_OFFSET 6667 #define SRC_REG_LASTFREE_RT_SIZE 2 -#define SRC_REG_COUNTFREE_RT_OFFSET 6667 -#define SRC_REG_NUMBER_HASH_BITS_RT_OFFSET 6668 -#define PSWRQ2_REG_CDUT_P_SIZE_RT_OFFSET 6669 -#define PSWRQ2_REG_CDUC_P_SIZE_RT_OFFSET 6670 -#define PSWRQ2_REG_TM_P_SIZE_RT_OFFSET 6671 -#define PSWRQ2_REG_QM_P_SIZE_RT_OFFSET 6672 -#define PSWRQ2_REG_SRC_P_SIZE_RT_OFFSET 6673 -#define PSWRQ2_REG_TM_FIRST_ILT_RT_OFFSET 6674 -#define PSWRQ2_REG_TM_LAST_ILT_RT_OFFSET 6675 -#define PSWRQ2_REG_QM_FIRST_ILT_RT_OFFSET 6676 -#define PSWRQ2_REG_QM_LAST_ILT_RT_OFFSET 6677 -#define PSWRQ2_REG_SRC_FIRST_ILT_RT_OFFSET 6678 -#define PSWRQ2_REG_SRC_LAST_ILT_RT_OFFSET 6679 -#define PSWRQ2_REG_CDUC_FIRST_ILT_RT_OFFSET 6680 -#define PSWRQ2_REG_CDUC_LAST_ILT_RT_OFFSET 6681 -#define PSWRQ2_REG_CDUT_FIRST_ILT_RT_OFFSET 6682 -#define PSWRQ2_REG_CDUT_LAST_ILT_RT_OFFSET 6683 -#define PSWRQ2_REG_TSDM_FIRST_ILT_RT_OFFSET 6684 -#define PSWRQ2_REG_TSDM_LAST_ILT_RT_OFFSET 6685 -#define PSWRQ2_REG_TM_NUMBER_OF_PF_BLOCKS_RT_OFFSET 6686 -#define PSWRQ2_REG_CDUT_NUMBER_OF_PF_BLOCKS_RT_OFFSET 6687 -#define PSWRQ2_REG_CDUC_NUMBER_OF_PF_BLOCKS_RT_OFFSET 6688 -#define PSWRQ2_REG_TM_VF_BLOCKS_RT_OFFSET 6689 -#define PSWRQ2_REG_CDUT_VF_BLOCKS_RT_OFFSET 6690 -#define PSWRQ2_REG_CDUC_VF_BLOCKS_RT_OFFSET 6691 -#define PSWRQ2_REG_TM_BLOCKS_FACTOR_RT_OFFSET 6692 -#define PSWRQ2_REG_CDUT_BLOCKS_FACTOR_RT_OFFSET 6693 -#define PSWRQ2_REG_CDUC_BLOCKS_FACTOR_RT_OFFSET 6694 -#define PSWRQ2_REG_VF_BASE_RT_OFFSET 6695 -#define PSWRQ2_REG_VF_LAST_ILT_RT_OFFSET 6696 -#define PSWRQ2_REG_WR_MBS0_RT_OFFSET 6697 -#define PSWRQ2_REG_RD_MBS0_RT_OFFSET 6698 -#define PSWRQ2_REG_DRAM_ALIGN_WR_RT_OFFSET 6699 -#define PSWRQ2_REG_DRAM_ALIGN_RD_RT_OFFSET 6700 -#define PSWRQ2_REG_ILT_MEMORY_RT_OFFSET 6701 +#define SRC_REG_COUNTFREE_RT_OFFSET 6669 +#define SRC_REG_NUMBER_HASH_BITS_RT_OFFSET 6670 +#define PSWRQ2_REG_CDUT_P_SIZE_RT_OFFSET 6671 +#define PSWRQ2_REG_CDUC_P_SIZE_RT_OFFSET 6672 +#define PSWRQ2_REG_TM_P_SIZE_RT_OFFSET 6673 +#define PSWRQ2_REG_QM_P_SIZE_RT_OFFSET 6674 +#define PSWRQ2_REG_SRC_P_SIZE_RT_OFFSET 6675 +#define PSWRQ2_REG_TM_FIRST_ILT_RT_OFFSET 6676 +#define PSWRQ2_REG_TM_LAST_ILT_RT_OFFSET 6677 +#define PSWRQ2_REG_QM_FIRST_ILT_RT_OFFSET 6678 +#define PSWRQ2_REG_QM_LAST_ILT_RT_OFFSET 6679 +#define PSWRQ2_REG_SRC_FIRST_ILT_RT_OFFSET 6680 +#define PSWRQ2_REG_SRC_LAST_ILT_RT_OFFSET 6681 +#define PSWRQ2_REG_CDUC_FIRST_ILT_RT_OFFSET 6682 +#define PSWRQ2_REG_CDUC_LAST_ILT_RT_OFFSET 6683 +#define PSWRQ2_REG_CDUT_FIRST_ILT_RT_OFFSET 6684 +#define PSWRQ2_REG_CDUT_LAST_ILT_RT_OFFSET 6685 +#define PSWRQ2_REG_TSDM_FIRST_ILT_RT_OFFSET 6686 +#define PSWRQ2_REG_TSDM_LAST_ILT_RT_OFFSET 6687 +#define PSWRQ2_REG_TM_NUMBER_OF_PF_BLOCKS_RT_OFFSET 6688 +#define PSWRQ2_REG_CDUT_NUMBER_OF_PF_BLOCKS_RT_OFFSET 6689 +#define PSWRQ2_REG_CDUC_NUMBER_OF_PF_BLOCKS_RT_OFFSET 6690 +#define PSWRQ2_REG_TM_VF_BLOCKS_RT_OFFSET 6691 +#define PSWRQ2_REG_CDUT_VF_BLOCKS_RT_OFFSET 6692 +#define PSWRQ2_REG_CDUC_VF_BLOCKS_RT_OFFSET 6693 +#define PSWRQ2_REG_TM_BLOCKS_FACTOR_RT_OFFSET 6694 +#define PSWRQ2_REG_CDUT_BLOCKS_FACTOR_RT_OFFSET 6695 +#define PSWRQ2_REG_CDUC_BLOCKS_FACTOR_RT_OFFSET 6696 +#define PSWRQ2_REG_VF_BASE_RT_OFFSET 6697 +#define PSWRQ2_REG_VF_LAST_ILT_RT_OFFSET 6698 +#define PSWRQ2_REG_WR_MBS0_RT_OFFSET 6699 +#define PSWRQ2_REG_RD_MBS0_RT_OFFSET 6700 +#define 
PSWRQ2_REG_DRAM_ALIGN_WR_RT_OFFSET 6701 +#define PSWRQ2_REG_DRAM_ALIGN_RD_RT_OFFSET 6702 +#define PSWRQ2_REG_ILT_MEMORY_RT_OFFSET 6703 #define PSWRQ2_REG_ILT_MEMORY_RT_SIZE 22000 -#define PGLUE_REG_B_VF_BASE_RT_OFFSET 28701 -#define PGLUE_REG_B_CACHE_LINE_SIZE_RT_OFFSET 28702 -#define PGLUE_REG_B_PF_BAR0_SIZE_RT_OFFSET 28703 -#define PGLUE_REG_B_PF_BAR1_SIZE_RT_OFFSET 28704 -#define PGLUE_REG_B_VF_BAR1_SIZE_RT_OFFSET 28705 -#define TM_REG_VF_ENABLE_CONN_RT_OFFSET 28706 -#define TM_REG_PF_ENABLE_CONN_RT_OFFSET 28707 -#define TM_REG_PF_ENABLE_TASK_RT_OFFSET 28708 -#define TM_REG_GROUP_SIZE_RESOLUTION_CONN_RT_OFFSET 28709 -#define TM_REG_GROUP_SIZE_RESOLUTION_TASK_RT_OFFSET 28710 -#define TM_REG_CONFIG_CONN_MEM_RT_OFFSET 28711 +#define PGLUE_REG_B_VF_BASE_RT_OFFSET 28703 +#define PGLUE_REG_B_CACHE_LINE_SIZE_RT_OFFSET 28704 +#define PGLUE_REG_B_PF_BAR0_SIZE_RT_OFFSET 28705 +#define PGLUE_REG_B_PF_BAR1_SIZE_RT_OFFSET 28706 +#define PGLUE_REG_B_VF_BAR1_SIZE_RT_OFFSET 28707 +#define TM_REG_VF_ENABLE_CONN_RT_OFFSET 28708 +#define TM_REG_PF_ENABLE_CONN_RT_OFFSET 28709 +#define TM_REG_PF_ENABLE_TASK_RT_OFFSET 28710 +#define TM_REG_GROUP_SIZE_RESOLUTION_CONN_RT_OFFSET 28711 +#define TM_REG_GROUP_SIZE_RESOLUTION_TASK_RT_OFFSET 28712 +#define TM_REG_CONFIG_CONN_MEM_RT_OFFSET 28713 #define TM_REG_CONFIG_CONN_MEM_RT_SIZE 416 -#define TM_REG_CONFIG_TASK_MEM_RT_OFFSET 29127 +#define TM_REG_CONFIG_TASK_MEM_RT_OFFSET 29129 #define TM_REG_CONFIG_TASK_MEM_RT_SIZE 512 -#define QM_REG_MAXPQSIZE_0_RT_OFFSET 29639 -#define QM_REG_MAXPQSIZE_1_RT_OFFSET 29640 -#define QM_REG_MAXPQSIZE_2_RT_OFFSET 29641 -#define QM_REG_MAXPQSIZETXSEL_0_RT_OFFSET 29642 -#define QM_REG_MAXPQSIZETXSEL_1_RT_OFFSET 29643 -#define QM_REG_MAXPQSIZETXSEL_2_RT_OFFSET 29644 -#define QM_REG_MAXPQSIZETXSEL_3_RT_OFFSET 29645 -#define QM_REG_MAXPQSIZETXSEL_4_RT_OFFSET 29646 -#define QM_REG_MAXPQSIZETXSEL_5_RT_OFFSET 29647 -#define QM_REG_MAXPQSIZETXSEL_6_RT_OFFSET 29648 -#define QM_REG_MAXPQSIZETXSEL_7_RT_OFFSET 29649 -#define QM_REG_MAXPQSIZETXSEL_8_RT_OFFSET 29650 -#define QM_REG_MAXPQSIZETXSEL_9_RT_OFFSET 29651 -#define QM_REG_MAXPQSIZETXSEL_10_RT_OFFSET 29652 -#define QM_REG_MAXPQSIZETXSEL_11_RT_OFFSET 29653 -#define QM_REG_MAXPQSIZETXSEL_12_RT_OFFSET 29654 -#define QM_REG_MAXPQSIZETXSEL_13_RT_OFFSET 29655 -#define QM_REG_MAXPQSIZETXSEL_14_RT_OFFSET 29656 -#define QM_REG_MAXPQSIZETXSEL_15_RT_OFFSET 29657 -#define QM_REG_MAXPQSIZETXSEL_16_RT_OFFSET 29658 -#define QM_REG_MAXPQSIZETXSEL_17_RT_OFFSET 29659 -#define QM_REG_MAXPQSIZETXSEL_18_RT_OFFSET 29660 -#define QM_REG_MAXPQSIZETXSEL_19_RT_OFFSET 29661 -#define QM_REG_MAXPQSIZETXSEL_20_RT_OFFSET 29662 -#define QM_REG_MAXPQSIZETXSEL_21_RT_OFFSET 29663 -#define QM_REG_MAXPQSIZETXSEL_22_RT_OFFSET 29664 -#define QM_REG_MAXPQSIZETXSEL_23_RT_OFFSET 29665 -#define QM_REG_MAXPQSIZETXSEL_24_RT_OFFSET 29666 -#define QM_REG_MAXPQSIZETXSEL_25_RT_OFFSET 29667 -#define QM_REG_MAXPQSIZETXSEL_26_RT_OFFSET 29668 -#define QM_REG_MAXPQSIZETXSEL_27_RT_OFFSET 29669 -#define QM_REG_MAXPQSIZETXSEL_28_RT_OFFSET 29670 -#define QM_REG_MAXPQSIZETXSEL_29_RT_OFFSET 29671 -#define QM_REG_MAXPQSIZETXSEL_30_RT_OFFSET 29672 -#define QM_REG_MAXPQSIZETXSEL_31_RT_OFFSET 29673 -#define QM_REG_MAXPQSIZETXSEL_32_RT_OFFSET 29674 -#define QM_REG_MAXPQSIZETXSEL_33_RT_OFFSET 29675 -#define QM_REG_MAXPQSIZETXSEL_34_RT_OFFSET 29676 -#define QM_REG_MAXPQSIZETXSEL_35_RT_OFFSET 29677 -#define QM_REG_MAXPQSIZETXSEL_36_RT_OFFSET 29678 -#define QM_REG_MAXPQSIZETXSEL_37_RT_OFFSET 29679 -#define QM_REG_MAXPQSIZETXSEL_38_RT_OFFSET 29680 -#define 
QM_REG_MAXPQSIZETXSEL_39_RT_OFFSET 29681 -#define QM_REG_MAXPQSIZETXSEL_40_RT_OFFSET 29682 -#define QM_REG_MAXPQSIZETXSEL_41_RT_OFFSET 29683 -#define QM_REG_MAXPQSIZETXSEL_42_RT_OFFSET 29684 -#define QM_REG_MAXPQSIZETXSEL_43_RT_OFFSET 29685 -#define QM_REG_MAXPQSIZETXSEL_44_RT_OFFSET 29686 -#define QM_REG_MAXPQSIZETXSEL_45_RT_OFFSET 29687 -#define QM_REG_MAXPQSIZETXSEL_46_RT_OFFSET 29688 -#define QM_REG_MAXPQSIZETXSEL_47_RT_OFFSET 29689 -#define QM_REG_MAXPQSIZETXSEL_48_RT_OFFSET 29690 -#define QM_REG_MAXPQSIZETXSEL_49_RT_OFFSET 29691 -#define QM_REG_MAXPQSIZETXSEL_50_RT_OFFSET 29692 -#define QM_REG_MAXPQSIZETXSEL_51_RT_OFFSET 29693 -#define QM_REG_MAXPQSIZETXSEL_52_RT_OFFSET 29694 -#define QM_REG_MAXPQSIZETXSEL_53_RT_OFFSET 29695 -#define QM_REG_MAXPQSIZETXSEL_54_RT_OFFSET 29696 -#define QM_REG_MAXPQSIZETXSEL_55_RT_OFFSET 29697 -#define QM_REG_MAXPQSIZETXSEL_56_RT_OFFSET 29698 -#define QM_REG_MAXPQSIZETXSEL_57_RT_OFFSET 29699 -#define QM_REG_MAXPQSIZETXSEL_58_RT_OFFSET 29700 -#define QM_REG_MAXPQSIZETXSEL_59_RT_OFFSET 29701 -#define QM_REG_MAXPQSIZETXSEL_60_RT_OFFSET 29702 -#define QM_REG_MAXPQSIZETXSEL_61_RT_OFFSET 29703 -#define QM_REG_MAXPQSIZETXSEL_62_RT_OFFSET 29704 -#define QM_REG_MAXPQSIZETXSEL_63_RT_OFFSET 29705 -#define QM_REG_BASEADDROTHERPQ_RT_OFFSET 29706 +#define QM_REG_MAXPQSIZE_0_RT_OFFSET 29641 +#define QM_REG_MAXPQSIZE_1_RT_OFFSET 29642 +#define QM_REG_MAXPQSIZE_2_RT_OFFSET 29643 +#define QM_REG_MAXPQSIZETXSEL_0_RT_OFFSET 29644 +#define QM_REG_MAXPQSIZETXSEL_1_RT_OFFSET 29645 +#define QM_REG_MAXPQSIZETXSEL_2_RT_OFFSET 29646 +#define QM_REG_MAXPQSIZETXSEL_3_RT_OFFSET 29647 +#define QM_REG_MAXPQSIZETXSEL_4_RT_OFFSET 29648 +#define QM_REG_MAXPQSIZETXSEL_5_RT_OFFSET 29649 +#define QM_REG_MAXPQSIZETXSEL_6_RT_OFFSET 29650 +#define QM_REG_MAXPQSIZETXSEL_7_RT_OFFSET 29651 +#define QM_REG_MAXPQSIZETXSEL_8_RT_OFFSET 29652 +#define QM_REG_MAXPQSIZETXSEL_9_RT_OFFSET 29653 +#define QM_REG_MAXPQSIZETXSEL_10_RT_OFFSET 29654 +#define QM_REG_MAXPQSIZETXSEL_11_RT_OFFSET 29655 +#define QM_REG_MAXPQSIZETXSEL_12_RT_OFFSET 29656 +#define QM_REG_MAXPQSIZETXSEL_13_RT_OFFSET 29657 +#define QM_REG_MAXPQSIZETXSEL_14_RT_OFFSET 29658 +#define QM_REG_MAXPQSIZETXSEL_15_RT_OFFSET 29659 +#define QM_REG_MAXPQSIZETXSEL_16_RT_OFFSET 29660 +#define QM_REG_MAXPQSIZETXSEL_17_RT_OFFSET 29661 +#define QM_REG_MAXPQSIZETXSEL_18_RT_OFFSET 29662 +#define QM_REG_MAXPQSIZETXSEL_19_RT_OFFSET 29663 +#define QM_REG_MAXPQSIZETXSEL_20_RT_OFFSET 29664 +#define QM_REG_MAXPQSIZETXSEL_21_RT_OFFSET 29665 +#define QM_REG_MAXPQSIZETXSEL_22_RT_OFFSET 29666 +#define QM_REG_MAXPQSIZETXSEL_23_RT_OFFSET 29667 +#define QM_REG_MAXPQSIZETXSEL_24_RT_OFFSET 29668 +#define QM_REG_MAXPQSIZETXSEL_25_RT_OFFSET 29669 +#define QM_REG_MAXPQSIZETXSEL_26_RT_OFFSET 29670 +#define QM_REG_MAXPQSIZETXSEL_27_RT_OFFSET 29671 +#define QM_REG_MAXPQSIZETXSEL_28_RT_OFFSET 29672 +#define QM_REG_MAXPQSIZETXSEL_29_RT_OFFSET 29673 +#define QM_REG_MAXPQSIZETXSEL_30_RT_OFFSET 29674 +#define QM_REG_MAXPQSIZETXSEL_31_RT_OFFSET 29675 +#define QM_REG_MAXPQSIZETXSEL_32_RT_OFFSET 29676 +#define QM_REG_MAXPQSIZETXSEL_33_RT_OFFSET 29677 +#define QM_REG_MAXPQSIZETXSEL_34_RT_OFFSET 29678 +#define QM_REG_MAXPQSIZETXSEL_35_RT_OFFSET 29679 +#define QM_REG_MAXPQSIZETXSEL_36_RT_OFFSET 29680 +#define QM_REG_MAXPQSIZETXSEL_37_RT_OFFSET 29681 +#define QM_REG_MAXPQSIZETXSEL_38_RT_OFFSET 29682 +#define QM_REG_MAXPQSIZETXSEL_39_RT_OFFSET 29683 +#define QM_REG_MAXPQSIZETXSEL_40_RT_OFFSET 29684 +#define QM_REG_MAXPQSIZETXSEL_41_RT_OFFSET 29685 +#define QM_REG_MAXPQSIZETXSEL_42_RT_OFFSET 
29686 +#define QM_REG_MAXPQSIZETXSEL_43_RT_OFFSET 29687 +#define QM_REG_MAXPQSIZETXSEL_44_RT_OFFSET 29688 +#define QM_REG_MAXPQSIZETXSEL_45_RT_OFFSET 29689 +#define QM_REG_MAXPQSIZETXSEL_46_RT_OFFSET 29690 +#define QM_REG_MAXPQSIZETXSEL_47_RT_OFFSET 29691 +#define QM_REG_MAXPQSIZETXSEL_48_RT_OFFSET 29692 +#define QM_REG_MAXPQSIZETXSEL_49_RT_OFFSET 29693 +#define QM_REG_MAXPQSIZETXSEL_50_RT_OFFSET 29694 +#define QM_REG_MAXPQSIZETXSEL_51_RT_OFFSET 29695 +#define QM_REG_MAXPQSIZETXSEL_52_RT_OFFSET 29696 +#define QM_REG_MAXPQSIZETXSEL_53_RT_OFFSET 29697 +#define QM_REG_MAXPQSIZETXSEL_54_RT_OFFSET 29698 +#define QM_REG_MAXPQSIZETXSEL_55_RT_OFFSET 29699 +#define QM_REG_MAXPQSIZETXSEL_56_RT_OFFSET 29700 +#define QM_REG_MAXPQSIZETXSEL_57_RT_OFFSET 29701 +#define QM_REG_MAXPQSIZETXSEL_58_RT_OFFSET 29702 +#define QM_REG_MAXPQSIZETXSEL_59_RT_OFFSET 29703 +#define QM_REG_MAXPQSIZETXSEL_60_RT_OFFSET 29704 +#define QM_REG_MAXPQSIZETXSEL_61_RT_OFFSET 29705 +#define QM_REG_MAXPQSIZETXSEL_62_RT_OFFSET 29706 +#define QM_REG_MAXPQSIZETXSEL_63_RT_OFFSET 29707 +#define QM_REG_BASEADDROTHERPQ_RT_OFFSET 29708 #define QM_REG_BASEADDROTHERPQ_RT_SIZE 128 -#define QM_REG_VOQCRDLINE_RT_OFFSET 29834 +#define QM_REG_VOQCRDLINE_RT_OFFSET 29836 #define QM_REG_VOQCRDLINE_RT_SIZE 20 -#define QM_REG_VOQINITCRDLINE_RT_OFFSET 29854 +#define QM_REG_VOQINITCRDLINE_RT_OFFSET 29856 #define QM_REG_VOQINITCRDLINE_RT_SIZE 20 -#define QM_REG_AFULLQMBYPTHRPFWFQ_RT_OFFSET 29874 -#define QM_REG_AFULLQMBYPTHRVPWFQ_RT_OFFSET 29875 -#define QM_REG_AFULLQMBYPTHRPFRL_RT_OFFSET 29876 -#define QM_REG_AFULLQMBYPTHRGLBLRL_RT_OFFSET 29877 -#define QM_REG_AFULLOPRTNSTCCRDMASK_RT_OFFSET 29878 -#define QM_REG_WRROTHERPQGRP_0_RT_OFFSET 29879 -#define QM_REG_WRROTHERPQGRP_1_RT_OFFSET 29880 -#define QM_REG_WRROTHERPQGRP_2_RT_OFFSET 29881 -#define QM_REG_WRROTHERPQGRP_3_RT_OFFSET 29882 -#define QM_REG_WRROTHERPQGRP_4_RT_OFFSET 29883 -#define QM_REG_WRROTHERPQGRP_5_RT_OFFSET 29884 -#define QM_REG_WRROTHERPQGRP_6_RT_OFFSET 29885 -#define QM_REG_WRROTHERPQGRP_7_RT_OFFSET 29886 -#define QM_REG_WRROTHERPQGRP_8_RT_OFFSET 29887 -#define QM_REG_WRROTHERPQGRP_9_RT_OFFSET 29888 -#define QM_REG_WRROTHERPQGRP_10_RT_OFFSET 29889 -#define QM_REG_WRROTHERPQGRP_11_RT_OFFSET 29890 -#define QM_REG_WRROTHERPQGRP_12_RT_OFFSET 29891 -#define QM_REG_WRROTHERPQGRP_13_RT_OFFSET 29892 -#define QM_REG_WRROTHERPQGRP_14_RT_OFFSET 29893 -#define QM_REG_WRROTHERPQGRP_15_RT_OFFSET 29894 -#define QM_REG_WRROTHERGRPWEIGHT_0_RT_OFFSET 29895 -#define QM_REG_WRROTHERGRPWEIGHT_1_RT_OFFSET 29896 -#define QM_REG_WRROTHERGRPWEIGHT_2_RT_OFFSET 29897 -#define QM_REG_WRROTHERGRPWEIGHT_3_RT_OFFSET 29898 -#define QM_REG_WRRTXGRPWEIGHT_0_RT_OFFSET 29899 -#define QM_REG_WRRTXGRPWEIGHT_1_RT_OFFSET 29900 -#define QM_REG_PQTX2PF_0_RT_OFFSET 29901 -#define QM_REG_PQTX2PF_1_RT_OFFSET 29902 -#define QM_REG_PQTX2PF_2_RT_OFFSET 29903 -#define QM_REG_PQTX2PF_3_RT_OFFSET 29904 -#define QM_REG_PQTX2PF_4_RT_OFFSET 29905 -#define QM_REG_PQTX2PF_5_RT_OFFSET 29906 -#define QM_REG_PQTX2PF_6_RT_OFFSET 29907 -#define QM_REG_PQTX2PF_7_RT_OFFSET 29908 -#define QM_REG_PQTX2PF_8_RT_OFFSET 29909 -#define QM_REG_PQTX2PF_9_RT_OFFSET 29910 -#define QM_REG_PQTX2PF_10_RT_OFFSET 29911 -#define QM_REG_PQTX2PF_11_RT_OFFSET 29912 -#define QM_REG_PQTX2PF_12_RT_OFFSET 29913 -#define QM_REG_PQTX2PF_13_RT_OFFSET 29914 -#define QM_REG_PQTX2PF_14_RT_OFFSET 29915 -#define QM_REG_PQTX2PF_15_RT_OFFSET 29916 -#define QM_REG_PQTX2PF_16_RT_OFFSET 29917 -#define QM_REG_PQTX2PF_17_RT_OFFSET 29918 -#define QM_REG_PQTX2PF_18_RT_OFFSET 29919 
-#define QM_REG_PQTX2PF_19_RT_OFFSET 29920 -#define QM_REG_PQTX2PF_20_RT_OFFSET 29921 -#define QM_REG_PQTX2PF_21_RT_OFFSET 29922 -#define QM_REG_PQTX2PF_22_RT_OFFSET 29923 -#define QM_REG_PQTX2PF_23_RT_OFFSET 29924 -#define QM_REG_PQTX2PF_24_RT_OFFSET 29925 -#define QM_REG_PQTX2PF_25_RT_OFFSET 29926 -#define QM_REG_PQTX2PF_26_RT_OFFSET 29927 -#define QM_REG_PQTX2PF_27_RT_OFFSET 29928 -#define QM_REG_PQTX2PF_28_RT_OFFSET 29929 -#define QM_REG_PQTX2PF_29_RT_OFFSET 29930 -#define QM_REG_PQTX2PF_30_RT_OFFSET 29931 -#define QM_REG_PQTX2PF_31_RT_OFFSET 29932 -#define QM_REG_PQTX2PF_32_RT_OFFSET 29933 -#define QM_REG_PQTX2PF_33_RT_OFFSET 29934 -#define QM_REG_PQTX2PF_34_RT_OFFSET 29935 -#define QM_REG_PQTX2PF_35_RT_OFFSET 29936 -#define QM_REG_PQTX2PF_36_RT_OFFSET 29937 -#define QM_REG_PQTX2PF_37_RT_OFFSET 29938 -#define QM_REG_PQTX2PF_38_RT_OFFSET 29939 -#define QM_REG_PQTX2PF_39_RT_OFFSET 29940 -#define QM_REG_PQTX2PF_40_RT_OFFSET 29941 -#define QM_REG_PQTX2PF_41_RT_OFFSET 29942 -#define QM_REG_PQTX2PF_42_RT_OFFSET 29943 -#define QM_REG_PQTX2PF_43_RT_OFFSET 29944 -#define QM_REG_PQTX2PF_44_RT_OFFSET 29945 -#define QM_REG_PQTX2PF_45_RT_OFFSET 29946 -#define QM_REG_PQTX2PF_46_RT_OFFSET 29947 -#define QM_REG_PQTX2PF_47_RT_OFFSET 29948 -#define QM_REG_PQTX2PF_48_RT_OFFSET 29949 -#define QM_REG_PQTX2PF_49_RT_OFFSET 29950 -#define QM_REG_PQTX2PF_50_RT_OFFSET 29951 -#define QM_REG_PQTX2PF_51_RT_OFFSET 29952 -#define QM_REG_PQTX2PF_52_RT_OFFSET 29953 -#define QM_REG_PQTX2PF_53_RT_OFFSET 29954 -#define QM_REG_PQTX2PF_54_RT_OFFSET 29955 -#define QM_REG_PQTX2PF_55_RT_OFFSET 29956 -#define QM_REG_PQTX2PF_56_RT_OFFSET 29957 -#define QM_REG_PQTX2PF_57_RT_OFFSET 29958 -#define QM_REG_PQTX2PF_58_RT_OFFSET 29959 -#define QM_REG_PQTX2PF_59_RT_OFFSET 29960 -#define QM_REG_PQTX2PF_60_RT_OFFSET 29961 -#define QM_REG_PQTX2PF_61_RT_OFFSET 29962 -#define QM_REG_PQTX2PF_62_RT_OFFSET 29963 -#define QM_REG_PQTX2PF_63_RT_OFFSET 29964 -#define QM_REG_PQOTHER2PF_0_RT_OFFSET 29965 -#define QM_REG_PQOTHER2PF_1_RT_OFFSET 29966 -#define QM_REG_PQOTHER2PF_2_RT_OFFSET 29967 -#define QM_REG_PQOTHER2PF_3_RT_OFFSET 29968 -#define QM_REG_PQOTHER2PF_4_RT_OFFSET 29969 -#define QM_REG_PQOTHER2PF_5_RT_OFFSET 29970 -#define QM_REG_PQOTHER2PF_6_RT_OFFSET 29971 -#define QM_REG_PQOTHER2PF_7_RT_OFFSET 29972 -#define QM_REG_PQOTHER2PF_8_RT_OFFSET 29973 -#define QM_REG_PQOTHER2PF_9_RT_OFFSET 29974 -#define QM_REG_PQOTHER2PF_10_RT_OFFSET 29975 -#define QM_REG_PQOTHER2PF_11_RT_OFFSET 29976 -#define QM_REG_PQOTHER2PF_12_RT_OFFSET 29977 -#define QM_REG_PQOTHER2PF_13_RT_OFFSET 29978 -#define QM_REG_PQOTHER2PF_14_RT_OFFSET 29979 -#define QM_REG_PQOTHER2PF_15_RT_OFFSET 29980 -#define QM_REG_RLGLBLPERIOD_0_RT_OFFSET 29981 -#define QM_REG_RLGLBLPERIOD_1_RT_OFFSET 29982 -#define QM_REG_RLGLBLPERIODTIMER_0_RT_OFFSET 29983 -#define QM_REG_RLGLBLPERIODTIMER_1_RT_OFFSET 29984 -#define QM_REG_RLGLBLPERIODSEL_0_RT_OFFSET 29985 -#define QM_REG_RLGLBLPERIODSEL_1_RT_OFFSET 29986 -#define QM_REG_RLGLBLPERIODSEL_2_RT_OFFSET 29987 -#define QM_REG_RLGLBLPERIODSEL_3_RT_OFFSET 29988 -#define QM_REG_RLGLBLPERIODSEL_4_RT_OFFSET 29989 -#define QM_REG_RLGLBLPERIODSEL_5_RT_OFFSET 29990 -#define QM_REG_RLGLBLPERIODSEL_6_RT_OFFSET 29991 -#define QM_REG_RLGLBLPERIODSEL_7_RT_OFFSET 29992 -#define QM_REG_RLGLBLINCVAL_RT_OFFSET 29993 +#define QM_REG_AFULLQMBYPTHRPFWFQ_RT_OFFSET 29876 +#define QM_REG_AFULLQMBYPTHRVPWFQ_RT_OFFSET 29877 +#define QM_REG_AFULLQMBYPTHRPFRL_RT_OFFSET 29878 +#define QM_REG_AFULLQMBYPTHRGLBLRL_RT_OFFSET 29879 +#define QM_REG_AFULLOPRTNSTCCRDMASK_RT_OFFSET 
29880 +#define QM_REG_WRROTHERPQGRP_0_RT_OFFSET 29881 +#define QM_REG_WRROTHERPQGRP_1_RT_OFFSET 29882 +#define QM_REG_WRROTHERPQGRP_2_RT_OFFSET 29883 +#define QM_REG_WRROTHERPQGRP_3_RT_OFFSET 29884 +#define QM_REG_WRROTHERPQGRP_4_RT_OFFSET 29885 +#define QM_REG_WRROTHERPQGRP_5_RT_OFFSET 29886 +#define QM_REG_WRROTHERPQGRP_6_RT_OFFSET 29887 +#define QM_REG_WRROTHERPQGRP_7_RT_OFFSET 29888 +#define QM_REG_WRROTHERPQGRP_8_RT_OFFSET 29889 +#define QM_REG_WRROTHERPQGRP_9_RT_OFFSET 29890 +#define QM_REG_WRROTHERPQGRP_10_RT_OFFSET 29891 +#define QM_REG_WRROTHERPQGRP_11_RT_OFFSET 29892 +#define QM_REG_WRROTHERPQGRP_12_RT_OFFSET 29893 +#define QM_REG_WRROTHERPQGRP_13_RT_OFFSET 29894 +#define QM_REG_WRROTHERPQGRP_14_RT_OFFSET 29895 +#define QM_REG_WRROTHERPQGRP_15_RT_OFFSET 29896 +#define QM_REG_WRROTHERGRPWEIGHT_0_RT_OFFSET 29897 +#define QM_REG_WRROTHERGRPWEIGHT_1_RT_OFFSET 29898 +#define QM_REG_WRROTHERGRPWEIGHT_2_RT_OFFSET 29899 +#define QM_REG_WRROTHERGRPWEIGHT_3_RT_OFFSET 29900 +#define QM_REG_WRRTXGRPWEIGHT_0_RT_OFFSET 29901 +#define QM_REG_WRRTXGRPWEIGHT_1_RT_OFFSET 29902 +#define QM_REG_PQTX2PF_0_RT_OFFSET 29903 +#define QM_REG_PQTX2PF_1_RT_OFFSET 29904 +#define QM_REG_PQTX2PF_2_RT_OFFSET 29905 +#define QM_REG_PQTX2PF_3_RT_OFFSET 29906 +#define QM_REG_PQTX2PF_4_RT_OFFSET 29907 +#define QM_REG_PQTX2PF_5_RT_OFFSET 29908 +#define QM_REG_PQTX2PF_6_RT_OFFSET 29909 +#define QM_REG_PQTX2PF_7_RT_OFFSET 29910 +#define QM_REG_PQTX2PF_8_RT_OFFSET 29911 +#define QM_REG_PQTX2PF_9_RT_OFFSET 29912 +#define QM_REG_PQTX2PF_10_RT_OFFSET 29913 +#define QM_REG_PQTX2PF_11_RT_OFFSET 29914 +#define QM_REG_PQTX2PF_12_RT_OFFSET 29915 +#define QM_REG_PQTX2PF_13_RT_OFFSET 29916 +#define QM_REG_PQTX2PF_14_RT_OFFSET 29917 +#define QM_REG_PQTX2PF_15_RT_OFFSET 29918 +#define QM_REG_PQTX2PF_16_RT_OFFSET 29919 +#define QM_REG_PQTX2PF_17_RT_OFFSET 29920 +#define QM_REG_PQTX2PF_18_RT_OFFSET 29921 +#define QM_REG_PQTX2PF_19_RT_OFFSET 29922 +#define QM_REG_PQTX2PF_20_RT_OFFSET 29923 +#define QM_REG_PQTX2PF_21_RT_OFFSET 29924 +#define QM_REG_PQTX2PF_22_RT_OFFSET 29925 +#define QM_REG_PQTX2PF_23_RT_OFFSET 29926 +#define QM_REG_PQTX2PF_24_RT_OFFSET 29927 +#define QM_REG_PQTX2PF_25_RT_OFFSET 29928 +#define QM_REG_PQTX2PF_26_RT_OFFSET 29929 +#define QM_REG_PQTX2PF_27_RT_OFFSET 29930 +#define QM_REG_PQTX2PF_28_RT_OFFSET 29931 +#define QM_REG_PQTX2PF_29_RT_OFFSET 29932 +#define QM_REG_PQTX2PF_30_RT_OFFSET 29933 +#define QM_REG_PQTX2PF_31_RT_OFFSET 29934 +#define QM_REG_PQTX2PF_32_RT_OFFSET 29935 +#define QM_REG_PQTX2PF_33_RT_OFFSET 29936 +#define QM_REG_PQTX2PF_34_RT_OFFSET 29937 +#define QM_REG_PQTX2PF_35_RT_OFFSET 29938 +#define QM_REG_PQTX2PF_36_RT_OFFSET 29939 +#define QM_REG_PQTX2PF_37_RT_OFFSET 29940 +#define QM_REG_PQTX2PF_38_RT_OFFSET 29941 +#define QM_REG_PQTX2PF_39_RT_OFFSET 29942 +#define QM_REG_PQTX2PF_40_RT_OFFSET 29943 +#define QM_REG_PQTX2PF_41_RT_OFFSET 29944 +#define QM_REG_PQTX2PF_42_RT_OFFSET 29945 +#define QM_REG_PQTX2PF_43_RT_OFFSET 29946 +#define QM_REG_PQTX2PF_44_RT_OFFSET 29947 +#define QM_REG_PQTX2PF_45_RT_OFFSET 29948 +#define QM_REG_PQTX2PF_46_RT_OFFSET 29949 +#define QM_REG_PQTX2PF_47_RT_OFFSET 29950 +#define QM_REG_PQTX2PF_48_RT_OFFSET 29951 +#define QM_REG_PQTX2PF_49_RT_OFFSET 29952 +#define QM_REG_PQTX2PF_50_RT_OFFSET 29953 +#define QM_REG_PQTX2PF_51_RT_OFFSET 29954 +#define QM_REG_PQTX2PF_52_RT_OFFSET 29955 +#define QM_REG_PQTX2PF_53_RT_OFFSET 29956 +#define QM_REG_PQTX2PF_54_RT_OFFSET 29957 +#define QM_REG_PQTX2PF_55_RT_OFFSET 29958 +#define QM_REG_PQTX2PF_56_RT_OFFSET 29959 +#define 
QM_REG_PQTX2PF_57_RT_OFFSET 29960 +#define QM_REG_PQTX2PF_58_RT_OFFSET 29961 +#define QM_REG_PQTX2PF_59_RT_OFFSET 29962 +#define QM_REG_PQTX2PF_60_RT_OFFSET 29963 +#define QM_REG_PQTX2PF_61_RT_OFFSET 29964 +#define QM_REG_PQTX2PF_62_RT_OFFSET 29965 +#define QM_REG_PQTX2PF_63_RT_OFFSET 29966 +#define QM_REG_PQOTHER2PF_0_RT_OFFSET 29967 +#define QM_REG_PQOTHER2PF_1_RT_OFFSET 29968 +#define QM_REG_PQOTHER2PF_2_RT_OFFSET 29969 +#define QM_REG_PQOTHER2PF_3_RT_OFFSET 29970 +#define QM_REG_PQOTHER2PF_4_RT_OFFSET 29971 +#define QM_REG_PQOTHER2PF_5_RT_OFFSET 29972 +#define QM_REG_PQOTHER2PF_6_RT_OFFSET 29973 +#define QM_REG_PQOTHER2PF_7_RT_OFFSET 29974 +#define QM_REG_PQOTHER2PF_8_RT_OFFSET 29975 +#define QM_REG_PQOTHER2PF_9_RT_OFFSET 29976 +#define QM_REG_PQOTHER2PF_10_RT_OFFSET 29977 +#define QM_REG_PQOTHER2PF_11_RT_OFFSET 29978 +#define QM_REG_PQOTHER2PF_12_RT_OFFSET 29979 +#define QM_REG_PQOTHER2PF_13_RT_OFFSET 29980 +#define QM_REG_PQOTHER2PF_14_RT_OFFSET 29981 +#define QM_REG_PQOTHER2PF_15_RT_OFFSET 29982 +#define QM_REG_RLGLBLPERIOD_0_RT_OFFSET 29983 +#define QM_REG_RLGLBLPERIOD_1_RT_OFFSET 29984 +#define QM_REG_RLGLBLPERIODTIMER_0_RT_OFFSET 29985 +#define QM_REG_RLGLBLPERIODTIMER_1_RT_OFFSET 29986 +#define QM_REG_RLGLBLPERIODSEL_0_RT_OFFSET 29987 +#define QM_REG_RLGLBLPERIODSEL_1_RT_OFFSET 29988 +#define QM_REG_RLGLBLPERIODSEL_2_RT_OFFSET 29989 +#define QM_REG_RLGLBLPERIODSEL_3_RT_OFFSET 29990 +#define QM_REG_RLGLBLPERIODSEL_4_RT_OFFSET 29991 +#define QM_REG_RLGLBLPERIODSEL_5_RT_OFFSET 29992 +#define QM_REG_RLGLBLPERIODSEL_6_RT_OFFSET 29993 +#define QM_REG_RLGLBLPERIODSEL_7_RT_OFFSET 29994 +#define QM_REG_RLGLBLINCVAL_RT_OFFSET 29995 #define QM_REG_RLGLBLINCVAL_RT_SIZE 256 -#define QM_REG_RLGLBLUPPERBOUND_RT_OFFSET 30249 +#define QM_REG_RLGLBLUPPERBOUND_RT_OFFSET 30251 #define QM_REG_RLGLBLUPPERBOUND_RT_SIZE 256 -#define QM_REG_RLGLBLCRD_RT_OFFSET 30505 +#define QM_REG_RLGLBLCRD_RT_OFFSET 30507 #define QM_REG_RLGLBLCRD_RT_SIZE 256 -#define QM_REG_RLGLBLENABLE_RT_OFFSET 30761 -#define QM_REG_RLPFPERIOD_RT_OFFSET 30762 -#define QM_REG_RLPFPERIODTIMER_RT_OFFSET 30763 -#define QM_REG_RLPFINCVAL_RT_OFFSET 30764 +#define QM_REG_RLGLBLENABLE_RT_OFFSET 30763 +#define QM_REG_RLPFPERIOD_RT_OFFSET 30764 +#define QM_REG_RLPFPERIODTIMER_RT_OFFSET 30765 +#define QM_REG_RLPFINCVAL_RT_OFFSET 30766 #define QM_REG_RLPFINCVAL_RT_SIZE 16 -#define QM_REG_RLPFUPPERBOUND_RT_OFFSET 30780 +#define QM_REG_RLPFUPPERBOUND_RT_OFFSET 30782 #define QM_REG_RLPFUPPERBOUND_RT_SIZE 16 -#define QM_REG_RLPFCRD_RT_OFFSET 30796 +#define QM_REG_RLPFCRD_RT_OFFSET 30798 #define QM_REG_RLPFCRD_RT_SIZE 16 -#define QM_REG_RLPFENABLE_RT_OFFSET 30812 -#define QM_REG_RLPFVOQENABLE_RT_OFFSET 30813 -#define QM_REG_WFQPFWEIGHT_RT_OFFSET 30814 +#define QM_REG_RLPFENABLE_RT_OFFSET 30814 +#define QM_REG_RLPFVOQENABLE_RT_OFFSET 30815 +#define QM_REG_WFQPFWEIGHT_RT_OFFSET 30816 #define QM_REG_WFQPFWEIGHT_RT_SIZE 16 -#define QM_REG_WFQPFUPPERBOUND_RT_OFFSET 30830 +#define QM_REG_WFQPFUPPERBOUND_RT_OFFSET 30832 #define QM_REG_WFQPFUPPERBOUND_RT_SIZE 16 -#define QM_REG_WFQPFCRD_RT_OFFSET 30846 +#define QM_REG_WFQPFCRD_RT_OFFSET 30848 #define QM_REG_WFQPFCRD_RT_SIZE 160 -#define QM_REG_WFQPFENABLE_RT_OFFSET 31006 -#define QM_REG_WFQVPENABLE_RT_OFFSET 31007 -#define QM_REG_BASEADDRTXPQ_RT_OFFSET 31008 +#define QM_REG_WFQPFENABLE_RT_OFFSET 31008 +#define QM_REG_WFQVPENABLE_RT_OFFSET 31009 +#define QM_REG_BASEADDRTXPQ_RT_OFFSET 31010 #define QM_REG_BASEADDRTXPQ_RT_SIZE 512 -#define QM_REG_TXPQMAP_RT_OFFSET 31520 +#define QM_REG_TXPQMAP_RT_OFFSET 31522 
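/*
 * Editor's sketch (illustrative, not part of the patch): each *_RT_OFFSET
 * constant above indexes the runtime-init shadow array, one u32 slot per
 * register, so array-typed entries such as QM_REG_TXPQMAP are addressed as
 * base offset plus element index. Assuming a STORE_RT_REG-style helper
 * that latches a value at a given runtime offset, recording one TX PQ map
 * entry would look roughly like this:
 */
static inline void ex_store_tx_pq_map(struct qed_hwfn *p_hwfn, u16 pq_id,
				      struct qm_rf_pq_map *p_map)
{
	/* one consecutive 32-bit map word per physical queue */
	STORE_RT_REG(p_hwfn, QM_REG_TXPQMAP_RT_OFFSET + pq_id, p_map->reg);
}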
#define QM_REG_TXPQMAP_RT_SIZE 512 -#define QM_REG_WFQVPWEIGHT_RT_OFFSET 32032 +#define QM_REG_WFQVPWEIGHT_RT_OFFSET 32034 #define QM_REG_WFQVPWEIGHT_RT_SIZE 512 -#define QM_REG_WFQVPUPPERBOUND_RT_OFFSET 32544 -#define QM_REG_WFQVPUPPERBOUND_RT_SIZE 512 -#define QM_REG_WFQVPCRD_RT_OFFSET 33056 +#define QM_REG_WFQVPCRD_RT_OFFSET 32546 #define QM_REG_WFQVPCRD_RT_SIZE 512 -#define QM_REG_WFQVPMAP_RT_OFFSET 33568 +#define QM_REG_WFQVPMAP_RT_OFFSET 33058 #define QM_REG_WFQVPMAP_RT_SIZE 512 -#define QM_REG_WFQPFCRD_MSB_RT_OFFSET 34080 +#define QM_REG_WFQPFCRD_MSB_RT_OFFSET 33570 #define QM_REG_WFQPFCRD_MSB_RT_SIZE 160 -#define NIG_REG_LLH_CLS_TYPE_DUALMODE_RT_OFFSET 34240 -#define NIG_REG_OUTER_TAG_VALUE_LIST0_RT_OFFSET 34241 -#define NIG_REG_OUTER_TAG_VALUE_LIST1_RT_OFFSET 34242 -#define NIG_REG_OUTER_TAG_VALUE_LIST2_RT_OFFSET 34243 -#define NIG_REG_OUTER_TAG_VALUE_LIST3_RT_OFFSET 34244 -#define NIG_REG_OUTER_TAG_VALUE_MASK_RT_OFFSET 34245 -#define NIG_REG_LLH_FUNC_TAGMAC_CLS_TYPE_RT_OFFSET 34246 -#define NIG_REG_LLH_FUNC_TAG_EN_RT_OFFSET 34247 +#define NIG_REG_TAG_ETHERTYPE_0_RT_OFFSET 33730 +#define NIG_REG_OUTER_TAG_VALUE_LIST0_RT_OFFSET 33731 +#define NIG_REG_OUTER_TAG_VALUE_LIST1_RT_OFFSET 33732 +#define NIG_REG_OUTER_TAG_VALUE_LIST2_RT_OFFSET 33733 +#define NIG_REG_OUTER_TAG_VALUE_LIST3_RT_OFFSET 33734 +#define NIG_REG_OUTER_TAG_VALUE_MASK_RT_OFFSET 33735 +#define NIG_REG_LLH_FUNC_TAGMAC_CLS_TYPE_RT_OFFSET 33736 +#define NIG_REG_LLH_FUNC_TAG_EN_RT_OFFSET 33737 #define NIG_REG_LLH_FUNC_TAG_EN_RT_SIZE 4 -#define NIG_REG_LLH_FUNC_TAG_HDR_SEL_RT_OFFSET 34251 +#define NIG_REG_LLH_FUNC_TAG_HDR_SEL_RT_OFFSET 33741 #define NIG_REG_LLH_FUNC_TAG_HDR_SEL_RT_SIZE 4 -#define NIG_REG_LLH_FUNC_TAG_VALUE_RT_OFFSET 34255 +#define NIG_REG_LLH_FUNC_TAG_VALUE_RT_OFFSET 33745 #define NIG_REG_LLH_FUNC_TAG_VALUE_RT_SIZE 4 -#define NIG_REG_LLH_FUNC_NO_TAG_RT_OFFSET 34259 -#define NIG_REG_LLH_FUNC_FILTER_VALUE_RT_OFFSET 34260 +#define NIG_REG_LLH_FUNC_NO_TAG_RT_OFFSET 33749 +#define NIG_REG_LLH_FUNC_FILTER_VALUE_RT_OFFSET 33750 #define NIG_REG_LLH_FUNC_FILTER_VALUE_RT_SIZE 32 -#define NIG_REG_LLH_FUNC_FILTER_EN_RT_OFFSET 34292 +#define NIG_REG_LLH_FUNC_FILTER_EN_RT_OFFSET 33782 #define NIG_REG_LLH_FUNC_FILTER_EN_RT_SIZE 16 -#define NIG_REG_LLH_FUNC_FILTER_MODE_RT_OFFSET 34308 +#define NIG_REG_LLH_FUNC_FILTER_MODE_RT_OFFSET 33798 #define NIG_REG_LLH_FUNC_FILTER_MODE_RT_SIZE 16 -#define NIG_REG_LLH_FUNC_FILTER_PROTOCOL_TYPE_RT_OFFSET 34324 +#define NIG_REG_LLH_FUNC_FILTER_PROTOCOL_TYPE_RT_OFFSET 33814 #define NIG_REG_LLH_FUNC_FILTER_PROTOCOL_TYPE_RT_SIZE 16 -#define NIG_REG_LLH_FUNC_FILTER_HDR_SEL_RT_OFFSET 34340 +#define NIG_REG_LLH_FUNC_FILTER_HDR_SEL_RT_OFFSET 33830 #define NIG_REG_LLH_FUNC_FILTER_HDR_SEL_RT_SIZE 16 -#define NIG_REG_TX_EDPM_CTRL_RT_OFFSET 34356 -#define CDU_REG_CID_ADDR_PARAMS_RT_OFFSET 34357 -#define CDU_REG_SEGMENT0_PARAMS_RT_OFFSET 34358 -#define CDU_REG_SEGMENT1_PARAMS_RT_OFFSET 34359 -#define CDU_REG_PF_SEG0_TYPE_OFFSET_RT_OFFSET 34360 -#define CDU_REG_PF_SEG1_TYPE_OFFSET_RT_OFFSET 34361 -#define CDU_REG_PF_SEG2_TYPE_OFFSET_RT_OFFSET 34362 -#define CDU_REG_PF_SEG3_TYPE_OFFSET_RT_OFFSET 34363 -#define CDU_REG_PF_FL_SEG0_TYPE_OFFSET_RT_OFFSET 34364 -#define CDU_REG_PF_FL_SEG1_TYPE_OFFSET_RT_OFFSET 34365 -#define CDU_REG_PF_FL_SEG2_TYPE_OFFSET_RT_OFFSET 34366 -#define CDU_REG_PF_FL_SEG3_TYPE_OFFSET_RT_OFFSET 34367 -#define CDU_REG_VF_SEG_TYPE_OFFSET_RT_OFFSET 34368 -#define CDU_REG_VF_FL_SEG_TYPE_OFFSET_RT_OFFSET 34369 -#define PBF_REG_BTB_SHARED_AREA_SIZE_RT_OFFSET 34370 -#define 
PBF_REG_YCMD_QS_NUM_LINES_VOQ0_RT_OFFSET 34371 -#define PBF_REG_BTB_GUARANTEED_VOQ0_RT_OFFSET 34372 -#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ0_RT_OFFSET 34373 -#define PBF_REG_YCMD_QS_NUM_LINES_VOQ1_RT_OFFSET 34374 -#define PBF_REG_BTB_GUARANTEED_VOQ1_RT_OFFSET 34375 -#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ1_RT_OFFSET 34376 -#define PBF_REG_YCMD_QS_NUM_LINES_VOQ2_RT_OFFSET 34377 -#define PBF_REG_BTB_GUARANTEED_VOQ2_RT_OFFSET 34378 -#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ2_RT_OFFSET 34379 -#define PBF_REG_YCMD_QS_NUM_LINES_VOQ3_RT_OFFSET 34380 -#define PBF_REG_BTB_GUARANTEED_VOQ3_RT_OFFSET 34381 -#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ3_RT_OFFSET 34382 -#define PBF_REG_YCMD_QS_NUM_LINES_VOQ4_RT_OFFSET 34383 -#define PBF_REG_BTB_GUARANTEED_VOQ4_RT_OFFSET 34384 -#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ4_RT_OFFSET 34385 -#define PBF_REG_YCMD_QS_NUM_LINES_VOQ5_RT_OFFSET 34386 -#define PBF_REG_BTB_GUARANTEED_VOQ5_RT_OFFSET 34387 -#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ5_RT_OFFSET 34388 -#define PBF_REG_YCMD_QS_NUM_LINES_VOQ6_RT_OFFSET 34389 -#define PBF_REG_BTB_GUARANTEED_VOQ6_RT_OFFSET 34390 -#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ6_RT_OFFSET 34391 -#define PBF_REG_YCMD_QS_NUM_LINES_VOQ7_RT_OFFSET 34392 -#define PBF_REG_BTB_GUARANTEED_VOQ7_RT_OFFSET 34393 -#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ7_RT_OFFSET 34394 -#define PBF_REG_YCMD_QS_NUM_LINES_VOQ8_RT_OFFSET 34395 -#define PBF_REG_BTB_GUARANTEED_VOQ8_RT_OFFSET 34396 -#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ8_RT_OFFSET 34397 -#define PBF_REG_YCMD_QS_NUM_LINES_VOQ9_RT_OFFSET 34398 -#define PBF_REG_BTB_GUARANTEED_VOQ9_RT_OFFSET 34399 -#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ9_RT_OFFSET 34400 -#define PBF_REG_YCMD_QS_NUM_LINES_VOQ10_RT_OFFSET 34401 -#define PBF_REG_BTB_GUARANTEED_VOQ10_RT_OFFSET 34402 -#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ10_RT_OFFSET 34403 -#define PBF_REG_YCMD_QS_NUM_LINES_VOQ11_RT_OFFSET 34404 -#define PBF_REG_BTB_GUARANTEED_VOQ11_RT_OFFSET 34405 -#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ11_RT_OFFSET 34406 -#define PBF_REG_YCMD_QS_NUM_LINES_VOQ12_RT_OFFSET 34407 -#define PBF_REG_BTB_GUARANTEED_VOQ12_RT_OFFSET 34408 -#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ12_RT_OFFSET 34409 -#define PBF_REG_YCMD_QS_NUM_LINES_VOQ13_RT_OFFSET 34410 -#define PBF_REG_BTB_GUARANTEED_VOQ13_RT_OFFSET 34411 -#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ13_RT_OFFSET 34412 -#define PBF_REG_YCMD_QS_NUM_LINES_VOQ14_RT_OFFSET 34413 -#define PBF_REG_BTB_GUARANTEED_VOQ14_RT_OFFSET 34414 -#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ14_RT_OFFSET 34415 -#define PBF_REG_YCMD_QS_NUM_LINES_VOQ15_RT_OFFSET 34416 -#define PBF_REG_BTB_GUARANTEED_VOQ15_RT_OFFSET 34417 -#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ15_RT_OFFSET 34418 -#define PBF_REG_YCMD_QS_NUM_LINES_VOQ16_RT_OFFSET 34419 -#define PBF_REG_BTB_GUARANTEED_VOQ16_RT_OFFSET 34420 -#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ16_RT_OFFSET 34421 -#define PBF_REG_YCMD_QS_NUM_LINES_VOQ17_RT_OFFSET 34422 -#define PBF_REG_BTB_GUARANTEED_VOQ17_RT_OFFSET 34423 -#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ17_RT_OFFSET 34424 -#define PBF_REG_YCMD_QS_NUM_LINES_VOQ18_RT_OFFSET 34425 -#define PBF_REG_BTB_GUARANTEED_VOQ18_RT_OFFSET 34426 -#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ18_RT_OFFSET 34427 -#define PBF_REG_YCMD_QS_NUM_LINES_VOQ19_RT_OFFSET 34428 -#define PBF_REG_BTB_GUARANTEED_VOQ19_RT_OFFSET 34429 -#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ19_RT_OFFSET 34430 -#define XCM_REG_CON_PHY_Q3_RT_OFFSET 34431 - -#define RUNTIME_ARRAY_SIZE 34432 +#define NIG_REG_TX_EDPM_CTRL_RT_OFFSET 33846 +#define 
CDU_REG_CID_ADDR_PARAMS_RT_OFFSET 33847 +#define CDU_REG_SEGMENT0_PARAMS_RT_OFFSET 33848 +#define CDU_REG_SEGMENT1_PARAMS_RT_OFFSET 33849 +#define CDU_REG_PF_SEG0_TYPE_OFFSET_RT_OFFSET 33850 +#define CDU_REG_PF_SEG1_TYPE_OFFSET_RT_OFFSET 33851 +#define CDU_REG_PF_SEG2_TYPE_OFFSET_RT_OFFSET 33852 +#define CDU_REG_PF_SEG3_TYPE_OFFSET_RT_OFFSET 33853 +#define CDU_REG_PF_FL_SEG0_TYPE_OFFSET_RT_OFFSET 33854 +#define CDU_REG_PF_FL_SEG1_TYPE_OFFSET_RT_OFFSET 33855 +#define CDU_REG_PF_FL_SEG2_TYPE_OFFSET_RT_OFFSET 33856 +#define CDU_REG_PF_FL_SEG3_TYPE_OFFSET_RT_OFFSET 33857 +#define CDU_REG_VF_SEG_TYPE_OFFSET_RT_OFFSET 33858 +#define CDU_REG_VF_FL_SEG_TYPE_OFFSET_RT_OFFSET 33859 +#define PBF_REG_TAG_ETHERTYPE_0_RT_OFFSET 33860 +#define PBF_REG_BTB_SHARED_AREA_SIZE_RT_OFFSET 33861 +#define PBF_REG_YCMD_QS_NUM_LINES_VOQ0_RT_OFFSET 33862 +#define PBF_REG_BTB_GUARANTEED_VOQ0_RT_OFFSET 33863 +#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ0_RT_OFFSET 33864 +#define PBF_REG_YCMD_QS_NUM_LINES_VOQ1_RT_OFFSET 33865 +#define PBF_REG_BTB_GUARANTEED_VOQ1_RT_OFFSET 33866 +#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ1_RT_OFFSET 33867 +#define PBF_REG_YCMD_QS_NUM_LINES_VOQ2_RT_OFFSET 33868 +#define PBF_REG_BTB_GUARANTEED_VOQ2_RT_OFFSET 33869 +#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ2_RT_OFFSET 33870 +#define PBF_REG_YCMD_QS_NUM_LINES_VOQ3_RT_OFFSET 33871 +#define PBF_REG_BTB_GUARANTEED_VOQ3_RT_OFFSET 33872 +#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ3_RT_OFFSET 33873 +#define PBF_REG_YCMD_QS_NUM_LINES_VOQ4_RT_OFFSET 33874 +#define PBF_REG_BTB_GUARANTEED_VOQ4_RT_OFFSET 33875 +#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ4_RT_OFFSET 33876 +#define PBF_REG_YCMD_QS_NUM_LINES_VOQ5_RT_OFFSET 33877 +#define PBF_REG_BTB_GUARANTEED_VOQ5_RT_OFFSET 33878 +#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ5_RT_OFFSET 33879 +#define PBF_REG_YCMD_QS_NUM_LINES_VOQ6_RT_OFFSET 33880 +#define PBF_REG_BTB_GUARANTEED_VOQ6_RT_OFFSET 33881 +#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ6_RT_OFFSET 33882 +#define PBF_REG_YCMD_QS_NUM_LINES_VOQ7_RT_OFFSET 33883 +#define PBF_REG_BTB_GUARANTEED_VOQ7_RT_OFFSET 33884 +#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ7_RT_OFFSET 33885 +#define PBF_REG_YCMD_QS_NUM_LINES_VOQ8_RT_OFFSET 33886 +#define PBF_REG_BTB_GUARANTEED_VOQ8_RT_OFFSET 33887 +#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ8_RT_OFFSET 33888 +#define PBF_REG_YCMD_QS_NUM_LINES_VOQ9_RT_OFFSET 33889 +#define PBF_REG_BTB_GUARANTEED_VOQ9_RT_OFFSET 33890 +#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ9_RT_OFFSET 33891 +#define PBF_REG_YCMD_QS_NUM_LINES_VOQ10_RT_OFFSET 33892 +#define PBF_REG_BTB_GUARANTEED_VOQ10_RT_OFFSET 33893 +#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ10_RT_OFFSET 33894 +#define PBF_REG_YCMD_QS_NUM_LINES_VOQ11_RT_OFFSET 33895 +#define PBF_REG_BTB_GUARANTEED_VOQ11_RT_OFFSET 33896 +#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ11_RT_OFFSET 33897 +#define PBF_REG_YCMD_QS_NUM_LINES_VOQ12_RT_OFFSET 33898 +#define PBF_REG_BTB_GUARANTEED_VOQ12_RT_OFFSET 33899 +#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ12_RT_OFFSET 33900 +#define PBF_REG_YCMD_QS_NUM_LINES_VOQ13_RT_OFFSET 33901 +#define PBF_REG_BTB_GUARANTEED_VOQ13_RT_OFFSET 33902 +#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ13_RT_OFFSET 33903 +#define PBF_REG_YCMD_QS_NUM_LINES_VOQ14_RT_OFFSET 33904 +#define PBF_REG_BTB_GUARANTEED_VOQ14_RT_OFFSET 33905 +#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ14_RT_OFFSET 33906 +#define PBF_REG_YCMD_QS_NUM_LINES_VOQ15_RT_OFFSET 33907 +#define PBF_REG_BTB_GUARANTEED_VOQ15_RT_OFFSET 33908 +#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ15_RT_OFFSET 33909 +#define 
PBF_REG_YCMD_QS_NUM_LINES_VOQ16_RT_OFFSET 33910 +#define PBF_REG_BTB_GUARANTEED_VOQ16_RT_OFFSET 33911 +#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ16_RT_OFFSET 33912 +#define PBF_REG_YCMD_QS_NUM_LINES_VOQ17_RT_OFFSET 33913 +#define PBF_REG_BTB_GUARANTEED_VOQ17_RT_OFFSET 33914 +#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ17_RT_OFFSET 33915 +#define PBF_REG_YCMD_QS_NUM_LINES_VOQ18_RT_OFFSET 33916 +#define PBF_REG_BTB_GUARANTEED_VOQ18_RT_OFFSET 33917 +#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ18_RT_OFFSET 33918 +#define PBF_REG_YCMD_QS_NUM_LINES_VOQ19_RT_OFFSET 33919 +#define PBF_REG_BTB_GUARANTEED_VOQ19_RT_OFFSET 33920 +#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ19_RT_OFFSET 33921 +#define XCM_REG_CON_PHY_Q3_RT_OFFSET 33922 + +#define RUNTIME_ARRAY_SIZE 33923 -/* The eth storm context for the Ystorm */ -struct ystorm_eth_conn_st_ctx { +/* The eth storm context for the Tstorm */ +struct tstorm_eth_conn_st_ctx { __le32 reserved[4]; }; @@ -2535,41 +2501,253 @@ struct xstorm_eth_conn_ag_ctx { __le32 reg7 /* reg7 */; __le32 reg8 /* reg8 */; __le32 reg9 /* reg9 */; - u8 byte7 /* byte7 */; - u8 byte8 /* byte8 */; - u8 byte9 /* byte9 */; - u8 byte10 /* byte10 */; - u8 byte11 /* byte11 */; - u8 byte12 /* byte12 */; - u8 byte13 /* byte13 */; - u8 byte14 /* byte14 */; - u8 byte15 /* byte15 */; - u8 byte16 /* byte16 */; - __le16 word11 /* word11 */; + u8 byte7 /* byte7 */; + u8 byte8 /* byte8 */; + u8 byte9 /* byte9 */; + u8 byte10 /* byte10 */; + u8 byte11 /* byte11 */; + u8 byte12 /* byte12 */; + u8 byte13 /* byte13 */; + u8 byte14 /* byte14 */; + u8 byte15 /* byte15 */; + u8 byte16 /* byte16 */; + __le16 word11 /* word11 */; + __le32 reg10 /* reg10 */; + __le32 reg11 /* reg11 */; + __le32 reg12 /* reg12 */; + __le32 reg13 /* reg13 */; + __le32 reg14 /* reg14 */; + __le32 reg15 /* reg15 */; + __le32 reg16 /* reg16 */; + __le32 reg17 /* reg17 */; + __le32 reg18 /* reg18 */; + __le32 reg19 /* reg19 */; + __le16 word12 /* word12 */; + __le16 word13 /* word13 */; + __le16 word14 /* word14 */; + __le16 word15 /* word15 */; +}; + +/* The eth storm context for the Ystorm */ +struct ystorm_eth_conn_st_ctx { + __le32 reserved[8]; +}; + +struct ystorm_eth_conn_ag_ctx { + u8 byte0 /* cdu_validation */; + u8 byte1 /* state */; + u8 flags0; +#define YSTORM_ETH_CONN_AG_CTX_BIT0_MASK 0x1 /* exist_in_qm0 */ +#define YSTORM_ETH_CONN_AG_CTX_BIT0_SHIFT 0 +#define YSTORM_ETH_CONN_AG_CTX_BIT1_MASK 0x1 /* exist_in_qm1 */ +#define YSTORM_ETH_CONN_AG_CTX_BIT1_SHIFT 1 +#define YSTORM_ETH_CONN_AG_CTX_TX_BD_CONS_UPD_CF_MASK 0x3 /* cf0 */ +#define YSTORM_ETH_CONN_AG_CTX_TX_BD_CONS_UPD_CF_SHIFT 2 +#define YSTORM_ETH_CONN_AG_CTX_PMD_TERMINATE_CF_MASK 0x3 /* cf1 */ +#define YSTORM_ETH_CONN_AG_CTX_PMD_TERMINATE_CF_SHIFT 4 +#define YSTORM_ETH_CONN_AG_CTX_CF2_MASK 0x3 /* cf2 */ +#define YSTORM_ETH_CONN_AG_CTX_CF2_SHIFT 6 + u8 flags1; +#define YSTORM_ETH_CONN_AG_CTX_TX_BD_CONS_UPD_CF_EN_MASK 0x1 /* cf0en */ +#define YSTORM_ETH_CONN_AG_CTX_TX_BD_CONS_UPD_CF_EN_SHIFT 0 +#define YSTORM_ETH_CONN_AG_CTX_PMD_TERMINATE_CF_EN_MASK 0x1 /* cf1en */ +#define YSTORM_ETH_CONN_AG_CTX_PMD_TERMINATE_CF_EN_SHIFT 1 +#define YSTORM_ETH_CONN_AG_CTX_CF2EN_MASK 0x1 /* cf2en */ +#define YSTORM_ETH_CONN_AG_CTX_CF2EN_SHIFT 2 +#define YSTORM_ETH_CONN_AG_CTX_RULE0EN_MASK 0x1 /* rule0en */ +#define YSTORM_ETH_CONN_AG_CTX_RULE0EN_SHIFT 3 +#define YSTORM_ETH_CONN_AG_CTX_RULE1EN_MASK 0x1 /* rule1en */ +#define YSTORM_ETH_CONN_AG_CTX_RULE1EN_SHIFT 4 +#define YSTORM_ETH_CONN_AG_CTX_RULE2EN_MASK 0x1 /* rule2en */ +#define YSTORM_ETH_CONN_AG_CTX_RULE2EN_SHIFT 5 +#define 
YSTORM_ETH_CONN_AG_CTX_RULE3EN_MASK 0x1 /* rule3en */ +#define YSTORM_ETH_CONN_AG_CTX_RULE3EN_SHIFT 6 +#define YSTORM_ETH_CONN_AG_CTX_RULE4EN_MASK 0x1 /* rule4en */ +#define YSTORM_ETH_CONN_AG_CTX_RULE4EN_SHIFT 7 + u8 byte2 /* byte2 */; + u8 byte3 /* byte3 */; + __le16 word0 /* word0 */; + __le32 terminate_spqe /* reg0 */; + __le32 reg1 /* reg1 */; + __le16 tx_bd_cons_upd /* word1 */; + __le16 word2 /* word2 */; + __le16 word3 /* word3 */; + __le16 word4 /* word4 */; + __le32 reg2 /* reg2 */; + __le32 reg3 /* reg3 */; +}; + +struct tstorm_eth_conn_ag_ctx { + u8 byte0 /* cdu_validation */; + u8 byte1 /* state */; + u8 flags0; +#define TSTORM_ETH_CONN_AG_CTX_BIT0_MASK 0x1 /* exist_in_qm0 */ +#define TSTORM_ETH_CONN_AG_CTX_BIT0_SHIFT 0 +#define TSTORM_ETH_CONN_AG_CTX_BIT1_MASK 0x1 /* exist_in_qm1 */ +#define TSTORM_ETH_CONN_AG_CTX_BIT1_SHIFT 1 +#define TSTORM_ETH_CONN_AG_CTX_BIT2_MASK 0x1 /* bit2 */ +#define TSTORM_ETH_CONN_AG_CTX_BIT2_SHIFT 2 +#define TSTORM_ETH_CONN_AG_CTX_BIT3_MASK 0x1 /* bit3 */ +#define TSTORM_ETH_CONN_AG_CTX_BIT3_SHIFT 3 +#define TSTORM_ETH_CONN_AG_CTX_BIT4_MASK 0x1 /* bit4 */ +#define TSTORM_ETH_CONN_AG_CTX_BIT4_SHIFT 4 +#define TSTORM_ETH_CONN_AG_CTX_BIT5_MASK 0x1 /* bit5 */ +#define TSTORM_ETH_CONN_AG_CTX_BIT5_SHIFT 5 +#define TSTORM_ETH_CONN_AG_CTX_CF0_MASK 0x3 /* timer0cf */ +#define TSTORM_ETH_CONN_AG_CTX_CF0_SHIFT 6 + u8 flags1; +#define TSTORM_ETH_CONN_AG_CTX_CF1_MASK 0x3 /* timer1cf */ +#define TSTORM_ETH_CONN_AG_CTX_CF1_SHIFT 0 +#define TSTORM_ETH_CONN_AG_CTX_CF2_MASK 0x3 /* timer2cf */ +#define TSTORM_ETH_CONN_AG_CTX_CF2_SHIFT 2 +#define TSTORM_ETH_CONN_AG_CTX_CF3_MASK 0x3 /* timer_stop_all */ +#define TSTORM_ETH_CONN_AG_CTX_CF3_SHIFT 4 +#define TSTORM_ETH_CONN_AG_CTX_CF4_MASK 0x3 /* cf4 */ +#define TSTORM_ETH_CONN_AG_CTX_CF4_SHIFT 6 + u8 flags2; +#define TSTORM_ETH_CONN_AG_CTX_CF5_MASK 0x3 /* cf5 */ +#define TSTORM_ETH_CONN_AG_CTX_CF5_SHIFT 0 +#define TSTORM_ETH_CONN_AG_CTX_CF6_MASK 0x3 /* cf6 */ +#define TSTORM_ETH_CONN_AG_CTX_CF6_SHIFT 2 +#define TSTORM_ETH_CONN_AG_CTX_CF7_MASK 0x3 /* cf7 */ +#define TSTORM_ETH_CONN_AG_CTX_CF7_SHIFT 4 +#define TSTORM_ETH_CONN_AG_CTX_CF8_MASK 0x3 /* cf8 */ +#define TSTORM_ETH_CONN_AG_CTX_CF8_SHIFT 6 + u8 flags3; +#define TSTORM_ETH_CONN_AG_CTX_CF9_MASK 0x3 /* cf9 */ +#define TSTORM_ETH_CONN_AG_CTX_CF9_SHIFT 0 +#define TSTORM_ETH_CONN_AG_CTX_CF10_MASK 0x3 /* cf10 */ +#define TSTORM_ETH_CONN_AG_CTX_CF10_SHIFT 2 +#define TSTORM_ETH_CONN_AG_CTX_CF0EN_MASK 0x1 /* cf0en */ +#define TSTORM_ETH_CONN_AG_CTX_CF0EN_SHIFT 4 +#define TSTORM_ETH_CONN_AG_CTX_CF1EN_MASK 0x1 /* cf1en */ +#define TSTORM_ETH_CONN_AG_CTX_CF1EN_SHIFT 5 +#define TSTORM_ETH_CONN_AG_CTX_CF2EN_MASK 0x1 /* cf2en */ +#define TSTORM_ETH_CONN_AG_CTX_CF2EN_SHIFT 6 +#define TSTORM_ETH_CONN_AG_CTX_CF3EN_MASK 0x1 /* cf3en */ +#define TSTORM_ETH_CONN_AG_CTX_CF3EN_SHIFT 7 + u8 flags4; +#define TSTORM_ETH_CONN_AG_CTX_CF4EN_MASK 0x1 /* cf4en */ +#define TSTORM_ETH_CONN_AG_CTX_CF4EN_SHIFT 0 +#define TSTORM_ETH_CONN_AG_CTX_CF5EN_MASK 0x1 /* cf5en */ +#define TSTORM_ETH_CONN_AG_CTX_CF5EN_SHIFT 1 +#define TSTORM_ETH_CONN_AG_CTX_CF6EN_MASK 0x1 /* cf6en */ +#define TSTORM_ETH_CONN_AG_CTX_CF6EN_SHIFT 2 +#define TSTORM_ETH_CONN_AG_CTX_CF7EN_MASK 0x1 /* cf7en */ +#define TSTORM_ETH_CONN_AG_CTX_CF7EN_SHIFT 3 +#define TSTORM_ETH_CONN_AG_CTX_CF8EN_MASK 0x1 /* cf8en */ +#define TSTORM_ETH_CONN_AG_CTX_CF8EN_SHIFT 4 +#define TSTORM_ETH_CONN_AG_CTX_CF9EN_MASK 0x1 /* cf9en */ +#define TSTORM_ETH_CONN_AG_CTX_CF9EN_SHIFT 5 +#define TSTORM_ETH_CONN_AG_CTX_CF10EN_MASK 0x1 /* cf10en */ +#define 
TSTORM_ETH_CONN_AG_CTX_CF10EN_SHIFT 6 +#define TSTORM_ETH_CONN_AG_CTX_RULE0EN_MASK 0x1 /* rule0en */ +#define TSTORM_ETH_CONN_AG_CTX_RULE0EN_SHIFT 7 + u8 flags5; +#define TSTORM_ETH_CONN_AG_CTX_RULE1EN_MASK 0x1 /* rule1en */ +#define TSTORM_ETH_CONN_AG_CTX_RULE1EN_SHIFT 0 +#define TSTORM_ETH_CONN_AG_CTX_RULE2EN_MASK 0x1 /* rule2en */ +#define TSTORM_ETH_CONN_AG_CTX_RULE2EN_SHIFT 1 +#define TSTORM_ETH_CONN_AG_CTX_RULE3EN_MASK 0x1 /* rule3en */ +#define TSTORM_ETH_CONN_AG_CTX_RULE3EN_SHIFT 2 +#define TSTORM_ETH_CONN_AG_CTX_RULE4EN_MASK 0x1 /* rule4en */ +#define TSTORM_ETH_CONN_AG_CTX_RULE4EN_SHIFT 3 +#define TSTORM_ETH_CONN_AG_CTX_RULE5EN_MASK 0x1 /* rule5en */ +#define TSTORM_ETH_CONN_AG_CTX_RULE5EN_SHIFT 4 +#define TSTORM_ETH_CONN_AG_CTX_RX_BD_EN_MASK 0x1 /* rule6en */ +#define TSTORM_ETH_CONN_AG_CTX_RX_BD_EN_SHIFT 5 +#define TSTORM_ETH_CONN_AG_CTX_RULE7EN_MASK 0x1 /* rule7en */ +#define TSTORM_ETH_CONN_AG_CTX_RULE7EN_SHIFT 6 +#define TSTORM_ETH_CONN_AG_CTX_RULE8EN_MASK 0x1 /* rule8en */ +#define TSTORM_ETH_CONN_AG_CTX_RULE8EN_SHIFT 7 + __le32 reg0 /* reg0 */; + __le32 reg1 /* reg1 */; + __le32 reg2 /* reg2 */; + __le32 reg3 /* reg3 */; + __le32 reg4 /* reg4 */; + __le32 reg5 /* reg5 */; + __le32 reg6 /* reg6 */; + __le32 reg7 /* reg7 */; + __le32 reg8 /* reg8 */; + u8 byte2 /* byte2 */; + u8 byte3 /* byte3 */; + __le16 rx_bd_cons /* word0 */; + u8 byte4 /* byte4 */; + u8 byte5 /* byte5 */; + __le16 rx_bd_prod /* word1 */; + __le16 word2 /* conn_dpi */; + __le16 word3 /* word3 */; + __le32 reg9 /* reg9 */; __le32 reg10 /* reg10 */; - __le32 reg11 /* reg11 */; - __le32 reg12 /* reg12 */; - __le32 reg13 /* reg13 */; - __le32 reg14 /* reg14 */; - __le32 reg15 /* reg15 */; - __le32 reg16 /* reg16 */; - __le32 reg17 /* reg17 */; - __le32 reg18 /* reg18 */; - __le32 reg19 /* reg19 */; - __le16 word12 /* word12 */; - __le16 word13 /* word13 */; - __le16 word14 /* word14 */; - __le16 word15 /* word15 */; -}; - -/* The eth storm context for the Tstorm */ -struct tstorm_eth_conn_st_ctx { - __le32 reserved[4]; }; -/* The eth storm context for the Mstorm */ -struct mstorm_eth_conn_st_ctx { - __le32 reserved[8]; +struct ustorm_eth_conn_ag_ctx { + u8 byte0 /* cdu_validation */; + u8 byte1 /* state */; + u8 flags0; +#define USTORM_ETH_CONN_AG_CTX_BIT0_MASK 0x1 /* exist_in_qm0 */ +#define USTORM_ETH_CONN_AG_CTX_BIT0_SHIFT 0 +#define USTORM_ETH_CONN_AG_CTX_BIT1_MASK 0x1 /* exist_in_qm1 */ +#define USTORM_ETH_CONN_AG_CTX_BIT1_SHIFT 1 +#define USTORM_ETH_CONN_AG_CTX_TX_PMD_TERMINATE_CF_MASK 0x3 /* timer0cf */ +#define USTORM_ETH_CONN_AG_CTX_TX_PMD_TERMINATE_CF_SHIFT 2 +#define USTORM_ETH_CONN_AG_CTX_RX_PMD_TERMINATE_CF_MASK 0x3 /* timer1cf */ +#define USTORM_ETH_CONN_AG_CTX_RX_PMD_TERMINATE_CF_SHIFT 4 +#define USTORM_ETH_CONN_AG_CTX_CF2_MASK 0x3 /* timer2cf */ +#define USTORM_ETH_CONN_AG_CTX_CF2_SHIFT 6 + u8 flags1; +#define USTORM_ETH_CONN_AG_CTX_CF3_MASK 0x3 /* timer_stop_all */ +#define USTORM_ETH_CONN_AG_CTX_CF3_SHIFT 0 +#define USTORM_ETH_CONN_AG_CTX_TX_ARM_CF_MASK 0x3 /* cf4 */ +#define USTORM_ETH_CONN_AG_CTX_TX_ARM_CF_SHIFT 2 +#define USTORM_ETH_CONN_AG_CTX_RX_ARM_CF_MASK 0x3 /* cf5 */ +#define USTORM_ETH_CONN_AG_CTX_RX_ARM_CF_SHIFT 4 +#define USTORM_ETH_CONN_AG_CTX_TX_BD_CONS_UPD_CF_MASK 0x3 /* cf6 */ +#define USTORM_ETH_CONN_AG_CTX_TX_BD_CONS_UPD_CF_SHIFT 6 + u8 flags2; +#define USTORM_ETH_CONN_AG_CTX_TX_PMD_TERMINATE_CF_EN_MASK 0x1 /* cf0en */ +#define USTORM_ETH_CONN_AG_CTX_TX_PMD_TERMINATE_CF_EN_SHIFT 0 +#define USTORM_ETH_CONN_AG_CTX_RX_PMD_TERMINATE_CF_EN_MASK 0x1 /* cf1en */ +#define 
USTORM_ETH_CONN_AG_CTX_RX_PMD_TERMINATE_CF_EN_SHIFT 1 +#define USTORM_ETH_CONN_AG_CTX_CF2EN_MASK 0x1 /* cf2en */ +#define USTORM_ETH_CONN_AG_CTX_CF2EN_SHIFT 2 +#define USTORM_ETH_CONN_AG_CTX_CF3EN_MASK 0x1 /* cf3en */ +#define USTORM_ETH_CONN_AG_CTX_CF3EN_SHIFT 3 +#define USTORM_ETH_CONN_AG_CTX_TX_ARM_CF_EN_MASK 0x1 /* cf4en */ +#define USTORM_ETH_CONN_AG_CTX_TX_ARM_CF_EN_SHIFT 4 +#define USTORM_ETH_CONN_AG_CTX_RX_ARM_CF_EN_MASK 0x1 /* cf5en */ +#define USTORM_ETH_CONN_AG_CTX_RX_ARM_CF_EN_SHIFT 5 +#define USTORM_ETH_CONN_AG_CTX_TX_BD_CONS_UPD_CF_EN_MASK 0x1 /* cf6en */ +#define USTORM_ETH_CONN_AG_CTX_TX_BD_CONS_UPD_CF_EN_SHIFT 6 +#define USTORM_ETH_CONN_AG_CTX_RULE0EN_MASK 0x1 /* rule0en */ +#define USTORM_ETH_CONN_AG_CTX_RULE0EN_SHIFT 7 + u8 flags3; +#define USTORM_ETH_CONN_AG_CTX_RULE1EN_MASK 0x1 /* rule1en */ +#define USTORM_ETH_CONN_AG_CTX_RULE1EN_SHIFT 0 +#define USTORM_ETH_CONN_AG_CTX_RULE2EN_MASK 0x1 /* rule2en */ +#define USTORM_ETH_CONN_AG_CTX_RULE2EN_SHIFT 1 +#define USTORM_ETH_CONN_AG_CTX_RULE3EN_MASK 0x1 /* rule3en */ +#define USTORM_ETH_CONN_AG_CTX_RULE3EN_SHIFT 2 +#define USTORM_ETH_CONN_AG_CTX_RULE4EN_MASK 0x1 /* rule4en */ +#define USTORM_ETH_CONN_AG_CTX_RULE4EN_SHIFT 3 +#define USTORM_ETH_CONN_AG_CTX_RULE5EN_MASK 0x1 /* rule5en */ +#define USTORM_ETH_CONN_AG_CTX_RULE5EN_SHIFT 4 +#define USTORM_ETH_CONN_AG_CTX_RULE6EN_MASK 0x1 /* rule6en */ +#define USTORM_ETH_CONN_AG_CTX_RULE6EN_SHIFT 5 +#define USTORM_ETH_CONN_AG_CTX_RULE7EN_MASK 0x1 /* rule7en */ +#define USTORM_ETH_CONN_AG_CTX_RULE7EN_SHIFT 6 +#define USTORM_ETH_CONN_AG_CTX_RULE8EN_MASK 0x1 /* rule8en */ +#define USTORM_ETH_CONN_AG_CTX_RULE8EN_SHIFT 7 + u8 byte2 /* byte2 */; + u8 byte3 /* byte3 */; + __le16 word0 /* conn_dpi */; + __le16 tx_bd_cons /* word1 */; + __le32 reg0 /* reg0 */; + __le32 reg1 /* reg1 */; + __le32 reg2 /* reg2 */; + __le32 tx_int_coallecing_timeset /* reg3 */; + __le16 tx_drv_bd_cons /* word2 */; + __le16 rx_drv_cqe_cons /* word3 */; }; /* The eth storm context for the Ustorm */ @@ -2577,24 +2755,30 @@ struct ustorm_eth_conn_st_ctx { __le32 reserved[40]; }; +/* The eth storm context for the Mstorm */ +struct mstorm_eth_conn_st_ctx { + __le32 reserved[8]; +}; + /* eth connection context */ struct eth_conn_context { - struct ystorm_eth_conn_st_ctx ystorm_st_context; - struct regpair ystorm_st_padding[2] /* padding */; + struct tstorm_eth_conn_st_ctx tstorm_st_context; + struct regpair tstorm_st_padding[2]; struct pstorm_eth_conn_st_ctx pstorm_st_context; - struct regpair pstorm_st_padding[2] /* padding */; struct xstorm_eth_conn_st_ctx xstorm_st_context; struct xstorm_eth_conn_ag_ctx xstorm_ag_context; - struct tstorm_eth_conn_st_ctx tstorm_st_context; - struct regpair tstorm_st_padding[2] /* padding */; - struct mstorm_eth_conn_st_ctx mstorm_st_context; + struct ystorm_eth_conn_st_ctx ystorm_st_context; + struct ystorm_eth_conn_ag_ctx ystorm_ag_context; + struct tstorm_eth_conn_ag_ctx tstorm_ag_context; + struct ustorm_eth_conn_ag_ctx ustorm_ag_context; struct ustorm_eth_conn_st_ctx ustorm_st_context; + struct mstorm_eth_conn_st_ctx mstorm_st_context; }; enum eth_filter_action { ETH_FILTER_ACTION_REMOVE, ETH_FILTER_ACTION_ADD, - ETH_FILTER_ACTION_REPLACE, + ETH_FILTER_ACTION_REMOVE_ALL, MAX_ETH_FILTER_ACTION }; @@ -2653,6 +2837,32 @@ enum eth_ramrod_cmd_id { MAX_ETH_RAMROD_CMD_ID }; +enum eth_tx_err { + ETH_TX_ERR_DROP /* Drop erronous packet. 
*/, + ETH_TX_ERR_ASSERT_MALICIOUS, + MAX_ETH_TX_ERR +}; + +struct eth_tx_err_vals { + __le16 values; +#define ETH_TX_ERR_VALS_ILLEGAL_VLAN_MODE_MASK 0x1 +#define ETH_TX_ERR_VALS_ILLEGAL_VLAN_MODE_SHIFT 0 +#define ETH_TX_ERR_VALS_PACKET_TOO_SMALL_MASK 0x1 +#define ETH_TX_ERR_VALS_PACKET_TOO_SMALL_SHIFT 1 +#define ETH_TX_ERR_VALS_ANTI_SPOOFING_ERR_MASK 0x1 +#define ETH_TX_ERR_VALS_ANTI_SPOOFING_ERR_SHIFT 2 +#define ETH_TX_ERR_VALS_ILLEGAL_INBAND_TAGS_MASK 0x1 +#define ETH_TX_ERR_VALS_ILLEGAL_INBAND_TAGS_SHIFT 3 +#define ETH_TX_ERR_VALS_VLAN_INSERTION_W_INBAND_TAG_MASK 0x1 +#define ETH_TX_ERR_VALS_VLAN_INSERTION_W_INBAND_TAG_SHIFT 4 +#define ETH_TX_ERR_VALS_MTU_VIOLATION_MASK 0x1 +#define ETH_TX_ERR_VALS_MTU_VIOLATION_SHIFT 5 +#define ETH_TX_ERR_VALS_ILLEGAL_CONTROL_FRAME_MASK 0x1 +#define ETH_TX_ERR_VALS_ILLEGAL_CONTROL_FRAME_SHIFT 6 +#define ETH_TX_ERR_VALS_RESERVED_MASK 0x1FF +#define ETH_TX_ERR_VALS_RESERVED_SHIFT 7 +}; + struct eth_vport_rss_config { __le16 capabilities; #define ETH_VPORT_RSS_CONFIG_IPV4_CAPABILITY_MASK 0x1 @@ -2669,12 +2879,8 @@ struct eth_vport_rss_config { #define ETH_VPORT_RSS_CONFIG_IPV6_UDP_CAPABILITY_SHIFT 5 #define ETH_VPORT_RSS_CONFIG_EN_5_TUPLE_CAPABILITY_MASK 0x1 #define ETH_VPORT_RSS_CONFIG_EN_5_TUPLE_CAPABILITY_SHIFT 6 -#define ETH_VPORT_RSS_CONFIG_CALC_4TUP_TCP_FRAG_MASK 0x1 -#define ETH_VPORT_RSS_CONFIG_CALC_4TUP_TCP_FRAG_SHIFT 7 -#define ETH_VPORT_RSS_CONFIG_CALC_4TUP_UDP_FRAG_MASK 0x1 -#define ETH_VPORT_RSS_CONFIG_CALC_4TUP_UDP_FRAG_SHIFT 8 -#define ETH_VPORT_RSS_CONFIG_RESERVED0_MASK 0x7F -#define ETH_VPORT_RSS_CONFIG_RESERVED0_SHIFT 9 +#define ETH_VPORT_RSS_CONFIG_RESERVED0_MASK 0x1FF +#define ETH_VPORT_RSS_CONFIG_RESERVED0_SHIFT 7 u8 rss_id; u8 rss_mode; u8 update_rss_key; @@ -2749,10 +2955,14 @@ struct rx_queue_start_ramrod_data { u8 pxp_tph_valid_pkt; u8 pxp_st_hint; __le16 pxp_st_index; - u8 reserved[4]; - struct regpair cqe_pbl_addr; - struct regpair bd_base; - struct regpair sge_base; + u8 pmd_mode; + u8 notify_en; + u8 toggle_val; + u8 reserved[7]; + __le16 reserved1; + struct regpair cqe_pbl_addr; + struct regpair bd_base; + struct regpair reserved2; }; struct rx_queue_stop_ramrod_data { @@ -2764,23 +2974,24 @@ struct rx_queue_stop_ramrod_data { }; struct rx_queue_update_ramrod_data { - __le16 rx_queue_id; - u8 complete_cqe_flg; - u8 complete_event_flg; - u8 init_sge_ring_flg; - u8 vport_id; - u8 pxp_tph_valid_sge; - u8 pxp_st_hint; - __le16 pxp_st_index; - u8 reserved[6]; - struct regpair sge_base; + __le16 rx_queue_id; + u8 complete_cqe_flg; + u8 complete_event_flg; + u8 vport_id; + u8 reserved[4]; + u8 reserved1; + u8 reserved2; + u8 reserved3; + __le16 reserved4; + __le16 reserved5; + struct regpair reserved6; }; struct tx_queue_start_ramrod_data { __le16 sb_id; u8 sb_index; u8 vport_id; - u8 tc; + u8 reserved0; u8 stats_counter_id; __le16 qm_pq_id; u8 flags; @@ -2790,18 +3001,25 @@ struct tx_queue_start_ramrod_data { #define TX_QUEUE_START_RAMROD_DATA_TEST_MODE_PKT_DUP_SHIFT 1 #define TX_QUEUE_START_RAMROD_DATA_TEST_MODE_TX_DEST_MASK 0x1 #define TX_QUEUE_START_RAMROD_DATA_TEST_MODE_TX_DEST_SHIFT 2 -#define TX_QUEUE_START_RAMROD_DATA_RESERVED0_MASK 0x1F -#define TX_QUEUE_START_RAMROD_DATA_RESERVED0_SHIFT 3 - u8 pin_context; - u8 pxp_tph_valid_bd; - u8 pxp_tph_valid_pkt; - __le16 pxp_st_index; - u8 pxp_st_hint; - u8 reserved1[3]; - __le16 queue_zone_id; - __le16 test_dup_count; - __le16 pbl_size; - struct regpair pbl_base_addr; +#define TX_QUEUE_START_RAMROD_DATA_PMD_MODE_MASK 0x1 +#define TX_QUEUE_START_RAMROD_DATA_PMD_MODE_SHIFT 3 +#define 
TX_QUEUE_START_RAMROD_DATA_NOTIFY_EN_MASK 0x1 +#define TX_QUEUE_START_RAMROD_DATA_NOTIFY_EN_SHIFT 4 +#define TX_QUEUE_START_RAMROD_DATA_PIN_CONTEXT_MASK 0x1 +#define TX_QUEUE_START_RAMROD_DATA_PIN_CONTEXT_SHIFT 5 +#define TX_QUEUE_START_RAMROD_DATA_RESERVED1_MASK 0x3 +#define TX_QUEUE_START_RAMROD_DATA_RESERVED1_SHIFT 6 + u8 pxp_st_hint; + u8 pxp_tph_valid_bd; + u8 pxp_tph_valid_pkt; + __le16 pxp_st_index; + __le16 comp_agg_size; + __le16 queue_zone_id; + __le16 test_dup_count; + __le16 pbl_size; + __le16 tx_queue_id; + struct regpair pbl_base_addr; + struct regpair bd_cons_address; }; struct tx_queue_stop_ramrod_data { @@ -2822,16 +3040,16 @@ struct vport_start_ramrod_data { struct eth_vport_rx_mode rx_mode; struct eth_vport_tx_mode tx_mode; struct eth_vport_tpa_param tpa_param; - __le16 sge_buff_size; - u8 max_sges_num; - u8 tx_switching_en; - u8 anti_spoofing_en; - u8 default_vlan_en; - u8 handle_ptp_pkts; - u8 silent_vlan_removal_en; - __le16 default_vlan; - u8 untagged; - u8 reserved[7]; + __le16 default_vlan; + u8 tx_switching_en; + u8 anti_spoofing_en; + u8 default_vlan_en; + u8 handle_ptp_pkts; + u8 silent_vlan_removal_en; + u8 untagged; + struct eth_tx_err_vals tx_err_behav; + u8 zero_placement_offset; + u8 reserved[7]; }; struct vport_stop_ramrod_data { @@ -2840,36 +3058,35 @@ struct vport_stop_ramrod_data { }; struct vport_update_ramrod_data_cmn { - u8 vport_id; - u8 update_rx_active_flg; - u8 rx_active_flg; - u8 update_tx_active_flg; - u8 tx_active_flg; - u8 update_rx_mode_flg; - u8 update_tx_mode_flg; - u8 update_approx_mcast_flg; - u8 update_rss_flg; - u8 update_inner_vlan_removal_en_flg; - u8 inner_vlan_removal_en; - u8 update_tpa_param_flg; - u8 update_tpa_en_flg; - u8 update_sge_param_flg; - __le16 sge_buff_size; - u8 max_sges_num; - u8 update_tx_switching_en_flg; - u8 tx_switching_en; - u8 update_anti_spoofing_en_flg; - u8 anti_spoofing_en; - u8 update_handle_ptp_pkts; - u8 handle_ptp_pkts; - u8 update_default_vlan_en_flg; - u8 default_vlan_en; - u8 update_default_vlan_flg; - __le16 default_vlan; - u8 update_accept_any_vlan_flg; - u8 accept_any_vlan; - u8 silent_vlan_removal_en; - u8 reserved; + u8 vport_id; + u8 update_rx_active_flg; + u8 rx_active_flg; + u8 update_tx_active_flg; + u8 tx_active_flg; + u8 update_rx_mode_flg; + u8 update_tx_mode_flg; + u8 update_approx_mcast_flg; + u8 update_rss_flg; + u8 update_inner_vlan_removal_en_flg; + u8 inner_vlan_removal_en; + u8 update_tpa_param_flg; + u8 update_tpa_en_flg; + u8 update_tx_switching_en_flg; + u8 tx_switching_en; + u8 update_anti_spoofing_en_flg; + u8 anti_spoofing_en; + u8 update_handle_ptp_pkts; + u8 handle_ptp_pkts; + u8 update_default_vlan_en_flg; + u8 default_vlan_en; + u8 update_default_vlan_flg; + __le16 default_vlan; + u8 update_accept_any_vlan_flg; + u8 accept_any_vlan; + u8 silent_vlan_removal_en; + u8 update_mtu_flg; + __le16 mtu; + u8 reserved[2]; }; struct vport_update_ramrod_mcast { @@ -2885,436 +3102,6 @@ struct vport_update_ramrod_data { struct eth_vport_rss_config rss_config; }; -struct mstorm_eth_conn_ag_ctx { - u8 byte0 /* cdu_validation */; - u8 byte1 /* state */; - u8 flags0; -#define MSTORM_ETH_CONN_AG_CTX_EXIST_IN_QM0_MASK 0x1 /* exist_in_qm0 */ -#define MSTORM_ETH_CONN_AG_CTX_EXIST_IN_QM0_SHIFT 0 -#define MSTORM_ETH_CONN_AG_CTX_BIT1_MASK 0x1 /* exist_in_qm1 */ -#define MSTORM_ETH_CONN_AG_CTX_BIT1_SHIFT 1 -#define MSTORM_ETH_CONN_AG_CTX_CF0_MASK 0x3 /* cf0 */ -#define MSTORM_ETH_CONN_AG_CTX_CF0_SHIFT 2 -#define MSTORM_ETH_CONN_AG_CTX_CF1_MASK 0x3 /* cf1 */ -#define 
MSTORM_ETH_CONN_AG_CTX_CF1_SHIFT 4 -#define MSTORM_ETH_CONN_AG_CTX_CF2_MASK 0x3 /* cf2 */ -#define MSTORM_ETH_CONN_AG_CTX_CF2_SHIFT 6 - u8 flags1; -#define MSTORM_ETH_CONN_AG_CTX_CF0EN_MASK 0x1 /* cf0en */ -#define MSTORM_ETH_CONN_AG_CTX_CF0EN_SHIFT 0 -#define MSTORM_ETH_CONN_AG_CTX_CF1EN_MASK 0x1 /* cf1en */ -#define MSTORM_ETH_CONN_AG_CTX_CF1EN_SHIFT 1 -#define MSTORM_ETH_CONN_AG_CTX_CF2EN_MASK 0x1 /* cf2en */ -#define MSTORM_ETH_CONN_AG_CTX_CF2EN_SHIFT 2 -#define MSTORM_ETH_CONN_AG_CTX_RULE0EN_MASK 0x1 /* rule0en */ -#define MSTORM_ETH_CONN_AG_CTX_RULE0EN_SHIFT 3 -#define MSTORM_ETH_CONN_AG_CTX_RULE1EN_MASK 0x1 /* rule1en */ -#define MSTORM_ETH_CONN_AG_CTX_RULE1EN_SHIFT 4 -#define MSTORM_ETH_CONN_AG_CTX_RULE2EN_MASK 0x1 /* rule2en */ -#define MSTORM_ETH_CONN_AG_CTX_RULE2EN_SHIFT 5 -#define MSTORM_ETH_CONN_AG_CTX_RULE3EN_MASK 0x1 /* rule3en */ -#define MSTORM_ETH_CONN_AG_CTX_RULE3EN_SHIFT 6 -#define MSTORM_ETH_CONN_AG_CTX_RULE4EN_MASK 0x1 /* rule4en */ -#define MSTORM_ETH_CONN_AG_CTX_RULE4EN_SHIFT 7 - __le16 word0 /* word0 */; - __le16 word1 /* word1 */; - __le32 reg0 /* reg0 */; - __le32 reg1 /* reg1 */; -}; - -struct tstorm_eth_conn_ag_ctx { - u8 byte0 /* cdu_validation */; - u8 byte1 /* state */; - u8 flags0; -#define TSTORM_ETH_CONN_AG_CTX_BIT0_MASK 0x1 /* exist_in_qm0 */ -#define TSTORM_ETH_CONN_AG_CTX_BIT0_SHIFT 0 -#define TSTORM_ETH_CONN_AG_CTX_BIT1_MASK 0x1 /* exist_in_qm1 */ -#define TSTORM_ETH_CONN_AG_CTX_BIT1_SHIFT 1 -#define TSTORM_ETH_CONN_AG_CTX_BIT2_MASK 0x1 /* bit2 */ -#define TSTORM_ETH_CONN_AG_CTX_BIT2_SHIFT 2 -#define TSTORM_ETH_CONN_AG_CTX_BIT3_MASK 0x1 /* bit3 */ -#define TSTORM_ETH_CONN_AG_CTX_BIT3_SHIFT 3 -#define TSTORM_ETH_CONN_AG_CTX_BIT4_MASK 0x1 /* bit4 */ -#define TSTORM_ETH_CONN_AG_CTX_BIT4_SHIFT 4 -#define TSTORM_ETH_CONN_AG_CTX_BIT5_MASK 0x1 /* bit5 */ -#define TSTORM_ETH_CONN_AG_CTX_BIT5_SHIFT 5 -#define TSTORM_ETH_CONN_AG_CTX_CF0_MASK 0x3 /* timer0cf */ -#define TSTORM_ETH_CONN_AG_CTX_CF0_SHIFT 6 - u8 flags1; -#define TSTORM_ETH_CONN_AG_CTX_CF1_MASK 0x3 /* timer1cf */ -#define TSTORM_ETH_CONN_AG_CTX_CF1_SHIFT 0 -#define TSTORM_ETH_CONN_AG_CTX_CF2_MASK 0x3 /* timer2cf */ -#define TSTORM_ETH_CONN_AG_CTX_CF2_SHIFT 2 -#define TSTORM_ETH_CONN_AG_CTX_CF3_MASK 0x3 /* timer_stop_all */ -#define TSTORM_ETH_CONN_AG_CTX_CF3_SHIFT 4 -#define TSTORM_ETH_CONN_AG_CTX_CF4_MASK 0x3 /* cf4 */ -#define TSTORM_ETH_CONN_AG_CTX_CF4_SHIFT 6 - u8 flags2; -#define TSTORM_ETH_CONN_AG_CTX_CF5_MASK 0x3 /* cf5 */ -#define TSTORM_ETH_CONN_AG_CTX_CF5_SHIFT 0 -#define TSTORM_ETH_CONN_AG_CTX_CF6_MASK 0x3 /* cf6 */ -#define TSTORM_ETH_CONN_AG_CTX_CF6_SHIFT 2 -#define TSTORM_ETH_CONN_AG_CTX_CF7_MASK 0x3 /* cf7 */ -#define TSTORM_ETH_CONN_AG_CTX_CF7_SHIFT 4 -#define TSTORM_ETH_CONN_AG_CTX_CF8_MASK 0x3 /* cf8 */ -#define TSTORM_ETH_CONN_AG_CTX_CF8_SHIFT 6 - u8 flags3; -#define TSTORM_ETH_CONN_AG_CTX_CF9_MASK 0x3 /* cf9 */ -#define TSTORM_ETH_CONN_AG_CTX_CF9_SHIFT 0 -#define TSTORM_ETH_CONN_AG_CTX_CF10_MASK 0x3 /* cf10 */ -#define TSTORM_ETH_CONN_AG_CTX_CF10_SHIFT 2 -#define TSTORM_ETH_CONN_AG_CTX_CF0EN_MASK 0x1 /* cf0en */ -#define TSTORM_ETH_CONN_AG_CTX_CF0EN_SHIFT 4 -#define TSTORM_ETH_CONN_AG_CTX_CF1EN_MASK 0x1 /* cf1en */ -#define TSTORM_ETH_CONN_AG_CTX_CF1EN_SHIFT 5 -#define TSTORM_ETH_CONN_AG_CTX_CF2EN_MASK 0x1 /* cf2en */ -#define TSTORM_ETH_CONN_AG_CTX_CF2EN_SHIFT 6 -#define TSTORM_ETH_CONN_AG_CTX_CF3EN_MASK 0x1 /* cf3en */ -#define TSTORM_ETH_CONN_AG_CTX_CF3EN_SHIFT 7 - u8 flags4; -#define TSTORM_ETH_CONN_AG_CTX_CF4EN_MASK 0x1 /* cf4en */ -#define TSTORM_ETH_CONN_AG_CTX_CF4EN_SHIFT 
0 -#define TSTORM_ETH_CONN_AG_CTX_CF5EN_MASK 0x1 /* cf5en */ -#define TSTORM_ETH_CONN_AG_CTX_CF5EN_SHIFT 1 -#define TSTORM_ETH_CONN_AG_CTX_CF6EN_MASK 0x1 /* cf6en */ -#define TSTORM_ETH_CONN_AG_CTX_CF6EN_SHIFT 2 -#define TSTORM_ETH_CONN_AG_CTX_CF7EN_MASK 0x1 /* cf7en */ -#define TSTORM_ETH_CONN_AG_CTX_CF7EN_SHIFT 3 -#define TSTORM_ETH_CONN_AG_CTX_CF8EN_MASK 0x1 /* cf8en */ -#define TSTORM_ETH_CONN_AG_CTX_CF8EN_SHIFT 4 -#define TSTORM_ETH_CONN_AG_CTX_CF9EN_MASK 0x1 /* cf9en */ -#define TSTORM_ETH_CONN_AG_CTX_CF9EN_SHIFT 5 -#define TSTORM_ETH_CONN_AG_CTX_CF10EN_MASK 0x1 /* cf10en */ -#define TSTORM_ETH_CONN_AG_CTX_CF10EN_SHIFT 6 -#define TSTORM_ETH_CONN_AG_CTX_RULE0EN_MASK 0x1 /* rule0en */ -#define TSTORM_ETH_CONN_AG_CTX_RULE0EN_SHIFT 7 - u8 flags5; -#define TSTORM_ETH_CONN_AG_CTX_RULE1EN_MASK 0x1 /* rule1en */ -#define TSTORM_ETH_CONN_AG_CTX_RULE1EN_SHIFT 0 -#define TSTORM_ETH_CONN_AG_CTX_RULE2EN_MASK 0x1 /* rule2en */ -#define TSTORM_ETH_CONN_AG_CTX_RULE2EN_SHIFT 1 -#define TSTORM_ETH_CONN_AG_CTX_RULE3EN_MASK 0x1 /* rule3en */ -#define TSTORM_ETH_CONN_AG_CTX_RULE3EN_SHIFT 2 -#define TSTORM_ETH_CONN_AG_CTX_RULE4EN_MASK 0x1 /* rule4en */ -#define TSTORM_ETH_CONN_AG_CTX_RULE4EN_SHIFT 3 -#define TSTORM_ETH_CONN_AG_CTX_RULE5EN_MASK 0x1 /* rule5en */ -#define TSTORM_ETH_CONN_AG_CTX_RULE5EN_SHIFT 4 -#define TSTORM_ETH_CONN_AG_CTX_RX_BD_EN_MASK 0x1 /* rule6en */ -#define TSTORM_ETH_CONN_AG_CTX_RX_BD_EN_SHIFT 5 -#define TSTORM_ETH_CONN_AG_CTX_RULE7EN_MASK 0x1 /* rule7en */ -#define TSTORM_ETH_CONN_AG_CTX_RULE7EN_SHIFT 6 -#define TSTORM_ETH_CONN_AG_CTX_RULE8EN_MASK 0x1 /* rule8en */ -#define TSTORM_ETH_CONN_AG_CTX_RULE8EN_SHIFT 7 - __le32 reg0 /* reg0 */; - __le32 reg1 /* reg1 */; - __le32 reg2 /* reg2 */; - __le32 reg3 /* reg3 */; - __le32 reg4 /* reg4 */; - __le32 reg5 /* reg5 */; - __le32 reg6 /* reg6 */; - __le32 reg7 /* reg7 */; - __le32 reg8 /* reg8 */; - u8 byte2 /* byte2 */; - u8 byte3 /* byte3 */; - __le16 rx_bd_cons /* word0 */; - u8 byte4 /* byte4 */; - u8 byte5 /* byte5 */; - __le16 rx_bd_prod /* word1 */; - __le16 word2 /* conn_dpi */; - __le16 word3 /* word3 */; - __le32 reg9 /* reg9 */; - __le32 reg10 /* reg10 */; -}; - -struct ustorm_eth_conn_ag_ctx { - u8 byte0 /* cdu_validation */; - u8 byte1 /* state */; - u8 flags0; -#define USTORM_ETH_CONN_AG_CTX_BIT0_MASK 0x1 -#define USTORM_ETH_CONN_AG_CTX_BIT0_SHIFT 0 -#define USTORM_ETH_CONN_AG_CTX_BIT1_MASK 0x1 -#define USTORM_ETH_CONN_AG_CTX_BIT1_SHIFT 1 -#define USTORM_ETH_CONN_AG_CTX_CF0_MASK 0x3 /* timer0cf */ -#define USTORM_ETH_CONN_AG_CTX_CF0_SHIFT 2 -#define USTORM_ETH_CONN_AG_CTX_CF1_MASK 0x3 /* timer1cf */ -#define USTORM_ETH_CONN_AG_CTX_CF1_SHIFT 4 -#define USTORM_ETH_CONN_AG_CTX_CF2_MASK 0x3 /* timer2cf */ -#define USTORM_ETH_CONN_AG_CTX_CF2_SHIFT 6 - u8 flags1; -#define USTORM_ETH_CONN_AG_CTX_CF3_MASK 0x3 -#define USTORM_ETH_CONN_AG_CTX_CF3_SHIFT 0 -#define USTORM_ETH_CONN_AG_CTX_TX_ARM_CF_MASK 0x3 /* cf4 */ -#define USTORM_ETH_CONN_AG_CTX_TX_ARM_CF_SHIFT 2 -#define USTORM_ETH_CONN_AG_CTX_RX_ARM_CF_MASK 0x3 /* cf5 */ -#define USTORM_ETH_CONN_AG_CTX_RX_ARM_CF_SHIFT 4 -#define USTORM_ETH_CONN_AG_CTX_TX_BD_CONS_UPD_CF_MASK 0x3 /* cf6 */ -#define USTORM_ETH_CONN_AG_CTX_TX_BD_CONS_UPD_CF_SHIFT 6 - u8 flags2; -#define USTORM_ETH_CONN_AG_CTX_CF0EN_MASK 0x1 /* cf0en */ -#define USTORM_ETH_CONN_AG_CTX_CF0EN_SHIFT 0 -#define USTORM_ETH_CONN_AG_CTX_CF1EN_MASK 0x1 /* cf1en */ -#define USTORM_ETH_CONN_AG_CTX_CF1EN_SHIFT 1 -#define USTORM_ETH_CONN_AG_CTX_CF2EN_MASK 0x1 /* cf2en */ -#define USTORM_ETH_CONN_AG_CTX_CF2EN_SHIFT 2 -#define 
USTORM_ETH_CONN_AG_CTX_CF3EN_MASK 0x1 /* cf3en */ -#define USTORM_ETH_CONN_AG_CTX_CF3EN_SHIFT 3 -#define USTORM_ETH_CONN_AG_CTX_TX_ARM_CF_EN_MASK 0x1 /* cf4en */ -#define USTORM_ETH_CONN_AG_CTX_TX_ARM_CF_EN_SHIFT 4 -#define USTORM_ETH_CONN_AG_CTX_RX_ARM_CF_EN_MASK 0x1 /* cf5en */ -#define USTORM_ETH_CONN_AG_CTX_RX_ARM_CF_EN_SHIFT 5 -#define USTORM_ETH_CONN_AG_CTX_TX_BD_CONS_UPD_CF_EN_MASK 0x1 /* cf6en */ -#define USTORM_ETH_CONN_AG_CTX_TX_BD_CONS_UPD_CF_EN_SHIFT 6 -#define USTORM_ETH_CONN_AG_CTX_RULE0EN_MASK 0x1 /* rule0en */ -#define USTORM_ETH_CONN_AG_CTX_RULE0EN_SHIFT 7 - u8 flags3; -#define USTORM_ETH_CONN_AG_CTX_RULE1EN_MASK 0x1 /* rule1en */ -#define USTORM_ETH_CONN_AG_CTX_RULE1EN_SHIFT 0 -#define USTORM_ETH_CONN_AG_CTX_RULE2EN_MASK 0x1 /* rule2en */ -#define USTORM_ETH_CONN_AG_CTX_RULE2EN_SHIFT 1 -#define USTORM_ETH_CONN_AG_CTX_RULE3EN_MASK 0x1 /* rule3en */ -#define USTORM_ETH_CONN_AG_CTX_RULE3EN_SHIFT 2 -#define USTORM_ETH_CONN_AG_CTX_RULE4EN_MASK 0x1 /* rule4en */ -#define USTORM_ETH_CONN_AG_CTX_RULE4EN_SHIFT 3 -#define USTORM_ETH_CONN_AG_CTX_RULE5EN_MASK 0x1 /* rule5en */ -#define USTORM_ETH_CONN_AG_CTX_RULE5EN_SHIFT 4 -#define USTORM_ETH_CONN_AG_CTX_RULE6EN_MASK 0x1 /* rule6en */ -#define USTORM_ETH_CONN_AG_CTX_RULE6EN_SHIFT 5 -#define USTORM_ETH_CONN_AG_CTX_RULE7EN_MASK 0x1 /* rule7en */ -#define USTORM_ETH_CONN_AG_CTX_RULE7EN_SHIFT 6 -#define USTORM_ETH_CONN_AG_CTX_RULE8EN_MASK 0x1 /* rule8en */ -#define USTORM_ETH_CONN_AG_CTX_RULE8EN_SHIFT 7 - u8 byte2 /* byte2 */; - u8 byte3 /* byte3 */; - __le16 word0 /* conn_dpi */; - __le16 tx_bd_cons /* word1 */; - __le32 reg0 /* reg0 */; - __le32 reg1 /* reg1 */; - __le32 reg2 /* reg2 */; - __le32 reg3 /* reg3 */; - __le16 tx_drv_bd_cons /* word2 */; - __le16 rx_drv_cqe_cons /* word3 */; -}; - -struct xstorm_eth_hw_conn_ag_ctx { - u8 reserved0 /* cdu_validation */; - u8 eth_state /* state */; - u8 flags0; -#define XSTORM_ETH_HW_CONN_AG_CTX_EXIST_IN_QM0_MASK 0x1 -#define XSTORM_ETH_HW_CONN_AG_CTX_EXIST_IN_QM0_SHIFT 0 -#define XSTORM_ETH_HW_CONN_AG_CTX_RESERVED1_MASK 0x1 -#define XSTORM_ETH_HW_CONN_AG_CTX_RESERVED1_SHIFT 1 -#define XSTORM_ETH_HW_CONN_AG_CTX_RESERVED2_MASK 0x1 -#define XSTORM_ETH_HW_CONN_AG_CTX_RESERVED2_SHIFT 2 -#define XSTORM_ETH_HW_CONN_AG_CTX_EXIST_IN_QM3_MASK 0x1 -#define XSTORM_ETH_HW_CONN_AG_CTX_EXIST_IN_QM3_SHIFT 3 -#define XSTORM_ETH_HW_CONN_AG_CTX_RESERVED3_MASK 0x1 -#define XSTORM_ETH_HW_CONN_AG_CTX_RESERVED3_SHIFT 4 -#define XSTORM_ETH_HW_CONN_AG_CTX_RESERVED4_MASK 0x1 -#define XSTORM_ETH_HW_CONN_AG_CTX_RESERVED4_SHIFT 5 -#define XSTORM_ETH_HW_CONN_AG_CTX_RESERVED5_MASK 0x1 -#define XSTORM_ETH_HW_CONN_AG_CTX_RESERVED5_SHIFT 6 -#define XSTORM_ETH_HW_CONN_AG_CTX_RESERVED6_MASK 0x1 -#define XSTORM_ETH_HW_CONN_AG_CTX_RESERVED6_SHIFT 7 - u8 flags1; -#define XSTORM_ETH_HW_CONN_AG_CTX_RESERVED7_MASK 0x1 -#define XSTORM_ETH_HW_CONN_AG_CTX_RESERVED7_SHIFT 0 -#define XSTORM_ETH_HW_CONN_AG_CTX_RESERVED8_MASK 0x1 -#define XSTORM_ETH_HW_CONN_AG_CTX_RESERVED8_SHIFT 1 -#define XSTORM_ETH_HW_CONN_AG_CTX_RESERVED9_MASK 0x1 -#define XSTORM_ETH_HW_CONN_AG_CTX_RESERVED9_SHIFT 2 -#define XSTORM_ETH_HW_CONN_AG_CTX_BIT11_MASK 0x1 -#define XSTORM_ETH_HW_CONN_AG_CTX_BIT11_SHIFT 3 -#define XSTORM_ETH_HW_CONN_AG_CTX_BIT12_MASK 0x1 -#define XSTORM_ETH_HW_CONN_AG_CTX_BIT12_SHIFT 4 -#define XSTORM_ETH_HW_CONN_AG_CTX_BIT13_MASK 0x1 -#define XSTORM_ETH_HW_CONN_AG_CTX_BIT13_SHIFT 5 -#define XSTORM_ETH_HW_CONN_AG_CTX_TX_RULE_ACTIVE_MASK 0x1 -#define XSTORM_ETH_HW_CONN_AG_CTX_TX_RULE_ACTIVE_SHIFT 6 -#define 
XSTORM_ETH_HW_CONN_AG_CTX_DQ_CF_ACTIVE_MASK 0x1 -#define XSTORM_ETH_HW_CONN_AG_CTX_DQ_CF_ACTIVE_SHIFT 7 - u8 flags2; -#define XSTORM_ETH_HW_CONN_AG_CTX_CF0_MASK 0x3 -#define XSTORM_ETH_HW_CONN_AG_CTX_CF0_SHIFT 0 -#define XSTORM_ETH_HW_CONN_AG_CTX_CF1_MASK 0x3 -#define XSTORM_ETH_HW_CONN_AG_CTX_CF1_SHIFT 2 -#define XSTORM_ETH_HW_CONN_AG_CTX_CF2_MASK 0x3 -#define XSTORM_ETH_HW_CONN_AG_CTX_CF2_SHIFT 4 -#define XSTORM_ETH_HW_CONN_AG_CTX_CF3_MASK 0x3 -#define XSTORM_ETH_HW_CONN_AG_CTX_CF3_SHIFT 6 - u8 flags3; -#define XSTORM_ETH_HW_CONN_AG_CTX_CF4_MASK 0x3 -#define XSTORM_ETH_HW_CONN_AG_CTX_CF4_SHIFT 0 -#define XSTORM_ETH_HW_CONN_AG_CTX_CF5_MASK 0x3 -#define XSTORM_ETH_HW_CONN_AG_CTX_CF5_SHIFT 2 -#define XSTORM_ETH_HW_CONN_AG_CTX_CF6_MASK 0x3 -#define XSTORM_ETH_HW_CONN_AG_CTX_CF6_SHIFT 4 -#define XSTORM_ETH_HW_CONN_AG_CTX_CF7_MASK 0x3 -#define XSTORM_ETH_HW_CONN_AG_CTX_CF7_SHIFT 6 - u8 flags4; -#define XSTORM_ETH_HW_CONN_AG_CTX_CF8_MASK 0x3 -#define XSTORM_ETH_HW_CONN_AG_CTX_CF8_SHIFT 0 -#define XSTORM_ETH_HW_CONN_AG_CTX_CF9_MASK 0x3 -#define XSTORM_ETH_HW_CONN_AG_CTX_CF9_SHIFT 2 -#define XSTORM_ETH_HW_CONN_AG_CTX_CF10_MASK 0x3 -#define XSTORM_ETH_HW_CONN_AG_CTX_CF10_SHIFT 4 -#define XSTORM_ETH_HW_CONN_AG_CTX_CF11_MASK 0x3 -#define XSTORM_ETH_HW_CONN_AG_CTX_CF11_SHIFT 6 - u8 flags5; -#define XSTORM_ETH_HW_CONN_AG_CTX_CF12_MASK 0x3 -#define XSTORM_ETH_HW_CONN_AG_CTX_CF12_SHIFT 0 -#define XSTORM_ETH_HW_CONN_AG_CTX_CF13_MASK 0x3 -#define XSTORM_ETH_HW_CONN_AG_CTX_CF13_SHIFT 2 -#define XSTORM_ETH_HW_CONN_AG_CTX_CF14_MASK 0x3 -#define XSTORM_ETH_HW_CONN_AG_CTX_CF14_SHIFT 4 -#define XSTORM_ETH_HW_CONN_AG_CTX_CF15_MASK 0x3 -#define XSTORM_ETH_HW_CONN_AG_CTX_CF15_SHIFT 6 - u8 flags6; -#define XSTORM_ETH_HW_CONN_AG_CTX_GO_TO_BD_CONS_CF_MASK 0x3 -#define XSTORM_ETH_HW_CONN_AG_CTX_GO_TO_BD_CONS_CF_SHIFT 0 -#define XSTORM_ETH_HW_CONN_AG_CTX_MULTI_UNICAST_CF_MASK 0x3 -#define XSTORM_ETH_HW_CONN_AG_CTX_MULTI_UNICAST_CF_SHIFT 2 -#define XSTORM_ETH_HW_CONN_AG_CTX_DQ_CF_MASK 0x3 -#define XSTORM_ETH_HW_CONN_AG_CTX_DQ_CF_SHIFT 4 -#define XSTORM_ETH_HW_CONN_AG_CTX_TERMINATE_CF_MASK 0x3 -#define XSTORM_ETH_HW_CONN_AG_CTX_TERMINATE_CF_SHIFT 6 - u8 flags7; -#define XSTORM_ETH_HW_CONN_AG_CTX_FLUSH_Q0_MASK 0x3 -#define XSTORM_ETH_HW_CONN_AG_CTX_FLUSH_Q0_SHIFT 0 -#define XSTORM_ETH_HW_CONN_AG_CTX_RESERVED10_MASK 0x3 -#define XSTORM_ETH_HW_CONN_AG_CTX_RESERVED10_SHIFT 2 -#define XSTORM_ETH_HW_CONN_AG_CTX_SLOW_PATH_MASK 0x3 -#define XSTORM_ETH_HW_CONN_AG_CTX_SLOW_PATH_SHIFT 4 -#define XSTORM_ETH_HW_CONN_AG_CTX_CF0EN_MASK 0x1 -#define XSTORM_ETH_HW_CONN_AG_CTX_CF0EN_SHIFT 6 -#define XSTORM_ETH_HW_CONN_AG_CTX_CF1EN_MASK 0x1 -#define XSTORM_ETH_HW_CONN_AG_CTX_CF1EN_SHIFT 7 - u8 flags8; -#define XSTORM_ETH_HW_CONN_AG_CTX_CF2EN_MASK 0x1 -#define XSTORM_ETH_HW_CONN_AG_CTX_CF2EN_SHIFT 0 -#define XSTORM_ETH_HW_CONN_AG_CTX_CF3EN_MASK 0x1 -#define XSTORM_ETH_HW_CONN_AG_CTX_CF3EN_SHIFT 1 -#define XSTORM_ETH_HW_CONN_AG_CTX_CF4EN_MASK 0x1 -#define XSTORM_ETH_HW_CONN_AG_CTX_CF4EN_SHIFT 2 -#define XSTORM_ETH_HW_CONN_AG_CTX_CF5EN_MASK 0x1 -#define XSTORM_ETH_HW_CONN_AG_CTX_CF5EN_SHIFT 3 -#define XSTORM_ETH_HW_CONN_AG_CTX_CF6EN_MASK 0x1 -#define XSTORM_ETH_HW_CONN_AG_CTX_CF6EN_SHIFT 4 -#define XSTORM_ETH_HW_CONN_AG_CTX_CF7EN_MASK 0x1 -#define XSTORM_ETH_HW_CONN_AG_CTX_CF7EN_SHIFT 5 -#define XSTORM_ETH_HW_CONN_AG_CTX_CF8EN_MASK 0x1 -#define XSTORM_ETH_HW_CONN_AG_CTX_CF8EN_SHIFT 6 -#define XSTORM_ETH_HW_CONN_AG_CTX_CF9EN_MASK 0x1 -#define XSTORM_ETH_HW_CONN_AG_CTX_CF9EN_SHIFT 7 - u8 flags9; -#define XSTORM_ETH_HW_CONN_AG_CTX_CF10EN_MASK 
0x1 -#define XSTORM_ETH_HW_CONN_AG_CTX_CF10EN_SHIFT 0 -#define XSTORM_ETH_HW_CONN_AG_CTX_CF11EN_MASK 0x1 -#define XSTORM_ETH_HW_CONN_AG_CTX_CF11EN_SHIFT 1 -#define XSTORM_ETH_HW_CONN_AG_CTX_CF12EN_MASK 0x1 -#define XSTORM_ETH_HW_CONN_AG_CTX_CF12EN_SHIFT 2 -#define XSTORM_ETH_HW_CONN_AG_CTX_CF13EN_MASK 0x1 -#define XSTORM_ETH_HW_CONN_AG_CTX_CF13EN_SHIFT 3 -#define XSTORM_ETH_HW_CONN_AG_CTX_CF14EN_MASK 0x1 -#define XSTORM_ETH_HW_CONN_AG_CTX_CF14EN_SHIFT 4 -#define XSTORM_ETH_HW_CONN_AG_CTX_CF15EN_MASK 0x1 -#define XSTORM_ETH_HW_CONN_AG_CTX_CF15EN_SHIFT 5 -#define XSTORM_ETH_HW_CONN_AG_CTX_GO_TO_BD_CONS_CF_EN_MASK 0x1 -#define XSTORM_ETH_HW_CONN_AG_CTX_GO_TO_BD_CONS_CF_EN_SHIFT 6 -#define XSTORM_ETH_HW_CONN_AG_CTX_MULTI_UNICAST_CF_EN_MASK 0x1 -#define XSTORM_ETH_HW_CONN_AG_CTX_MULTI_UNICAST_CF_EN_SHIFT 7 - u8 flags10; -#define XSTORM_ETH_HW_CONN_AG_CTX_DQ_CF_EN_MASK 0x1 -#define XSTORM_ETH_HW_CONN_AG_CTX_DQ_CF_EN_SHIFT 0 -#define XSTORM_ETH_HW_CONN_AG_CTX_TERMINATE_CF_EN_MASK 0x1 -#define XSTORM_ETH_HW_CONN_AG_CTX_TERMINATE_CF_EN_SHIFT 1 -#define XSTORM_ETH_HW_CONN_AG_CTX_FLUSH_Q0_EN_MASK 0x1 -#define XSTORM_ETH_HW_CONN_AG_CTX_FLUSH_Q0_EN_SHIFT 2 -#define XSTORM_ETH_HW_CONN_AG_CTX_RESERVED11_MASK 0x1 -#define XSTORM_ETH_HW_CONN_AG_CTX_RESERVED11_SHIFT 3 -#define XSTORM_ETH_HW_CONN_AG_CTX_SLOW_PATH_EN_MASK 0x1 -#define XSTORM_ETH_HW_CONN_AG_CTX_SLOW_PATH_EN_SHIFT 4 -#define XSTORM_ETH_HW_CONN_AG_CTX_TPH_ENABLE_EN_RESERVED_MASK 0x1 -#define XSTORM_ETH_HW_CONN_AG_CTX_TPH_ENABLE_EN_RESERVED_SHIFT 5 -#define XSTORM_ETH_HW_CONN_AG_CTX_RESERVED12_MASK 0x1 -#define XSTORM_ETH_HW_CONN_AG_CTX_RESERVED12_SHIFT 6 -#define XSTORM_ETH_HW_CONN_AG_CTX_RESERVED13_MASK 0x1 -#define XSTORM_ETH_HW_CONN_AG_CTX_RESERVED13_SHIFT 7 - u8 flags11; -#define XSTORM_ETH_HW_CONN_AG_CTX_RESERVED14_MASK 0x1 -#define XSTORM_ETH_HW_CONN_AG_CTX_RESERVED14_SHIFT 0 -#define XSTORM_ETH_HW_CONN_AG_CTX_RESERVED15_MASK 0x1 -#define XSTORM_ETH_HW_CONN_AG_CTX_RESERVED15_SHIFT 1 -#define XSTORM_ETH_HW_CONN_AG_CTX_TX_DEC_RULE_EN_MASK 0x1 -#define XSTORM_ETH_HW_CONN_AG_CTX_TX_DEC_RULE_EN_SHIFT 2 -#define XSTORM_ETH_HW_CONN_AG_CTX_RULE5EN_MASK 0x1 -#define XSTORM_ETH_HW_CONN_AG_CTX_RULE5EN_SHIFT 3 -#define XSTORM_ETH_HW_CONN_AG_CTX_RULE6EN_MASK 0x1 -#define XSTORM_ETH_HW_CONN_AG_CTX_RULE6EN_SHIFT 4 -#define XSTORM_ETH_HW_CONN_AG_CTX_RULE7EN_MASK 0x1 -#define XSTORM_ETH_HW_CONN_AG_CTX_RULE7EN_SHIFT 5 -#define XSTORM_ETH_HW_CONN_AG_CTX_A0_RESERVED1_MASK 0x1 -#define XSTORM_ETH_HW_CONN_AG_CTX_A0_RESERVED1_SHIFT 6 -#define XSTORM_ETH_HW_CONN_AG_CTX_RULE9EN_MASK 0x1 -#define XSTORM_ETH_HW_CONN_AG_CTX_RULE9EN_SHIFT 7 - u8 flags12; -#define XSTORM_ETH_HW_CONN_AG_CTX_RULE10EN_MASK 0x1 -#define XSTORM_ETH_HW_CONN_AG_CTX_RULE10EN_SHIFT 0 -#define XSTORM_ETH_HW_CONN_AG_CTX_RULE11EN_MASK 0x1 -#define XSTORM_ETH_HW_CONN_AG_CTX_RULE11EN_SHIFT 1 -#define XSTORM_ETH_HW_CONN_AG_CTX_A0_RESERVED2_MASK 0x1 -#define XSTORM_ETH_HW_CONN_AG_CTX_A0_RESERVED2_SHIFT 2 -#define XSTORM_ETH_HW_CONN_AG_CTX_A0_RESERVED3_MASK 0x1 -#define XSTORM_ETH_HW_CONN_AG_CTX_A0_RESERVED3_SHIFT 3 -#define XSTORM_ETH_HW_CONN_AG_CTX_RULE14EN_MASK 0x1 -#define XSTORM_ETH_HW_CONN_AG_CTX_RULE14EN_SHIFT 4 -#define XSTORM_ETH_HW_CONN_AG_CTX_RULE15EN_MASK 0x1 -#define XSTORM_ETH_HW_CONN_AG_CTX_RULE15EN_SHIFT 5 -#define XSTORM_ETH_HW_CONN_AG_CTX_RULE16EN_MASK 0x1 -#define XSTORM_ETH_HW_CONN_AG_CTX_RULE16EN_SHIFT 6 -#define XSTORM_ETH_HW_CONN_AG_CTX_RULE17EN_MASK 0x1 -#define XSTORM_ETH_HW_CONN_AG_CTX_RULE17EN_SHIFT 7 - u8 flags13; -#define XSTORM_ETH_HW_CONN_AG_CTX_RULE18EN_MASK 0x1 -#define 
XSTORM_ETH_HW_CONN_AG_CTX_RULE18EN_SHIFT 0 -#define XSTORM_ETH_HW_CONN_AG_CTX_RULE19EN_MASK 0x1 -#define XSTORM_ETH_HW_CONN_AG_CTX_RULE19EN_SHIFT 1 -#define XSTORM_ETH_HW_CONN_AG_CTX_A0_RESERVED4_MASK 0x1 -#define XSTORM_ETH_HW_CONN_AG_CTX_A0_RESERVED4_SHIFT 2 -#define XSTORM_ETH_HW_CONN_AG_CTX_A0_RESERVED5_MASK 0x1 -#define XSTORM_ETH_HW_CONN_AG_CTX_A0_RESERVED5_SHIFT 3 -#define XSTORM_ETH_HW_CONN_AG_CTX_A0_RESERVED6_MASK 0x1 -#define XSTORM_ETH_HW_CONN_AG_CTX_A0_RESERVED6_SHIFT 4 -#define XSTORM_ETH_HW_CONN_AG_CTX_A0_RESERVED7_MASK 0x1 -#define XSTORM_ETH_HW_CONN_AG_CTX_A0_RESERVED7_SHIFT 5 -#define XSTORM_ETH_HW_CONN_AG_CTX_A0_RESERVED8_MASK 0x1 -#define XSTORM_ETH_HW_CONN_AG_CTX_A0_RESERVED8_SHIFT 6 -#define XSTORM_ETH_HW_CONN_AG_CTX_A0_RESERVED9_MASK 0x1 -#define XSTORM_ETH_HW_CONN_AG_CTX_A0_RESERVED9_SHIFT 7 - u8 flags14; -#define XSTORM_ETH_HW_CONN_AG_CTX_EDPM_USE_EXT_HDR_MASK 0x1 -#define XSTORM_ETH_HW_CONN_AG_CTX_EDPM_USE_EXT_HDR_SHIFT 0 -#define XSTORM_ETH_HW_CONN_AG_CTX_EDPM_SEND_RAW_L3L4_MASK 0x1 -#define XSTORM_ETH_HW_CONN_AG_CTX_EDPM_SEND_RAW_L3L4_SHIFT 1 -#define XSTORM_ETH_HW_CONN_AG_CTX_EDPM_INBAND_PROP_HDR_MASK 0x1 -#define XSTORM_ETH_HW_CONN_AG_CTX_EDPM_INBAND_PROP_HDR_SHIFT 2 -#define XSTORM_ETH_HW_CONN_AG_CTX_EDPM_SEND_EXT_TUNNEL_MASK 0x1 -#define XSTORM_ETH_HW_CONN_AG_CTX_EDPM_SEND_EXT_TUNNEL_SHIFT 3 -#define XSTORM_ETH_HW_CONN_AG_CTX_L2_EDPM_ENABLE_MASK 0x1 -#define XSTORM_ETH_HW_CONN_AG_CTX_L2_EDPM_ENABLE_SHIFT 4 -#define XSTORM_ETH_HW_CONN_AG_CTX_ROCE_EDPM_ENABLE_MASK 0x1 -#define XSTORM_ETH_HW_CONN_AG_CTX_ROCE_EDPM_ENABLE_SHIFT 5 -#define XSTORM_ETH_HW_CONN_AG_CTX_TPH_ENABLE_MASK 0x3 -#define XSTORM_ETH_HW_CONN_AG_CTX_TPH_ENABLE_SHIFT 6 - u8 edpm_event_id /* byte2 */; - __le16 physical_q0 /* physical_q0 */; - __le16 word1 /* physical_q1 */; - __le16 edpm_num_bds /* physical_q2 */; - __le16 tx_bd_cons /* word3 */; - __le16 tx_bd_prod /* word4 */; - __le16 go_to_bd_cons /* word5 */; - __le16 conn_dpi /* conn_dpi */; -}; - #define VF_MAX_STATIC 192 /* In case of K2 */ #define MCP_GLOB_PATH_MAX 2 @@ -3818,6 +3605,10 @@ struct public_port { struct dcbx_local_params local_admin_dcbx_mib; struct dcbx_mib remote_dcbx_mib; struct dcbx_mib operational_dcbx_mib; + + u32 fc_npiv_nvram_tbl_addr; + u32 fc_npiv_nvram_tbl_size; + u32 transceiver_data; }; /**************************************/ @@ -3830,7 +3621,11 @@ struct public_func { u32 iscsi_boot_signature; u32 iscsi_boot_block_offset; - u32 reserved[8]; + u32 mtu_size; + u32 c2s_pcp_map_lower; + u32 c2s_pcp_map_upper; + u32 c2s_pcp_map_default; + u32 reserved[4]; u32 config; @@ -3894,10 +3689,10 @@ struct public_func { #define DRV_ID_MCP_HSI_VER_SHIFT 16 #define DRV_ID_MCP_HSI_VER_CURRENT BIT(DRV_ID_MCP_HSI_VER_SHIFT) -#define DRV_ID_DRV_TYPE_MASK 0xff000000 +#define DRV_ID_DRV_TYPE_MASK 0x7f000000 #define DRV_ID_DRV_TYPE_SHIFT 24 #define DRV_ID_DRV_TYPE_UNKNOWN (0 << DRV_ID_DRV_TYPE_SHIFT) -#define DRV_ID_DRV_TYPE_LINUX BIT(DRV_ID_DRV_TYPE_SHIFT) +#define DRV_ID_DRV_TYPE_LINUX (1 << DRV_ID_DRV_TYPE_SHIFT) #define DRV_ID_DRV_TYPE_WINDOWS (2 << DRV_ID_DRV_TYPE_SHIFT) #define DRV_ID_DRV_TYPE_DIAG (3 << DRV_ID_DRV_TYPE_SHIFT) #define DRV_ID_DRV_TYPE_PREBOOT (4 << DRV_ID_DRV_TYPE_SHIFT) @@ -3905,6 +3700,10 @@ struct public_func { #define DRV_ID_DRV_TYPE_VMWARE (6 << DRV_ID_DRV_TYPE_SHIFT) #define DRV_ID_DRV_TYPE_FREEBSD (7 << DRV_ID_DRV_TYPE_SHIFT) #define DRV_ID_DRV_TYPE_AIX (8 << DRV_ID_DRV_TYPE_SHIFT) + +#define DRV_ID_DRV_INIT_HW_MASK 0x80000000 +#define DRV_ID_DRV_INIT_HW_SHIFT 31 +#define DRV_ID_DRV_INIT_HW_FLAG 
BIT(DRV_ID_DRV_INIT_HW_SHIFT) }; /**************************************/ @@ -3964,6 +3763,7 @@ struct public_drv_mb { #define DRV_MSG_CODE_MASK 0xffff0000 #define DRV_MSG_CODE_LOAD_REQ 0x10000000 #define DRV_MSG_CODE_LOAD_DONE 0x11000000 +#define DRV_MSG_CODE_INIT_HW 0x12000000 #define DRV_MSG_CODE_UNLOAD_REQ 0x20000000 #define DRV_MSG_CODE_UNLOAD_DONE 0x21000000 #define DRV_MSG_CODE_INIT_PHY 0x22000000 @@ -4100,6 +3900,7 @@ struct public_drv_mb { #define FW_MSG_CODE_SET_SECURE_MODE_ERROR 0x00130000 #define FW_MSG_CODE_SET_SECURE_MODE_OK 0x00140000 #define FW_MSG_MODE_PHY_PRIVILEGE_ERROR 0x00150000 +#define FW_MSG_CODE_OK 0x00160000 #define FW_MSG_SEQ_NUMBER_MASK 0x0000ffff @@ -4212,7 +4013,7 @@ struct nvm_cfg1_glob { #define NVM_CFG1_GLOB_MF_MODE_MASK 0x00000FF0 #define NVM_CFG1_GLOB_MF_MODE_OFFSET 4 #define NVM_CFG1_GLOB_MF_MODE_MF_ALLOWED 0x0 -#define NVM_CFG1_GLOB_MF_MODE_FORCED_SF 0x1 +#define NVM_CFG1_GLOB_MF_MODE_DEFAULT 0x1 #define NVM_CFG1_GLOB_MF_MODE_SPIO4 0x2 #define NVM_CFG1_GLOB_MF_MODE_NPAR1_0 0x3 #define NVM_CFG1_GLOB_MF_MODE_NPAR1_5 0x4 @@ -4643,8 +4444,12 @@ struct nvm_cfg1_glob { #define NVM_CFG1_GLOB_I2C_MUX_SEL_GPIO__GPIO29 0x1E #define NVM_CFG1_GLOB_I2C_MUX_SEL_GPIO__GPIO30 0x1F #define NVM_CFG1_GLOB_I2C_MUX_SEL_GPIO__GPIO31 0x20 - - u32 reserved[46]; /* 0x88 */ + u32 device_capabilities; /* 0x88 */ +#define NVM_CFG1_GLOB_DEVICE_CAPABILITIES_ETHERNET 0x1 + u32 power_dissipated; /* 0x8C */ + u32 power_consumed; /* 0x90 */ + u32 efi_version; /* 0x94 */ + u32 reserved[42]; /* 0x98 */ }; struct nvm_cfg1_path { @@ -4652,26 +4457,8 @@ struct nvm_cfg1_path { }; struct nvm_cfg1_port { - u32 power_dissipated; /* 0x0 */ -#define NVM_CFG1_PORT_POWER_DIS_D0_MASK 0x000000FF -#define NVM_CFG1_PORT_POWER_DIS_D0_OFFSET 0 -#define NVM_CFG1_PORT_POWER_DIS_D1_MASK 0x0000FF00 -#define NVM_CFG1_PORT_POWER_DIS_D1_OFFSET 8 -#define NVM_CFG1_PORT_POWER_DIS_D2_MASK 0x00FF0000 -#define NVM_CFG1_PORT_POWER_DIS_D2_OFFSET 16 -#define NVM_CFG1_PORT_POWER_DIS_D3_MASK 0xFF000000 -#define NVM_CFG1_PORT_POWER_DIS_D3_OFFSET 24 - - u32 power_consumed; /* 0x4 */ -#define NVM_CFG1_PORT_POWER_CONS_D0_MASK 0x000000FF -#define NVM_CFG1_PORT_POWER_CONS_D0_OFFSET 0 -#define NVM_CFG1_PORT_POWER_CONS_D1_MASK 0x0000FF00 -#define NVM_CFG1_PORT_POWER_CONS_D1_OFFSET 8 -#define NVM_CFG1_PORT_POWER_CONS_D2_MASK 0x00FF0000 -#define NVM_CFG1_PORT_POWER_CONS_D2_OFFSET 16 -#define NVM_CFG1_PORT_POWER_CONS_D3_MASK 0xFF000000 -#define NVM_CFG1_PORT_POWER_CONS_D3_OFFSET 24 - + u32 reserved__m_relocated_to_option_123; /* 0x0 */ + u32 reserved__m_relocated_to_option_124; /* 0x4 */ u32 generic_cont0; /* 0x8 */ #define NVM_CFG1_PORT_LED_MODE_MASK 0x000000FF #define NVM_CFG1_PORT_LED_MODE_OFFSET 0 @@ -4699,7 +4486,9 @@ struct nvm_cfg1_port { #define NVM_CFG1_PORT_DCBX_MODE_IEEE 0x1 #define NVM_CFG1_PORT_DCBX_MODE_CEE 0x2 #define NVM_CFG1_PORT_DCBX_MODE_DYNAMIC 0x3 - +#define NVM_CFG1_PORT_DEFAULT_ENABLED_PROTOCOLS_MASK 0x00F00000 +#define NVM_CFG1_PORT_DEFAULT_ENABLED_PROTOCOLS_OFFSET 20 +#define NVM_CFG1_PORT_DEFAULT_ENABLED_PROTOCOLS_ETHERNET 0x1 u32 pcie_cfg; /* 0xC */ #define NVM_CFG1_PORT_RESERVED15_MASK 0x00000007 #define NVM_CFG1_PORT_RESERVED15_OFFSET 0 @@ -4784,10 +4573,11 @@ struct nvm_cfg1_port { #define NVM_CFG1_PORT_SERDES_NET_INTERFACE_SFI 0x9 #define NVM_CFG1_PORT_SERDES_NET_INTERFACE_1000X 0xB #define NVM_CFG1_PORT_SERDES_NET_INTERFACE_SGMII 0xC -#define NVM_CFG1_PORT_SERDES_NET_INTERFACE_XLAUI 0xD -#define NVM_CFG1_PORT_SERDES_NET_INTERFACE_CAUI 0xE -#define NVM_CFG1_PORT_SERDES_NET_INTERFACE_XLPPI 0xF -#define 
NVM_CFG1_PORT_SERDES_NET_INTERFACE_CPPI 0x10 +#define NVM_CFG1_PORT_SERDES_NET_INTERFACE_XLAUI 0x11 +#define NVM_CFG1_PORT_SERDES_NET_INTERFACE_XLPPI 0x12 +#define NVM_CFG1_PORT_SERDES_NET_INTERFACE_CAUI 0x21 +#define NVM_CFG1_PORT_SERDES_NET_INTERFACE_CPPI 0x22 +#define NVM_CFG1_PORT_SERDES_NET_INTERFACE_25GAUI 0x31 #define NVM_CFG1_PORT_AN_MODE_MASK 0xFF000000 #define NVM_CFG1_PORT_AN_MODE_OFFSET 24 #define NVM_CFG1_PORT_AN_MODE_NONE 0x0 @@ -4801,9 +4591,6 @@ struct nvm_cfg1_port { u32 mgmt_traffic; /* 0x20 */ #define NVM_CFG1_PORT_RESERVED61_MASK 0x0000000F #define NVM_CFG1_PORT_RESERVED61_OFFSET 0 -#define NVM_CFG1_PORT_RESERVED61_DISABLED 0x0 -#define NVM_CFG1_PORT_RESERVED61_NCSI_OVER_RMII 0x1 -#define NVM_CFG1_PORT_RESERVED61_NCSI_OVER_SMBUS 0x2 u32 ext_phy; /* 0x24 */ #define NVM_CFG1_PORT_EXTERNAL_PHY_TYPE_MASK 0x000000FF @@ -4814,16 +4601,12 @@ struct nvm_cfg1_port { #define NVM_CFG1_PORT_EXTERNAL_PHY_ADDRESS_OFFSET 8 u32 mba_cfg1; /* 0x28 */ -#define NVM_CFG1_PORT_MBA_MASK 0x00000001 -#define NVM_CFG1_PORT_MBA_OFFSET 0 -#define NVM_CFG1_PORT_MBA_DISABLED 0x0 -#define NVM_CFG1_PORT_MBA_ENABLED 0x1 -#define NVM_CFG1_PORT_MBA_BOOT_TYPE_MASK 0x00000006 -#define NVM_CFG1_PORT_MBA_BOOT_TYPE_OFFSET 1 -#define NVM_CFG1_PORT_MBA_BOOT_TYPE_AUTO 0x0 -#define NVM_CFG1_PORT_MBA_BOOT_TYPE_BBS 0x1 -#define NVM_CFG1_PORT_MBA_BOOT_TYPE_INT18H 0x2 -#define NVM_CFG1_PORT_MBA_BOOT_TYPE_INT19H 0x3 +#define NVM_CFG1_PORT_PREBOOT_OPROM_MASK 0x00000001 +#define NVM_CFG1_PORT_PREBOOT_OPROM_OFFSET 0 +#define NVM_CFG1_PORT_PREBOOT_OPROM_DISABLED 0x0 +#define NVM_CFG1_PORT_PREBOOT_OPROM_ENABLED 0x1 +#define NVM_CFG1_PORT_RESERVED__M_MBA_BOOT_TYPE_MASK 0x00000006 +#define NVM_CFG1_PORT_RESERVED__M_MBA_BOOT_TYPE_OFFSET 1 #define NVM_CFG1_PORT_MBA_DELAY_TIME_MASK 0x00000078 #define NVM_CFG1_PORT_MBA_DELAY_TIME_OFFSET 3 #define NVM_CFG1_PORT_MBA_SETUP_HOT_KEY_MASK 0x00000080 @@ -4836,61 +4619,30 @@ struct nvm_cfg1_port { #define NVM_CFG1_PORT_MBA_HIDE_SETUP_PROMPT_ENABLED 0x1 #define NVM_CFG1_PORT_RESERVED5_MASK 0x0001FE00 #define NVM_CFG1_PORT_RESERVED5_OFFSET 9 -#define NVM_CFG1_PORT_RESERVED5_DISABLED 0x0 -#define NVM_CFG1_PORT_RESERVED5_2K 0x1 -#define NVM_CFG1_PORT_RESERVED5_4K 0x2 -#define NVM_CFG1_PORT_RESERVED5_8K 0x3 -#define NVM_CFG1_PORT_RESERVED5_16K 0x4 -#define NVM_CFG1_PORT_RESERVED5_32K 0x5 -#define NVM_CFG1_PORT_RESERVED5_64K 0x6 -#define NVM_CFG1_PORT_RESERVED5_128K 0x7 -#define NVM_CFG1_PORT_RESERVED5_256K 0x8 -#define NVM_CFG1_PORT_RESERVED5_512K 0x9 -#define NVM_CFG1_PORT_RESERVED5_1M 0xA -#define NVM_CFG1_PORT_RESERVED5_2M 0xB -#define NVM_CFG1_PORT_RESERVED5_4M 0xC -#define NVM_CFG1_PORT_RESERVED5_8M 0xD -#define NVM_CFG1_PORT_RESERVED5_16M 0xE -#define NVM_CFG1_PORT_RESERVED5_32M 0xF -#define NVM_CFG1_PORT_MBA_LINK_SPEED_MASK 0x001E0000 -#define NVM_CFG1_PORT_MBA_LINK_SPEED_OFFSET 17 -#define NVM_CFG1_PORT_MBA_LINK_SPEED_AUTONEG 0x0 -#define NVM_CFG1_PORT_MBA_LINK_SPEED_1G 0x1 -#define NVM_CFG1_PORT_MBA_LINK_SPEED_10G 0x2 -#define NVM_CFG1_PORT_MBA_LINK_SPEED_25G 0x4 -#define NVM_CFG1_PORT_MBA_LINK_SPEED_40G 0x5 -#define NVM_CFG1_PORT_MBA_LINK_SPEED_50G 0x6 -#define NVM_CFG1_PORT_MBA_LINK_SPEED_100G 0x7 -#define NVM_CFG1_PORT_MBA_BOOT_RETRY_COUNT_MASK 0x00E00000 -#define NVM_CFG1_PORT_MBA_BOOT_RETRY_COUNT_OFFSET 21 +#define NVM_CFG1_PORT_PREBOOT_LINK_SPEED_MASK 0x001E0000 +#define NVM_CFG1_PORT_PREBOOT_LINK_SPEED_OFFSET 17 +#define NVM_CFG1_PORT_PREBOOT_LINK_SPEED_AUTONEG 0x0 +#define NVM_CFG1_PORT_PREBOOT_LINK_SPEED_1G 0x1 +#define NVM_CFG1_PORT_PREBOOT_LINK_SPEED_10G 0x2 +#define 
NVM_CFG1_PORT_PREBOOT_LINK_SPEED_25G 0x4 +#define NVM_CFG1_PORT_PREBOOT_LINK_SPEED_40G 0x5 +#define NVM_CFG1_PORT_PREBOOT_LINK_SPEED_50G 0x6 +#define NVM_CFG1_PORT_PREBOOT_LINK_SPEED_100G 0x7 +#define NVM_CFG1_PORT_PREBOOT_LINK_SPEED_SMARTLINQ 0x8 +#define NVM_CFG1_PORT_RESERVED__M_MBA_BOOT_RETRY_COUNT_MASK 0x00E00000 +#define NVM_CFG1_PORT_RESERVED__M_MBA_BOOT_RETRY_COUNT_OFFSET 21 u32 mba_cfg2; /* 0x2C */ -#define NVM_CFG1_PORT_MBA_VLAN_VALUE_MASK 0x0000FFFF -#define NVM_CFG1_PORT_MBA_VLAN_VALUE_OFFSET 0 -#define NVM_CFG1_PORT_MBA_VLAN_MASK 0x00010000 -#define NVM_CFG1_PORT_MBA_VLAN_OFFSET 16 +#define NVM_CFG1_PORT_RESERVED65_MASK 0x0000FFFF +#define NVM_CFG1_PORT_RESERVED65_OFFSET 0 +#define NVM_CFG1_PORT_RESERVED66_MASK 0x00010000 +#define NVM_CFG1_PORT_RESERVED66_OFFSET 16 u32 vf_cfg; /* 0x30 */ #define NVM_CFG1_PORT_RESERVED8_MASK 0x0000FFFF #define NVM_CFG1_PORT_RESERVED8_OFFSET 0 #define NVM_CFG1_PORT_RESERVED6_MASK 0x000F0000 #define NVM_CFG1_PORT_RESERVED6_OFFSET 16 -#define NVM_CFG1_PORT_RESERVED6_DISABLED 0x0 -#define NVM_CFG1_PORT_RESERVED6_4K 0x1 -#define NVM_CFG1_PORT_RESERVED6_8K 0x2 -#define NVM_CFG1_PORT_RESERVED6_16K 0x3 -#define NVM_CFG1_PORT_RESERVED6_32K 0x4 -#define NVM_CFG1_PORT_RESERVED6_64K 0x5 -#define NVM_CFG1_PORT_RESERVED6_128K 0x6 -#define NVM_CFG1_PORT_RESERVED6_256K 0x7 -#define NVM_CFG1_PORT_RESERVED6_512K 0x8 -#define NVM_CFG1_PORT_RESERVED6_1M 0x9 -#define NVM_CFG1_PORT_RESERVED6_2M 0xA -#define NVM_CFG1_PORT_RESERVED6_4M 0xB -#define NVM_CFG1_PORT_RESERVED6_8M 0xC -#define NVM_CFG1_PORT_RESERVED6_16M 0xD -#define NVM_CFG1_PORT_RESERVED6_32M 0xE -#define NVM_CFG1_PORT_RESERVED6_64M 0xF struct nvm_cfg_mac_address lldp_mac_address; /* 0x34 */ @@ -4973,18 +4725,16 @@ struct nvm_cfg1_func { u32 device_id; /* 0x10 */ #define NVM_CFG1_FUNC_MF_VENDOR_DEVICE_ID_MASK 0x0000FFFF #define NVM_CFG1_FUNC_MF_VENDOR_DEVICE_ID_OFFSET 0 -#define NVM_CFG1_FUNC_VENDOR_DEVICE_ID_MASK 0xFFFF0000 -#define NVM_CFG1_FUNC_VENDOR_DEVICE_ID_OFFSET 16 +#define NVM_CFG1_FUNC_RESERVED77_MASK 0xFFFF0000 +#define NVM_CFG1_FUNC_RESERVED77_OFFSET 16 u32 cmn_cfg; /* 0x14 */ -#define NVM_CFG1_FUNC_MBA_BOOT_PROTOCOL_MASK 0x00000007 -#define NVM_CFG1_FUNC_MBA_BOOT_PROTOCOL_OFFSET 0 -#define NVM_CFG1_FUNC_MBA_BOOT_PROTOCOL_PXE 0x0 -#define NVM_CFG1_FUNC_MBA_BOOT_PROTOCOL_RPL 0x1 -#define NVM_CFG1_FUNC_MBA_BOOT_PROTOCOL_BOOTP 0x2 -#define NVM_CFG1_FUNC_MBA_BOOT_PROTOCOL_ISCSI_BOOT 0x3 -#define NVM_CFG1_FUNC_MBA_BOOT_PROTOCOL_FCOE_BOOT 0x4 -#define NVM_CFG1_FUNC_MBA_BOOT_PROTOCOL_NONE 0x7 +#define NVM_CFG1_FUNC_PREBOOT_BOOT_PROTOCOL_MASK 0x00000007 +#define NVM_CFG1_FUNC_PREBOOT_BOOT_PROTOCOL_OFFSET 0 +#define NVM_CFG1_FUNC_PREBOOT_BOOT_PROTOCOL_PXE 0x0 +#define NVM_CFG1_FUNC_PREBOOT_BOOT_PROTOCOL_ISCSI_BOOT 0x3 +#define NVM_CFG1_FUNC_PREBOOT_BOOT_PROTOCOL_FCOE_BOOT 0x4 +#define NVM_CFG1_FUNC_PREBOOT_BOOT_PROTOCOL_NONE 0x7 #define NVM_CFG1_FUNC_VF_PCI_DEVICE_ID_MASK 0x0007FFF8 #define NVM_CFG1_FUNC_VF_PCI_DEVICE_ID_OFFSET 3 #define NVM_CFG1_FUNC_PERSONALITY_MASK 0x00780000 @@ -5029,8 +4779,8 @@ struct nvm_cfg1_func { struct nvm_cfg_mac_address fcoe_node_wwn_mac_addr; /* 0x1C */ struct nvm_cfg_mac_address fcoe_port_wwn_mac_addr; /* 0x24 */ - - u32 reserved[9]; /* 0x2C */ + u32 preboot_generic_cfg; /* 0x2C */ + u32 reserved[8]; /* 0x30 */ }; struct nvm_cfg1 { diff --git a/drivers/net/ethernet/qlogic/qed/qed_init_fw_funcs.c b/drivers/net/ethernet/qlogic/qed/qed_init_fw_funcs.c index 0b21a553cc7d..f55ebdc3c832 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_init_fw_funcs.c +++ 
b/drivers/net/ethernet/qlogic/qed/qed_init_fw_funcs.c
@@ -513,17 +513,14 @@ static int qed_pf_rl_rt_init(struct qed_hwfn *p_hwfn,
 * Return -1 on error.
 */
 static int qed_vp_wfq_rt_init(struct qed_hwfn *p_hwfn,
-			      u8 start_vport,
 			      u8 num_vports,
 			      struct init_qm_vport_params *vport_params)
 {
-	u8 tc, i, vport_id;
 	u32 inc_val;
+	u8 tc, i;
 
 	/* go over all PF VPORTs */
-	for (i = 0, vport_id = start_vport; i < num_vports; i++, vport_id++) {
-		u32 temp = QM_REG_WFQVPUPPERBOUND_RT_OFFSET;
-		u16 *pq_ids = &vport_params[i].first_tx_pq_id[0];
+	for (i = 0; i < num_vports; i++) {
 		if (!vport_params[i].vport_wfq)
 			continue;
@@ -539,20 +536,16 @@ static int qed_vp_wfq_rt_init(struct qed_hwfn *p_hwfn,
		 * different TCs
		 */
 		for (tc = 0; tc < NUM_OF_TCS; tc++) {
-			u16 vport_pq_id = pq_ids[tc];
+			u16 vport_pq_id = vport_params[i].first_tx_pq_id[tc];
 
 			if (vport_pq_id != QM_INVALID_PQ_ID) {
-				STORE_RT_REG(p_hwfn,
-					     QM_REG_WFQVPWEIGHT_RT_OFFSET +
-					     vport_pq_id, inc_val);
-				STORE_RT_REG(p_hwfn, temp + vport_pq_id,
-					     QM_WFQ_UPPER_BOUND |
-					     QM_WFQ_CRD_REG_SIGN_BIT);
 				STORE_RT_REG(p_hwfn,
 					     QM_REG_WFQVPCRD_RT_OFFSET +
 					     vport_pq_id,
-					     QM_WFQ_INIT_CRD(inc_val) |
 					     QM_WFQ_CRD_REG_SIGN_BIT);
+				STORE_RT_REG(p_hwfn,
+					     QM_REG_WFQVPWEIGHT_RT_OFFSET +
+					     vport_pq_id, inc_val);
 			}
 		}
 	}
@@ -709,8 +702,7 @@ int qed_qm_pf_rt_init(struct qed_hwfn *p_hwfn,
 	if (qed_pf_rl_rt_init(p_hwfn, p_params->pf_id, p_params->pf_rl))
 		return -1;
 
-	if (qed_vp_wfq_rt_init(p_hwfn, p_params->start_vport,
-			       p_params->num_vports, vport_params))
+	if (qed_vp_wfq_rt_init(p_hwfn, p_params->num_vports, vport_params))
 		return -1;
 
 	if (qed_vport_rl_rt_init(p_hwfn, p_params->start_vport,
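The qed_init_fw_funcs.c change above drops the start_vport argument: WFQ registers are now indexed directly by each TC's PQ id taken from init_qm_vport_params, the credit register is seeded with only the sign bit, and the weight register receives the increment value. A rough, self-contained sketch of the resulting loop shape; vp_wfq_init, store_rt_reg and the vport_wfq_cfg type are hypothetical stand-ins, not the driver's API:

#include <stdint.h>
#include <stdio.h>

#define NUM_OF_TCS       8
#define QM_INVALID_PQ_ID 0xffff

struct vport_wfq_cfg {
	uint16_t first_tx_pq_id[NUM_OF_TCS]; /* per-TC PQ id, or QM_INVALID_PQ_ID */
	uint32_t vport_wfq;                  /* WFQ weight; 0 means "not configured" */
};

/* Stand-in for STORE_RT_REG(): record an (offset, value) pair that the
 * runtime-init machinery would later upload to the device.
 */
static void store_rt_reg(uint32_t offset, uint32_t value)
{
	printf("rt[%u] = 0x%08x\n", (unsigned)offset, (unsigned)value);
}

static void vp_wfq_init(const struct vport_wfq_cfg *cfg, int num_vports,
			uint32_t crd_base, uint32_t weight_base,
			uint32_t init_crd)
{
	for (int i = 0; i < num_vports; i++) {
		if (!cfg[i].vport_wfq)
			continue; /* WFQ not configured for this VPORT */

		/* Each TC may map to a different PQ; program credit and
		 * weight per valid PQ, indexed by the PQ id itself.
		 */
		for (int tc = 0; tc < NUM_OF_TCS; tc++) {
			uint16_t pq = cfg[i].first_tx_pq_id[tc];

			if (pq == QM_INVALID_PQ_ID)
				continue;
			store_rt_reg(crd_base + pq, init_crd);
			store_rt_reg(weight_base + pq, cfg[i].vport_wfq);
		}
	}
}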
diff --git a/drivers/net/ethernet/qlogic/qed/qed_init_ops.c b/drivers/net/ethernet/qlogic/qed/qed_init_ops.c
index 796f1390e598..3269b3610e03 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_init_ops.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_init_ops.c
@@ -55,63 +55,98 @@ void qed_init_clear_rt_data(struct qed_hwfn *p_hwfn)
 	int i;
 
 	for (i = 0; i < RUNTIME_ARRAY_SIZE; i++)
-		p_hwfn->rt_data[i].b_valid = false;
+		p_hwfn->rt_data.b_valid[i] = false;
 }
 
 void qed_init_store_rt_reg(struct qed_hwfn *p_hwfn,
			   u32 rt_offset, u32 val)
 {
-	p_hwfn->rt_data[rt_offset].init_val = val;
-	p_hwfn->rt_data[rt_offset].b_valid = true;
+	p_hwfn->rt_data.init_val[rt_offset] = val;
+	p_hwfn->rt_data.b_valid[rt_offset] = true;
 }
 
 void qed_init_store_rt_agg(struct qed_hwfn *p_hwfn,
-			   u32 rt_offset,
-			   u32 *val,
+			   u32 rt_offset, u32 *p_val,
 			   size_t size)
 {
 	size_t i;
 
 	for (i = 0; i < size / sizeof(u32); i++) {
-		p_hwfn->rt_data[rt_offset + i].init_val = val[i];
-		p_hwfn->rt_data[rt_offset + i].b_valid = true;
+		p_hwfn->rt_data.init_val[rt_offset + i] = p_val[i];
+		p_hwfn->rt_data.b_valid[rt_offset + i] = true;
 	}
 }
 
-static void qed_init_rt(struct qed_hwfn *p_hwfn,
-			struct qed_ptt *p_ptt,
-			u32 addr,
-			u32 rt_offset,
-			u32 size)
+static int qed_init_rt(struct qed_hwfn *p_hwfn,
+		       struct qed_ptt *p_ptt,
+		       u32 addr,
+		       u16 rt_offset,
+		       u16 size,
+		       bool b_must_dmae)
 {
-	struct qed_rt_data *rt_data = p_hwfn->rt_data + rt_offset;
-	u32 i;
+	u32 *p_init_val = &p_hwfn->rt_data.init_val[rt_offset];
+	bool *p_valid = &p_hwfn->rt_data.b_valid[rt_offset];
+	u16 i, segment;
+	int rc = 0;
 
+	/* Since not all RT entries are initialized, go over the RT and
+	 * for each segment of initialized values use DMA.
+	 */
 	for (i = 0; i < size; i++) {
-		if (!rt_data[i].b_valid)
+		if (!p_valid[i])
 			continue;
-		qed_wr(p_hwfn, p_ptt, addr + (i << 2), rt_data[i].init_val);
+
+		/* In case there isn't any wide-bus configuration here,
+		 * simply write the data instead of using dmae.
+		 */
+		if (!b_must_dmae) {
+			qed_wr(p_hwfn, p_ptt, addr + (i << 2),
+			       p_init_val[i]);
+			continue;
+		}
+
+		/* Start of a new segment */
+		for (segment = 1; i + segment < size; segment++)
+			if (!p_valid[i + segment])
+				break;
+
+		rc = qed_dmae_host2grc(p_hwfn, p_ptt,
+				       (uintptr_t)(p_init_val + i),
+				       addr + (i << 2), segment, 0);
+		if (rc != 0)
+			return rc;
+
+		/* Jump over the entire segment, including invalid entry */
+		i += segment;
 	}
+
+	return rc;
 }
 
 int qed_init_alloc(struct qed_hwfn *p_hwfn)
 {
-	struct qed_rt_data *rt_data;
+	struct qed_rt_data *rt_data = &p_hwfn->rt_data;
 
-	rt_data = kzalloc(sizeof(*rt_data) * RUNTIME_ARRAY_SIZE, GFP_ATOMIC);
-	if (!rt_data)
+	rt_data->b_valid = kzalloc(sizeof(bool) * RUNTIME_ARRAY_SIZE,
+				   GFP_KERNEL);
+	if (!rt_data->b_valid)
 		return -ENOMEM;
 
-	p_hwfn->rt_data = rt_data;
+	rt_data->init_val = kzalloc(sizeof(u32) * RUNTIME_ARRAY_SIZE,
+				    GFP_KERNEL);
+	if (!rt_data->init_val) {
+		kfree(rt_data->b_valid);
+		return -ENOMEM;
+	}
 
 	return 0;
 }
 
 void qed_init_free(struct qed_hwfn *p_hwfn)
 {
-	kfree(p_hwfn->rt_data);
-	p_hwfn->rt_data = NULL;
+	kfree(p_hwfn->rt_data.init_val);
+	kfree(p_hwfn->rt_data.b_valid);
 }
 
 static int qed_init_array_dmae(struct qed_hwfn *p_hwfn,
@@ -289,7 +324,8 @@ static int qed_init_cmd_wr(struct qed_hwfn *p_hwfn,
 	case INIT_SRC_RUNTIME:
 		qed_init_rt(p_hwfn, p_ptt, addr,
 			    le16_to_cpu(arg->runtime.offset),
-			    le16_to_cpu(arg->runtime.size));
+			    le16_to_cpu(arg->runtime.size),
+			    b_must_dmae);
 		break;
 	}
 
@@ -316,49 +352,50 @@ static void qed_init_cmd_rd(struct qed_hwfn *p_hwfn,
 			    struct qed_ptt *p_ptt,
 			    struct init_read_op *cmd)
 {
-	u32 data = le32_to_cpu(cmd->op_data);
-	u32 addr = GET_FIELD(data, INIT_READ_OP_ADDRESS) << 2;
+	bool (*comp_check)(u32 val, u32 expected_val);
+	u32 delay = QED_INIT_POLL_PERIOD_US, val;
+	u32 data, addr, poll;
+	int i;
+
+	data = le32_to_cpu(cmd->op_data);
+	addr = GET_FIELD(data, INIT_READ_OP_ADDRESS) << 2;
+	poll = GET_FIELD(data, INIT_READ_OP_POLL_TYPE);
 
-	bool (*comp_check)(u32 val,
-			   u32 expected_val);
-	u32 delay = QED_INIT_POLL_PERIOD_US, val;
 	val = qed_rd(p_hwfn, p_ptt, addr);
 
-	data = le32_to_cpu(cmd->op_data);
-	if (GET_FIELD(data, INIT_READ_OP_POLL)) {
-		int i;
+	if (poll == INIT_POLL_NONE)
+		return;
 
-		switch (GET_FIELD(data, INIT_READ_OP_POLL_COMP)) {
-		case INIT_COMPARISON_EQ:
-			comp_check = comp_eq;
-			break;
-		case INIT_COMPARISON_OR:
-			comp_check = comp_or;
-			break;
-		case INIT_COMPARISON_AND:
-			comp_check = comp_and;
-			break;
-		default:
-			comp_check = NULL;
-			DP_ERR(p_hwfn, "Invalid poll comparison type %08x\n",
-			       data);
-			return;
-		}
+	switch (poll) {
+	case INIT_POLL_EQ:
+		comp_check = comp_eq;
+		break;
+	case INIT_POLL_OR:
+		comp_check = comp_or;
+		break;
+	case INIT_POLL_AND:
+		comp_check = comp_and;
+		break;
+	default:
+		DP_ERR(p_hwfn, "Invalid poll comparison type %08x\n",
+		       cmd->op_data);
+		return;
+	}
 
-		for (i = 0;
-		     i < QED_INIT_MAX_POLL_COUNT &&
-		     !comp_check(val, le32_to_cpu(cmd->expected_val));
-		     i++) {
-			udelay(delay);
-			val = qed_rd(p_hwfn, p_ptt, addr);
-		}
+	data = le32_to_cpu(cmd->expected_val);
+	for (i = 0;
+	     i < QED_INIT_MAX_POLL_COUNT && !comp_check(val, data);
+	     i++) {
+		udelay(delay);
+		val = qed_rd(p_hwfn, p_ptt, addr);
+	}
 
-		if (i == QED_INIT_MAX_POLL_COUNT)
-			DP_ERR(p_hwfn,
-			       "Timeout when polling reg: 0x%08x [ Waiting-for: %08x Got: %08x (comparsion %08x)]\n",
-			       addr, le32_to_cpu(cmd->expected_val),
-			       val, data);
+	if (i == QED_INIT_MAX_POLL_COUNT) {
+		DP_ERR(p_hwfn,
+		       "Timeout when polling reg: 0x%08x [ Waiting-for: %08x Got: %08x (comparsion %08x)]\n",
+		       addr, le32_to_cpu(cmd->expected_val),
+		       val, le32_to_cpu(cmd->op_data));
 	}
 }
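The heart of the qed_init_ops.c rewrite is qed_init_rt(): rather than issuing one register write per valid runtime entry, it now finds runs of consecutive valid entries and, when wide-bus access requires DMAE, pushes each run to the device in a single qed_dmae_host2grc() transaction. A minimal sketch of that coalescing loop, assuming a plain pair of arrays and a dma_to_device() placeholder instead of the real DMAE engine:

#include <stdbool.h>
#include <stdint.h>

/* Stand-in for qed_dmae_host2grc(): copy "count" 32-bit words from host
 * memory to a device address in one burst.
 */
static void dma_to_device(uint32_t dev_addr, const uint32_t *src,
			  uint16_t count)
{
	/* Placeholder: a real implementation would program the DMA engine. */
	(void)dev_addr; (void)src; (void)count;
}

static void init_rt_segments(uint32_t base_addr, const uint32_t *init_val,
			     const bool *valid, uint16_t size)
{
	for (uint16_t i = 0; i < size; i++) {
		uint16_t segment;

		if (!valid[i])
			continue;

		/* Extend the segment while entries remain valid. */
		for (segment = 1; i + segment < size; segment++)
			if (!valid[i + segment])
				break;

		/* One burst covers the whole run of valid entries;
		 * registers are 4 bytes apart, hence the << 2.
		 */
		dma_to_device(base_addr + (i << 2), &init_val[i], segment);

		/* Advance to the invalid entry that ended the run; the
		 * loop increment then steps past it.
		 */
		i += segment;
	}
}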
diff --git a/drivers/net/ethernet/qlogic/qed/qed_l2.c b/drivers/net/ethernet/qlogic/qed/qed_l2.c
index f72036a2ef5b..978d07a61bbf 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_l2.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_l2.c
@@ -714,7 +714,6 @@ qed_sp_eth_txq_start_ramrod(struct qed_hwfn *p_hwfn,
 	p_ramrod->sb_id = cpu_to_le16(p_params->sb);
 	p_ramrod->sb_index = p_params->sb_idx;
 	p_ramrod->stats_counter_id = stats_id;
-	p_ramrod->tc = p_pq_params->eth.tc;
 
 	p_ramrod->pbl_size = cpu_to_le16(pbl_size);
 	p_ramrod->pbl_base_addr.hi = DMA_HI_LE(pbl_addr);
@@ -821,9 +820,8 @@ qed_filter_action(enum qed_filter_opcode opcode)
 	case QED_FILTER_REMOVE:
 		action = ETH_FILTER_ACTION_REMOVE;
 		break;
-	case QED_FILTER_REPLACE:
 	case QED_FILTER_FLUSH:
-		action = ETH_FILTER_ACTION_REPLACE;
+		action = ETH_FILTER_ACTION_REMOVE_ALL;
 		break;
 	default:
 		action = MAX_ETH_FILTER_ACTION;
@@ -892,8 +890,7 @@ qed_filter_ucast_common(struct qed_hwfn *p_hwfn,
 	p_ramrod->filter_cmd_hdr.tx = p_filter_cmd->is_tx_filter ? 1 : 0;
 
 	switch (p_filter_cmd->opcode) {
-	case QED_FILTER_FLUSH:
-		p_ramrod->filter_cmd_hdr.cmd_cnt = 0; break;
+	case QED_FILTER_REPLACE:
 	case QED_FILTER_MOVE:
 		p_ramrod->filter_cmd_hdr.cmd_cnt = 2; break;
 	default:
@@ -962,6 +959,12 @@ qed_filter_ucast_common(struct qed_hwfn *p_hwfn,
 
 		p_second_filter->action = ETH_FILTER_ACTION_ADD;
 		p_second_filter->vport_id = vport_to_add_to;
+	} else if (p_filter_cmd->opcode == QED_FILTER_REPLACE) {
+		p_first_filter->vport_id = vport_to_add_to;
+		memcpy(p_second_filter, p_first_filter,
+		       sizeof(*p_second_filter));
+		p_first_filter->action = ETH_FILTER_ACTION_REMOVE_ALL;
+		p_second_filter->action = ETH_FILTER_ACTION_ADD;
 	} else {
 		action = qed_filter_action(p_filter_cmd->opcode);
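Since the firmware no longer exposes ETH_FILTER_ACTION_REPLACE, the qed_l2.c hunk above implements QED_FILTER_REPLACE as a two-entry command: the first entry removes all matching filters, the second adds the new one (hence cmd_cnt = 2). A condensed sketch of how the pair is built, with simplified types standing in for the driver's ramrod structures:

#include <string.h>

enum filter_action { ACT_REMOVE_ALL, ACT_ADD };

struct filter_cmd {
	enum filter_action action;
	unsigned char mac[6];
	unsigned char vport_id;
};

/* Build the REPLACE sequence: entry 0 flushes, entry 1 adds. */
static void build_replace(struct filter_cmd cmds[2],
			  const unsigned char mac[6], unsigned char vport)
{
	cmds[0].action = ACT_REMOVE_ALL;
	memcpy(cmds[0].mac, mac, 6);
	cmds[0].vport_id = vport;

	/* Second command mirrors the first but adds instead of removing. */
	cmds[1] = cmds[0];
	cmds[1].action = ACT_ADD;
}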
Set to 1\n", - info->bandwidth_min); - info->bandwidth_min = 1; - } - info->bandwidth_max = (shmem_info.config & - FUNC_MF_CFG_MAX_BW_MASK) >> - FUNC_MF_CFG_MAX_BW_SHIFT; - if (info->bandwidth_max < 1 || info->bandwidth_max > 100) { - DP_INFO(p_hwfn, - "bandwidth maximum out of bounds [%02x]. Set to 100\n", - info->bandwidth_max); - info->bandwidth_max = 100; - } + info->bandwidth_min = (shmem_info.config & + FUNC_MF_CFG_MIN_BW_MASK) >> + FUNC_MF_CFG_MIN_BW_SHIFT; + if (info->bandwidth_min < 1 || info->bandwidth_min > 100) { + DP_INFO(p_hwfn, + "bandwidth minimum out of bounds [%02x]. Set to 1\n", + info->bandwidth_min); + info->bandwidth_min = 1; + } + + info->bandwidth_max = (shmem_info.config & + FUNC_MF_CFG_MAX_BW_MASK) >> + FUNC_MF_CFG_MAX_BW_SHIFT; + if (info->bandwidth_max < 1 || info->bandwidth_max > 100) { + DP_INFO(p_hwfn, + "bandwidth maximum out of bounds [%02x]. Set to 100\n", + info->bandwidth_max); + info->bandwidth_max = 100; } if (shmem_info.mac_upper || shmem_info.mac_lower) { diff --git a/drivers/net/ethernet/qlogic/qed/qed_sp.h b/drivers/net/ethernet/qlogic/qed/qed_sp.h index 287fadfab52d..8a83609c443c 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_sp.h +++ b/drivers/net/ethernet/qlogic/qed/qed_sp.h @@ -343,7 +343,7 @@ int qed_sp_init_request(struct qed_hwfn *p_hwfn, */ int qed_sp_pf_start(struct qed_hwfn *p_hwfn, - enum mf_mode mode); + enum qed_mf_mode mode); /** * @brief qed_sp_pf_stop - PF Function Stop Ramrod diff --git a/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c b/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c index 6f7879136633..33090f63548c 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c +++ b/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c @@ -90,7 +90,7 @@ int qed_sp_init_request(struct qed_hwfn *p_hwfn, } int qed_sp_pf_start(struct qed_hwfn *p_hwfn, - enum mf_mode mode) + enum qed_mf_mode mode) { struct qed_sp_init_request_params params; struct pf_start_ramrod_data *p_ramrod = NULL; @@ -125,6 +125,18 @@ int qed_sp_pf_start(struct qed_hwfn *p_hwfn, p_ramrod->dont_log_ramrods = 0; p_ramrod->log_type_mask = cpu_to_le16(0xf); p_ramrod->mf_mode = mode; + switch (mode) { + case QED_MF_DEFAULT: + case QED_MF_NPAR: + p_ramrod->mf_mode = MF_NPAR; + break; + case QED_MF_OVLAN: + p_ramrod->mf_mode = MF_OVLAN; + break; + default: + DP_NOTICE(p_hwfn, "Unsupported MF mode, init as DEFAULT\n"); + p_ramrod->mf_mode = MF_NPAR; + } p_ramrod->outer_tag = p_hwfn->hw_info.ovlan; /* Place EQ address in RAMROD */ @@ -142,9 +154,8 @@ int qed_sp_pf_start(struct qed_hwfn *p_hwfn, p_hwfn->hw_info.personality = PERSONALITY_ETH; DP_VERBOSE(p_hwfn, QED_MSG_SPQ, - "Setting event_ring_sb [id %04x index %02x], mf [%s] outer_tag [%d]\n", + "Setting event_ring_sb [id %04x index %02x], outer_tag [%d]\n", sb, sb_index, - (p_ramrod->mf_mode == SF) ? "SF" : "Multi-Pf", p_ramrod->outer_tag); return qed_spq_post(p_hwfn, p_ent, NULL); diff --git a/drivers/net/ethernet/qlogic/qede/qede.h b/drivers/net/ethernet/qlogic/qede/qede.h index 7c6caf7f6612..f75d9e0676ce 100644 --- a/drivers/net/ethernet/qlogic/qede/qede.h +++ b/drivers/net/ethernet/qlogic/qede/qede.h @@ -173,9 +173,9 @@ enum QEDE_STATE { * skb are built only after the frame was DMA-ed. 
*/ struct sw_rx_data { - u8 *data; - - DEFINE_DMA_UNMAP_ADDR(mapping); + struct page *data; + dma_addr_t mapping; + unsigned int page_offset; }; struct qede_rx_queue { @@ -188,6 +188,7 @@ struct qede_rx_queue { void __iomem *hw_rxq_prod_addr; int rx_buf_size; + unsigned int rx_buf_seg_size; u16 num_rx_buffers; u16 rxq_id; @@ -281,6 +282,7 @@ void qede_fill_by_demand_stats(struct qede_dev *edev); #define NUM_TX_BDS_MIN 128 #define NUM_TX_BDS_DEF NUM_TX_BDS_MAX +#define QEDE_RX_HDR_SIZE 256 #define for_each_rss(i) for (i = 0; i < edev->num_rss; i++) #endif /* _QEDE_H_ */ diff --git a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c index e442b85c9a5e..c49dc10ce151 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c +++ b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c @@ -217,9 +217,9 @@ static int qede_set_settings(struct net_device *dev, struct ethtool_cmd *cmd) struct qed_link_params params; u32 speed; - if (edev->dev_info.common.is_mf) { + if (!edev->dev_info.common.is_mf_default) { DP_INFO(edev, - "Link parameters can not be changed in MF mode\n"); + "Link parameters can not be changed in non-default mode\n"); return -EOPNOTSUPP; } @@ -428,7 +428,7 @@ static int qede_set_pauseparam(struct net_device *dev, struct qed_link_params params; struct qed_link_output current_link; - if (!edev->dev_info.common.is_mf) { + if (!edev->dev_info.common.is_mf_default) { DP_INFO(edev, "Pause parameters can not be updated in non-default mode\n"); return -EOPNOTSUPP; diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c index 6237f10b5119..f50e0bd7fb2c 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_main.c +++ b/drivers/net/ethernet/qlogic/qede/qede_main.c @@ -330,15 +330,15 @@ static void qede_set_params_for_ipv6_ext(struct sk_buff *skb, struct eth_tx_3rd_bd *third_bd) { u8 l4_proto; - u16 bd2_bits = 0, bd2_bits2 = 0; + u16 bd2_bits1 = 0, bd2_bits2 = 0; - bd2_bits2 |= (1 << ETH_TX_DATA_2ND_BD_IPV6_EXT_SHIFT); + bd2_bits1 |= (1 << ETH_TX_DATA_2ND_BD_IPV6_EXT_SHIFT); - bd2_bits |= ((((u8 *)skb_transport_header(skb) - skb->data) >> 1) & + bd2_bits2 |= ((((u8 *)skb_transport_header(skb) - skb->data) >> 1) & ETH_TX_DATA_2ND_BD_L4_HDR_START_OFFSET_W_MASK) << ETH_TX_DATA_2ND_BD_L4_HDR_START_OFFSET_W_SHIFT; - bd2_bits2 |= (ETH_L4_PSEUDO_CSUM_CORRECT_LENGTH << + bd2_bits1 |= (ETH_L4_PSEUDO_CSUM_CORRECT_LENGTH << ETH_TX_DATA_2ND_BD_L4_PSEUDO_CSUM_MODE_SHIFT); if (vlan_get_protocol(skb) == htons(ETH_P_IPV6)) @@ -347,16 +347,15 @@ static void qede_set_params_for_ipv6_ext(struct sk_buff *skb, l4_proto = ip_hdr(skb)->protocol; if (l4_proto == IPPROTO_UDP) - bd2_bits2 |= 1 << ETH_TX_DATA_2ND_BD_L4_UDP_SHIFT; + bd2_bits1 |= 1 << ETH_TX_DATA_2ND_BD_L4_UDP_SHIFT; - if (third_bd) { + if (third_bd) third_bd->data.bitfields |= - ((tcp_hdrlen(skb) / 4) & - ETH_TX_DATA_3RD_BD_TCP_HDR_LEN_DW_MASK) << - ETH_TX_DATA_3RD_BD_TCP_HDR_LEN_DW_SHIFT; - } + cpu_to_le16(((tcp_hdrlen(skb) / 4) & + ETH_TX_DATA_3RD_BD_TCP_HDR_LEN_DW_MASK) << + ETH_TX_DATA_3RD_BD_TCP_HDR_LEN_DW_SHIFT); - second_bd->data.bitfields = cpu_to_le16(bd2_bits); + second_bd->data.bitfields1 = cpu_to_le16(bd2_bits1); second_bd->data.bitfields2 = cpu_to_le16(bd2_bits2); } @@ -464,12 +463,16 @@ netdev_tx_t qede_start_xmit(struct sk_buff *skb, /* Fill the parsing flags & params according to the requested offload */ if (xmit_type & XMIT_L4_CSUM) { + u16 temp = 1 << ETH_TX_DATA_1ST_BD_TUNN_CFG_OVERRIDE_SHIFT; + /* We don't re-calculate IP checksum as it is 
already done by * the upper stack */ first_bd->data.bd_flags.bitfields |= 1 << ETH_TX_1ST_BD_FLAGS_L4_CSUM_SHIFT; + first_bd->data.bitfields |= cpu_to_le16(temp); + /* If the packet is IPv6 with extension header, indicate that * to FW and pass few params, since the device cracker doesn't * support parsing IPv6 with extension header/s. @@ -491,7 +494,7 @@ netdev_tx_t qede_start_xmit(struct sk_buff *skb, /* @@@TBD - if will not be removed need to check */ third_bd->data.bitfields |= - (1 << ETH_TX_DATA_3RD_BD_HDR_NBD_SHIFT); + cpu_to_le16((1 << ETH_TX_DATA_3RD_BD_HDR_NBD_SHIFT)); /* Make life easier for FW guys who can't deal with header and * data on same BD. If we need to split, use the second bd... @@ -719,26 +722,52 @@ static bool qede_has_tx_work(struct qede_fastpath *fp) return false; } -/* This function copies the Rx buffer from the CONS position to the PROD - * position, since we failed to allocate a new Rx buffer. +/* This function reuses the buffer(from an offset) from + * consumer index to producer index in the bd ring */ -static void qede_reuse_rx_data(struct qede_rx_queue *rxq) +static inline void qede_reuse_page(struct qede_dev *edev, + struct qede_rx_queue *rxq, + struct sw_rx_data *curr_cons) { - struct eth_rx_bd *rx_bd_cons = qed_chain_consume(&rxq->rx_bd_ring); struct eth_rx_bd *rx_bd_prod = qed_chain_produce(&rxq->rx_bd_ring); - struct sw_rx_data *sw_rx_data_cons = - &rxq->sw_rx_ring[rxq->sw_rx_cons & NUM_RX_BDS_MAX]; - struct sw_rx_data *sw_rx_data_prod = - &rxq->sw_rx_ring[rxq->sw_rx_prod & NUM_RX_BDS_MAX]; + struct sw_rx_data *curr_prod; + dma_addr_t new_mapping; - dma_unmap_addr_set(sw_rx_data_prod, mapping, - dma_unmap_addr(sw_rx_data_cons, mapping)); + curr_prod = &rxq->sw_rx_ring[rxq->sw_rx_prod & NUM_RX_BDS_MAX]; + *curr_prod = *curr_cons; - sw_rx_data_prod->data = sw_rx_data_cons->data; - memcpy(rx_bd_prod, rx_bd_cons, sizeof(struct eth_rx_bd)); + new_mapping = curr_prod->mapping + curr_prod->page_offset; + + rx_bd_prod->addr.hi = cpu_to_le32(upper_32_bits(new_mapping)); + rx_bd_prod->addr.lo = cpu_to_le32(lower_32_bits(new_mapping)); - rxq->sw_rx_cons++; rxq->sw_rx_prod++; + curr_cons->data = NULL; +} + +static inline int qede_realloc_rx_buffer(struct qede_dev *edev, + struct qede_rx_queue *rxq, + struct sw_rx_data *curr_cons) +{ + /* Move to the next segment in the page */ + curr_cons->page_offset += rxq->rx_buf_seg_size; + + if (curr_cons->page_offset == PAGE_SIZE) { + if (unlikely(qede_alloc_rx_buffer(edev, rxq))) + return -ENOMEM; + + dma_unmap_page(&edev->pdev->dev, curr_cons->mapping, + PAGE_SIZE, DMA_FROM_DEVICE); + } else { + /* Increment refcount of the page as we don't want + * network stack to take the ownership of the page + * which can be recycled multiple times by the driver. 
+ */ + atomic_inc(&curr_cons->data->_count); + qede_reuse_page(edev, rxq, curr_cons); + } + + return 0; } static inline void qede_update_rx_prod(struct qede_dev *edev, @@ -857,9 +886,10 @@ static int qede_rx_int(struct qede_fastpath *fp, int budget) struct sw_rx_data *sw_rx_data; union eth_rx_cqe *cqe; struct sk_buff *skb; + struct page *data; + __le16 flags; u16 len, pad; u32 rx_hash; - u8 *data; /* Get the CQE from the completion ring */ cqe = (union eth_rx_cqe *) @@ -879,56 +909,110 @@ static int qede_rx_int(struct qede_fastpath *fp, int budget) data = sw_rx_data->data; fp_cqe = &cqe->fast_path_regular; - len = le16_to_cpu(fp_cqe->pkt_len); + len = le16_to_cpu(fp_cqe->len_on_first_bd); pad = fp_cqe->placement_offset; + flags = cqe->fast_path_regular.pars_flags.flags; - /* For every Rx BD consumed, we allocate a new BD so the BD ring - * is always with a fixed size. If allocation fails, we take the - * consumed BD and return it to the ring in the PROD position. - * The packet that was received on that BD will be dropped (and - * not passed to the upper stack). - */ - if (likely(qede_alloc_rx_buffer(edev, rxq) == 0)) { - dma_unmap_single(&edev->pdev->dev, - dma_unmap_addr(sw_rx_data, mapping), - rxq->rx_buf_size, DMA_FROM_DEVICE); - - /* If this is an error packet then drop it */ - parse_flag = - le16_to_cpu(cqe->fast_path_regular.pars_flags.flags); - csum_flag = qede_check_csum(parse_flag); - if (csum_flag == QEDE_CSUM_ERROR) { - DP_NOTICE(edev, - "CQE in CONS = %u has error, flags = %x, dropping incoming packet\n", - sw_comp_cons, parse_flag); - rxq->rx_hw_errors++; - kfree(data); - goto next_rx; - } - - skb = build_skb(data, 0); - - if (unlikely(!skb)) { - DP_NOTICE(edev, - "Build_skb failed, dropping incoming packet\n"); - kfree(data); - rxq->rx_alloc_errors++; - goto next_rx; - } + /* If this is an error packet then drop it */ + parse_flag = le16_to_cpu(flags); - skb_reserve(skb, pad); + csum_flag = qede_check_csum(parse_flag); + if (unlikely(csum_flag == QEDE_CSUM_ERROR)) { + DP_NOTICE(edev, + "CQE in CONS = %u has error, flags = %x, dropping incoming packet\n", + sw_comp_cons, parse_flag); + rxq->rx_hw_errors++; + qede_reuse_page(edev, rxq, sw_rx_data); + goto next_rx; + } - } else { + skb = netdev_alloc_skb(edev->ndev, QEDE_RX_HDR_SIZE); + if (unlikely(!skb)) { DP_NOTICE(edev, - "New buffer allocation failed, dropping incoming packet and reusing its buffer\n"); - qede_reuse_rx_data(rxq); + "Build_skb failed, dropping incoming packet\n"); + qede_reuse_page(edev, rxq, sw_rx_data); rxq->rx_alloc_errors++; - goto next_cqe; + goto next_rx; } - sw_rx_data->data = NULL; + /* Copy data into SKB */ + if (len + pad <= QEDE_RX_HDR_SIZE) { + memcpy(skb_put(skb, len), + page_address(data) + pad + + sw_rx_data->page_offset, len); + qede_reuse_page(edev, rxq, sw_rx_data); + } else { + struct skb_frag_struct *frag; + unsigned int pull_len; + unsigned char *va; - skb_put(skb, len); + frag = &skb_shinfo(skb)->frags[0]; + + skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, data, + pad + sw_rx_data->page_offset, + len, rxq->rx_buf_seg_size); + + va = skb_frag_address(frag); + pull_len = eth_get_headlen(va, QEDE_RX_HDR_SIZE); + + /* Align the pull_len to optimize memcpy */ + memcpy(skb->data, va, ALIGN(pull_len, sizeof(long))); + + skb_frag_size_sub(frag, pull_len); + frag->page_offset += pull_len; + skb->data_len -= pull_len; + skb->tail += pull_len; + + if (unlikely(qede_realloc_rx_buffer(edev, rxq, + sw_rx_data))) { + DP_ERR(edev, "Failed to allocate rx buffer\n"); + rxq->rx_alloc_errors++; + goto 
next_cqe; + } + } + + if (fp_cqe->bd_num != 1) { + u16 pkt_len = le16_to_cpu(fp_cqe->pkt_len); + u8 num_frags; + + pkt_len -= len; + + for (num_frags = fp_cqe->bd_num - 1; num_frags > 0; + num_frags--) { + u16 cur_size = pkt_len > rxq->rx_buf_size ? + rxq->rx_buf_size : pkt_len; + + WARN_ONCE(!cur_size, + "Still got %d BDs for mapping jumbo, but length became 0\n", + num_frags); + + if (unlikely(qede_alloc_rx_buffer(edev, rxq))) + goto next_cqe; + + rxq->sw_rx_cons++; + sw_rx_index = rxq->sw_rx_cons & NUM_RX_BDS_MAX; + sw_rx_data = &rxq->sw_rx_ring[sw_rx_index]; + qed_chain_consume(&rxq->rx_bd_ring); + dma_unmap_page(&edev->pdev->dev, + sw_rx_data->mapping, + PAGE_SIZE, DMA_FROM_DEVICE); + + skb_fill_page_desc(skb, + skb_shinfo(skb)->nr_frags++, + sw_rx_data->data, 0, + cur_size); + + skb->truesize += PAGE_SIZE; + skb->data_len += cur_size; + skb->len += cur_size; + pkt_len -= cur_size; + } + + if (pkt_len) + DP_ERR(edev, + "Mapped all BDs of jumbo, but still have %d bytes\n", + pkt_len); + } skb->protocol = eth_type_trans(skb, edev->ndev); @@ -1566,17 +1650,17 @@ static void qede_free_rx_buffers(struct qede_dev *edev, for (i = rxq->sw_rx_cons; i != rxq->sw_rx_prod; i++) { struct sw_rx_data *rx_buf; - u8 *data; + struct page *data; rx_buf = &rxq->sw_rx_ring[i & NUM_RX_BDS_MAX]; data = rx_buf->data; - dma_unmap_single(&edev->pdev->dev, - dma_unmap_addr(rx_buf, mapping), - rxq->rx_buf_size, DMA_FROM_DEVICE); + dma_unmap_page(&edev->pdev->dev, + rx_buf->mapping, + PAGE_SIZE, DMA_FROM_DEVICE); rx_buf->data = NULL; - kfree(data); + __free_page(data); } } @@ -1600,29 +1684,32 @@ static int qede_alloc_rx_buffer(struct qede_dev *edev, struct sw_rx_data *sw_rx_data; struct eth_rx_bd *rx_bd; dma_addr_t mapping; + struct page *data; u16 rx_buf_size; - u8 *data; rx_buf_size = rxq->rx_buf_size; - data = kmalloc(rx_buf_size, GFP_ATOMIC); + data = alloc_pages(GFP_ATOMIC, 0); if (unlikely(!data)) { - DP_NOTICE(edev, "Failed to allocate Rx data\n"); + DP_NOTICE(edev, "Failed to allocate Rx data [page]\n"); return -ENOMEM; } - mapping = dma_map_single(&edev->pdev->dev, data, - rx_buf_size, DMA_FROM_DEVICE); + /* Map the entire page as it would be used + * for multiple RX buffer segment size mapping. 
+ */ + mapping = dma_map_page(&edev->pdev->dev, data, 0, + PAGE_SIZE, DMA_FROM_DEVICE); if (unlikely(dma_mapping_error(&edev->pdev->dev, mapping))) { - kfree(data); + __free_page(data); DP_NOTICE(edev, "Failed to map Rx buffer\n"); return -ENOMEM; } sw_rx_data = &rxq->sw_rx_ring[rxq->sw_rx_prod & NUM_RX_BDS_MAX]; + sw_rx_data->page_offset = 0; sw_rx_data->data = data; - - dma_unmap_addr_set(sw_rx_data, mapping, mapping); + sw_rx_data->mapping = mapping; /* Advance PROD and get BD pointer */ rx_bd = (struct eth_rx_bd *)qed_chain_produce(&rxq->rx_bd_ring); @@ -1643,13 +1730,16 @@ static int qede_alloc_mem_rxq(struct qede_dev *edev, rxq->num_rx_buffers = edev->q_num_rx_buffers; - rxq->rx_buf_size = NET_IP_ALIGN + - ETH_OVERHEAD + - edev->ndev->mtu + - QEDE_FW_RX_ALIGN_END; + rxq->rx_buf_size = NET_IP_ALIGN + ETH_OVERHEAD + + edev->ndev->mtu; + if (rxq->rx_buf_size > PAGE_SIZE) + rxq->rx_buf_size = PAGE_SIZE; + + /* Segment size to spilt a page in multiple equal parts */ + rxq->rx_buf_seg_size = roundup_pow_of_two(rxq->rx_buf_size); /* Allocate the parallel driver ring for Rx buffers */ - size = sizeof(*rxq->sw_rx_ring) * NUM_RX_BDS_MAX; + size = sizeof(*rxq->sw_rx_ring) * RX_RING_SIZE; rxq->sw_rx_ring = kzalloc(size, GFP_KERNEL); if (!rxq->sw_rx_ring) { DP_ERR(edev, "Rx buffers ring allocation failed\n"); @@ -1660,7 +1750,7 @@ static int qede_alloc_mem_rxq(struct qede_dev *edev, rc = edev->ops->common->chain_alloc(edev->cdev, QED_CHAIN_USE_TO_CONSUME_PRODUCE, QED_CHAIN_MODE_NEXT_PTR, - NUM_RX_BDS_MAX, + RX_RING_SIZE, sizeof(struct eth_rx_bd), &rxq->rx_bd_ring); @@ -1671,7 +1761,7 @@ static int qede_alloc_mem_rxq(struct qede_dev *edev, rc = edev->ops->common->chain_alloc(edev->cdev, QED_CHAIN_USE_TO_CONSUME, QED_CHAIN_MODE_PBL, - NUM_RX_BDS_MAX, + RX_RING_SIZE, sizeof(union eth_rx_cqe), &rxq->rx_comp_ring); if (rc) diff --git a/include/linux/qed/common_hsi.h b/include/linux/qed/common_hsi.h index 1d1ba2c5ee7a..53ecb37ae563 100644 --- a/include/linux/qed/common_hsi.h +++ b/include/linux/qed/common_hsi.h @@ -11,9 +11,11 @@ #define CORE_SPQE_PAGE_SIZE_BYTES 4096 +#define X_FINAL_CLEANUP_AGG_INT 1 + #define FW_MAJOR_VERSION 8 -#define FW_MINOR_VERSION 4 -#define FW_REVISION_VERSION 2 +#define FW_MINOR_VERSION 7 +#define FW_REVISION_VERSION 3 #define FW_ENGINEERING_VERSION 0 /***********************/ @@ -152,6 +154,9 @@ /* number of queues in a PF queue group */ #define QM_PF_QUEUE_GROUP_SIZE 8 +/* the size of a single queue element in bytes */ +#define QM_PQ_ELEMENT_SIZE 4 + /* base number of Tx PQs in the CM PQ representation. 
* should be used when storing PQ IDs in CM PQ registers and context */ @@ -285,6 +290,16 @@ #define PXP_NUM_ILT_RECORDS_K2 11000 #define MAX_NUM_ILT_RECORDS MAX(PXP_NUM_ILT_RECORDS_BB, PXP_NUM_ILT_RECORDS_K2) +#define SDM_COMP_TYPE_NONE 0 +#define SDM_COMP_TYPE_WAKE_THREAD 1 +#define SDM_COMP_TYPE_AGG_INT 2 +#define SDM_COMP_TYPE_CM 3 +#define SDM_COMP_TYPE_LOADER 4 +#define SDM_COMP_TYPE_PXP 5 +#define SDM_COMP_TYPE_INDICATE_ERROR 6 +#define SDM_COMP_TYPE_RELEASE_THREAD 7 +#define SDM_COMP_TYPE_RAM 8 + /******************/ /* PBF CONSTANTS */ /******************/ @@ -335,7 +350,7 @@ struct event_ring_entry { /* Multi function mode */ enum mf_mode { - SF, + ERROR_MODE /* Unsupported mode */, MF_OVLAN, MF_NPAR, MAX_MF_MODE @@ -606,4 +621,19 @@ struct status_block { #define STATUS_BLOCK_ZERO_PAD3_SHIFT 24 }; +struct tunnel_parsing_flags { + u8 flags; +#define TUNNEL_PARSING_FLAGS_TYPE_MASK 0x3 +#define TUNNEL_PARSING_FLAGS_TYPE_SHIFT 0 +#define TUNNEL_PARSING_FLAGS_TENNANT_ID_EXIST_MASK 0x1 +#define TUNNEL_PARSING_FLAGS_TENNANT_ID_EXIST_SHIFT 2 +#define TUNNEL_PARSING_FLAGS_NEXT_PROTOCOL_MASK 0x3 +#define TUNNEL_PARSING_FLAGS_NEXT_PROTOCOL_SHIFT 3 +#define TUNNEL_PARSING_FLAGS_FIRSTHDRIPMATCH_MASK 0x1 +#define TUNNEL_PARSING_FLAGS_FIRSTHDRIPMATCH_SHIFT 5 +#define TUNNEL_PARSING_FLAGS_IPV4_FRAGMENT_MASK 0x1 +#define TUNNEL_PARSING_FLAGS_IPV4_FRAGMENT_SHIFT 6 +#define TUNNEL_PARSING_FLAGS_IPV4_OPTIONS_MASK 0x1 +#define TUNNEL_PARSING_FLAGS_IPV4_OPTIONS_SHIFT 7 +}; #endif /* __COMMON_HSI__ */ diff --git a/include/linux/qed/eth_common.h b/include/linux/qed/eth_common.h index 320b3373ac1d..092cb0c1afcb 100644 --- a/include/linux/qed/eth_common.h +++ b/include/linux/qed/eth_common.h @@ -17,10 +17,8 @@ #define ETH_MAX_RAMROD_PER_CON 8 #define ETH_TX_BD_PAGE_SIZE_BYTES 4096 #define ETH_RX_BD_PAGE_SIZE_BYTES 4096 -#define ETH_RX_SGE_PAGE_SIZE_BYTES 4096 #define ETH_RX_CQE_PAGE_SIZE_BYTES 4096 #define ETH_RX_NUM_NEXT_PAGE_BDS 2 -#define ETH_RX_NUM_NEXT_PAGE_SGES 2 #define ETH_TX_MIN_BDS_PER_NON_LSO_PKT 1 #define ETH_TX_MAX_BDS_PER_NON_LSO_PACKET 18 @@ -34,7 +32,8 @@ #define ETH_NUM_STATISTIC_COUNTERS MAX_NUM_VPORTS -#define ETH_REG_CQE_PBL_SIZE 3 +/* Maximum number of buffers, used for RX packet placement */ +#define ETH_RX_MAX_BUFF_PER_PKT 5 /* num of MAC/VLAN filters */ #define ETH_NUM_MAC_FILTERS 512 @@ -54,9 +53,9 @@ /* TPA constants */ #define ETH_TPA_MAX_AGGS_NUM 64 -#define ETH_TPA_CQE_START_SGL_SIZE 3 -#define ETH_TPA_CQE_CONT_SGL_SIZE 6 -#define ETH_TPA_CQE_END_SGL_SIZE 4 +#define ETH_TPA_CQE_START_LEN_LIST_SIZE ETH_RX_MAX_BUFF_PER_PKT +#define ETH_TPA_CQE_CONT_LEN_LIST_SIZE 6 +#define ETH_TPA_CQE_END_LEN_LIST_SIZE 4 /* Queue Zone sizes */ #define TSTORM_QZONE_SIZE 0 @@ -74,18 +73,18 @@ struct coalescing_timeset { struct eth_tx_1st_bd_flags { u8 bitfields; +#define ETH_TX_1ST_BD_FLAGS_START_BD_MASK 0x1 +#define ETH_TX_1ST_BD_FLAGS_START_BD_SHIFT 0 #define ETH_TX_1ST_BD_FLAGS_FORCE_VLAN_MODE_MASK 0x1 -#define ETH_TX_1ST_BD_FLAGS_FORCE_VLAN_MODE_SHIFT 0 +#define ETH_TX_1ST_BD_FLAGS_FORCE_VLAN_MODE_SHIFT 1 #define ETH_TX_1ST_BD_FLAGS_IP_CSUM_MASK 0x1 -#define ETH_TX_1ST_BD_FLAGS_IP_CSUM_SHIFT 1 +#define ETH_TX_1ST_BD_FLAGS_IP_CSUM_SHIFT 2 #define ETH_TX_1ST_BD_FLAGS_L4_CSUM_MASK 0x1 -#define ETH_TX_1ST_BD_FLAGS_L4_CSUM_SHIFT 2 +#define ETH_TX_1ST_BD_FLAGS_L4_CSUM_SHIFT 3 #define ETH_TX_1ST_BD_FLAGS_VLAN_INSERTION_MASK 0x1 -#define ETH_TX_1ST_BD_FLAGS_VLAN_INSERTION_SHIFT 3 +#define ETH_TX_1ST_BD_FLAGS_VLAN_INSERTION_SHIFT 4 #define ETH_TX_1ST_BD_FLAGS_LSO_MASK 0x1 -#define 
ETH_TX_1ST_BD_FLAGS_LSO_SHIFT 4 -#define ETH_TX_1ST_BD_FLAGS_START_BD_MASK 0x1 -#define ETH_TX_1ST_BD_FLAGS_START_BD_SHIFT 5 +#define ETH_TX_1ST_BD_FLAGS_LSO_SHIFT 5 #define ETH_TX_1ST_BD_FLAGS_TUNN_IP_CSUM_MASK 0x1 #define ETH_TX_1ST_BD_FLAGS_TUNN_IP_CSUM_SHIFT 6 #define ETH_TX_1ST_BD_FLAGS_TUNN_L4_CSUM_MASK 0x1 @@ -97,38 +96,44 @@ struct eth_tx_data_1st_bd { __le16 vlan; u8 nbds; struct eth_tx_1st_bd_flags bd_flags; - __le16 fw_use_only; + __le16 bitfields; +#define ETH_TX_DATA_1ST_BD_TUNN_CFG_OVERRIDE_MASK 0x1 +#define ETH_TX_DATA_1ST_BD_TUNN_CFG_OVERRIDE_SHIFT 0 +#define ETH_TX_DATA_1ST_BD_RESERVED0_MASK 0x1 +#define ETH_TX_DATA_1ST_BD_RESERVED0_SHIFT 1 +#define ETH_TX_DATA_1ST_BD_FW_USE_ONLY_MASK 0x3FFF +#define ETH_TX_DATA_1ST_BD_FW_USE_ONLY_SHIFT 2 }; /* The parsing information data for the second tx bd of a given packet. */ struct eth_tx_data_2nd_bd { __le16 tunn_ip_size; - __le16 bitfields; -#define ETH_TX_DATA_2ND_BD_L4_HDR_START_OFFSET_W_MASK 0x1FFF -#define ETH_TX_DATA_2ND_BD_L4_HDR_START_OFFSET_W_SHIFT 0 -#define ETH_TX_DATA_2ND_BD_RESERVED0_MASK 0x7 -#define ETH_TX_DATA_2ND_BD_RESERVED0_SHIFT 13 - __le16 bitfields2; + __le16 bitfields1; #define ETH_TX_DATA_2ND_BD_TUNN_INNER_L2_HDR_SIZE_W_MASK 0xF #define ETH_TX_DATA_2ND_BD_TUNN_INNER_L2_HDR_SIZE_W_SHIFT 0 #define ETH_TX_DATA_2ND_BD_TUNN_INNER_ETH_TYPE_MASK 0x3 #define ETH_TX_DATA_2ND_BD_TUNN_INNER_ETH_TYPE_SHIFT 4 #define ETH_TX_DATA_2ND_BD_DEST_PORT_MODE_MASK 0x3 #define ETH_TX_DATA_2ND_BD_DEST_PORT_MODE_SHIFT 6 +#define ETH_TX_DATA_2ND_BD_START_BD_MASK 0x1 +#define ETH_TX_DATA_2ND_BD_START_BD_SHIFT 8 #define ETH_TX_DATA_2ND_BD_TUNN_TYPE_MASK 0x3 -#define ETH_TX_DATA_2ND_BD_TUNN_TYPE_SHIFT 8 +#define ETH_TX_DATA_2ND_BD_TUNN_TYPE_SHIFT 9 #define ETH_TX_DATA_2ND_BD_TUNN_INNER_IPV6_MASK 0x1 -#define ETH_TX_DATA_2ND_BD_TUNN_INNER_IPV6_SHIFT 10 +#define ETH_TX_DATA_2ND_BD_TUNN_INNER_IPV6_SHIFT 11 #define ETH_TX_DATA_2ND_BD_IPV6_EXT_MASK 0x1 -#define ETH_TX_DATA_2ND_BD_IPV6_EXT_SHIFT 11 +#define ETH_TX_DATA_2ND_BD_IPV6_EXT_SHIFT 12 #define ETH_TX_DATA_2ND_BD_TUNN_IPV6_EXT_MASK 0x1 -#define ETH_TX_DATA_2ND_BD_TUNN_IPV6_EXT_SHIFT 12 +#define ETH_TX_DATA_2ND_BD_TUNN_IPV6_EXT_SHIFT 13 #define ETH_TX_DATA_2ND_BD_L4_UDP_MASK 0x1 -#define ETH_TX_DATA_2ND_BD_L4_UDP_SHIFT 13 +#define ETH_TX_DATA_2ND_BD_L4_UDP_SHIFT 14 #define ETH_TX_DATA_2ND_BD_L4_PSEUDO_CSUM_MODE_MASK 0x1 -#define ETH_TX_DATA_2ND_BD_L4_PSEUDO_CSUM_MODE_SHIFT 14 -#define ETH_TX_DATA_2ND_BD_RESERVED1_MASK 0x1 -#define ETH_TX_DATA_2ND_BD_RESERVED1_SHIFT 15 +#define ETH_TX_DATA_2ND_BD_L4_PSEUDO_CSUM_MODE_SHIFT 15 + __le16 bitfields2; +#define ETH_TX_DATA_2ND_BD_L4_HDR_START_OFFSET_W_MASK 0x1FFF +#define ETH_TX_DATA_2ND_BD_L4_HDR_START_OFFSET_W_SHIFT 0 +#define ETH_TX_DATA_2ND_BD_RESERVED0_MASK 0x7 +#define ETH_TX_DATA_2ND_BD_RESERVED0_SHIFT 13 }; /* Regular ETH Rx FP CQE. 
*/ @@ -145,11 +150,68 @@ struct eth_fast_path_rx_reg_cqe { struct parsing_and_err_flags pars_flags; __le16 vlan_tag; __le32 rss_hash; - __le16 len_on_bd; + __le16 len_on_first_bd; u8 placement_offset; - u8 reserved; - __le16 pbl[ETH_REG_CQE_PBL_SIZE]; - u8 reserved1[10]; + struct tunnel_parsing_flags tunnel_pars_flags; + u8 bd_num; + u8 reserved[7]; + u32 fw_debug; + u8 reserved1[3]; + u8 flags; +#define ETH_FAST_PATH_RX_REG_CQE_VALID_MASK 0x1 +#define ETH_FAST_PATH_RX_REG_CQE_VALID_SHIFT 0 +#define ETH_FAST_PATH_RX_REG_CQE_VALID_TOGGLE_MASK 0x1 +#define ETH_FAST_PATH_RX_REG_CQE_VALID_TOGGLE_SHIFT 1 +#define ETH_FAST_PATH_RX_REG_CQE_RESERVED2_MASK 0x3F +#define ETH_FAST_PATH_RX_REG_CQE_RESERVED2_SHIFT 2 +}; + +/* TPA-continue ETH Rx FP CQE. */ +struct eth_fast_path_rx_tpa_cont_cqe { + u8 type; + u8 tpa_agg_index; + __le16 len_list[ETH_TPA_CQE_CONT_LEN_LIST_SIZE]; + u8 reserved[5]; + u8 reserved1; + __le16 reserved2[ETH_TPA_CQE_CONT_LEN_LIST_SIZE]; +}; + +/* TPA-end ETH Rx FP CQE. */ +struct eth_fast_path_rx_tpa_end_cqe { + u8 type; + u8 tpa_agg_index; + __le16 total_packet_len; + u8 num_of_bds; + u8 end_reason; + __le16 num_of_coalesced_segs; + __le32 ts_delta; + __le16 len_list[ETH_TPA_CQE_END_LEN_LIST_SIZE]; + u8 reserved1[3]; + u8 reserved2; + __le16 reserved3[ETH_TPA_CQE_END_LEN_LIST_SIZE]; +}; + +/* TPA-start ETH Rx FP CQE. */ +struct eth_fast_path_rx_tpa_start_cqe { + u8 type; + u8 bitfields; +#define ETH_FAST_PATH_RX_TPA_START_CQE_RSS_HASH_TYPE_MASK 0x7 +#define ETH_FAST_PATH_RX_TPA_START_CQE_RSS_HASH_TYPE_SHIFT 0 +#define ETH_FAST_PATH_RX_TPA_START_CQE_TC_MASK 0xF +#define ETH_FAST_PATH_RX_TPA_START_CQE_TC_SHIFT 3 +#define ETH_FAST_PATH_RX_TPA_START_CQE_RESERVED0_MASK 0x1 +#define ETH_FAST_PATH_RX_TPA_START_CQE_RESERVED0_SHIFT 7 + __le16 seg_len; + struct parsing_and_err_flags pars_flags; + __le16 vlan_tag; + __le32 rss_hash; + __le16 len_on_first_bd; + u8 placement_offset; + struct tunnel_parsing_flags tunnel_pars_flags; + u8 tpa_agg_index; + u8 header_len; + __le16 ext_bd_len_list[ETH_TPA_CQE_START_LEN_LIST_SIZE]; + u32 fw_debug; }; /* The L4 pseudo checksum mode for Ethernet */ @@ -168,13 +230,26 @@ struct eth_slow_path_rx_cqe { u8 type; u8 ramrod_cmd_id; u8 error_flag; - u8 reserved[27]; + u8 reserved[25]; __le16 echo; + u8 reserved1; + u8 flags; +/* for PMD mode - valid indication */ +#define ETH_SLOW_PATH_RX_CQE_VALID_MASK 0x1 +#define ETH_SLOW_PATH_RX_CQE_VALID_SHIFT 0 +/* for PMD mode - valid toggle indication */ +#define ETH_SLOW_PATH_RX_CQE_VALID_TOGGLE_MASK 0x1 +#define ETH_SLOW_PATH_RX_CQE_VALID_TOGGLE_SHIFT 1 +#define ETH_SLOW_PATH_RX_CQE_RESERVED2_MASK 0x3F +#define ETH_SLOW_PATH_RX_CQE_RESERVED2_SHIFT 2 }; /* union for all ETH Rx CQE types */ union eth_rx_cqe { struct eth_fast_path_rx_reg_cqe fast_path_regular; + struct eth_fast_path_rx_tpa_start_cqe fast_path_tpa_start; + struct eth_fast_path_rx_tpa_cont_cqe fast_path_tpa_cont; + struct eth_fast_path_rx_tpa_end_cqe fast_path_tpa_end; struct eth_slow_path_rx_cqe slow_path; }; @@ -183,15 +258,18 @@ enum eth_rx_cqe_type { ETH_RX_CQE_TYPE_UNUSED, ETH_RX_CQE_TYPE_REGULAR, ETH_RX_CQE_TYPE_SLOW_PATH, + ETH_RX_CQE_TYPE_TPA_START, + ETH_RX_CQE_TYPE_TPA_CONT, + ETH_RX_CQE_TYPE_TPA_END, MAX_ETH_RX_CQE_TYPE }; /* ETH Rx producers data */ struct eth_rx_prod_data { __le16 bd_prod; - __le16 sge_prod; __le16 cqe_prod; __le16 reserved; + __le16 reserved1; }; /* The first tx bd of a given packet */ @@ -211,12 +289,17 @@ struct eth_tx_2nd_bd { /* The parsing information data for the third tx bd of a given packet. 
*/ struct eth_tx_data_3rd_bd { __le16 lso_mss; - u8 bitfields; + __le16 bitfields; #define ETH_TX_DATA_3RD_BD_TCP_HDR_LEN_DW_MASK 0xF #define ETH_TX_DATA_3RD_BD_TCP_HDR_LEN_DW_SHIFT 0 #define ETH_TX_DATA_3RD_BD_HDR_NBD_MASK 0xF #define ETH_TX_DATA_3RD_BD_HDR_NBD_SHIFT 4 - u8 resereved0[3]; +#define ETH_TX_DATA_3RD_BD_START_BD_MASK 0x1 +#define ETH_TX_DATA_3RD_BD_START_BD_SHIFT 8 +#define ETH_TX_DATA_3RD_BD_RESERVED0_MASK 0x7F +#define ETH_TX_DATA_3RD_BD_RESERVED0_SHIFT 9 + u8 tunn_l4_hdr_start_offset_w; + u8 tunn_hdr_size_w; }; /* The third tx bd of a given packet */ @@ -226,12 +309,24 @@ struct eth_tx_3rd_bd { struct eth_tx_data_3rd_bd data; }; +/* Complementary information for the regular tx bd of a given packet. */ +struct eth_tx_data_bd { + __le16 reserved0; + __le16 bitfields; +#define ETH_TX_DATA_BD_RESERVED1_MASK 0xFF +#define ETH_TX_DATA_BD_RESERVED1_SHIFT 0 +#define ETH_TX_DATA_BD_START_BD_MASK 0x1 +#define ETH_TX_DATA_BD_START_BD_SHIFT 8 +#define ETH_TX_DATA_BD_RESERVED2_MASK 0x7F +#define ETH_TX_DATA_BD_RESERVED2_SHIFT 9 + __le16 reserved3; +}; + /* The common non-special TX BD ring element */ struct eth_tx_bd { struct regpair addr; __le16 nbytes; - __le16 reserved0; - __le32 reserved1; + struct eth_tx_data_bd data; }; union eth_tx_bd_types { diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h index d4a32e878180..3d43c1d4ecef 100644 --- a/include/linux/qed/qed_if.h +++ b/include/linux/qed/qed_if.h @@ -80,7 +80,7 @@ struct qed_dev_info { u8 num_hwfns; u8 hw_mac[ETH_ALEN]; - bool is_mf; + bool is_mf_default; /* FW version */ u16 fw_major; @@ -360,6 +360,12 @@ enum DP_MODULE { /* to be added...up to 0x8000000 */ }; +enum qed_mf_mode { + QED_MF_DEFAULT, + QED_MF_OVLAN, + QED_MF_NPAR, +}; + struct qed_eth_stats { u64 no_buff_discards; u64 packet_too_big_discard; -- cgit v1.2.3 From 3347a0656e7a83b959b1d0ad5396fb4f9c9b2a0e Mon Sep 17 00:00:00 2001 From: William Breathitt Gray Date: Mon, 15 Feb 2016 12:47:42 -0500 Subject: iio: Fix typos in the struct iio_event_spec documentation comments This patch fixes a few minor typos in the documentation comments for the scan_type member of the iio_event_spec structure. The sign member name was improperly capitalized as "Sign" in the comments. The storagebits member name was improperly listed as "storage_bits" in the comments. The endianness member entry in the comments was moved after the repeat member entry in order to maintain consistency with the actual struct iio_event_spec layout. Signed-off-by: William Breathitt Gray Signed-off-by: Jonathan Cameron --- include/linux/iio/iio.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/iio/iio.h b/include/linux/iio/iio.h index ce9e9c1adf34..b2b16772c651 100644 --- a/include/linux/iio/iio.h +++ b/include/linux/iio/iio.h @@ -180,18 +180,18 @@ struct iio_event_spec { * @address: Driver specific identifier. * @scan_index: Monotonic index to give ordering in scans when read * from a buffer. - * @scan_type: Sign: 's' or 'u' to specify signed or unsigned + * @scan_type: sign: 's' or 'u' to specify signed or unsigned * realbits: Number of valid bits of data - * storage_bits: Realbits + padding + * storagebits: Realbits + padding * shift: Shift right by this before masking out * realbits. - * endianness: little or big endian * repeat: Number of times real/storage bits * repeats. When the repeat element is * more than 1, then the type element in * sysfs will show a repeat value. 
* Otherwise, the number of repetitions is * omitted. + * endianness: little or big endian * @info_mask_separate: What information is to be exported that is specific to * this channel. * @info_mask_shared_by_type: What information is to be exported that is shared -- cgit v1.2.3 From 7bbf3cae65b6e438bf52033b63fdce4a86e89e17 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Mon, 15 Feb 2016 21:25:57 +0000 Subject: ipv4: Remove inet_lro library There are no longer any in-tree drivers that use it. Signed-off-by: Ben Hutchings Signed-off-by: David S. Miller --- include/linux/inet_lro.h | 142 ------------------ net/ipv4/Kconfig | 8 - net/ipv4/Makefile | 1 - net/ipv4/inet_lro.c | 374 ----------------------------------------------- 4 files changed, 525 deletions(-) delete mode 100644 include/linux/inet_lro.h delete mode 100644 net/ipv4/inet_lro.c (limited to 'include/linux') diff --git a/include/linux/inet_lro.h b/include/linux/inet_lro.h deleted file mode 100644 index 9a715cfa1fe3..000000000000 --- a/include/linux/inet_lro.h +++ /dev/null @@ -1,142 +0,0 @@ -/* - * linux/include/linux/inet_lro.h - * - * Large Receive Offload (ipv4 / tcp) - * - * (C) Copyright IBM Corp. 2007 - * - * Authors: - * Jan-Bernd Themann - * Christoph Raisch - * - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- */ - -#ifndef __INET_LRO_H_ -#define __INET_LRO_H_ - -#include -#include - -/* - * LRO statistics - */ - -struct net_lro_stats { - unsigned long aggregated; - unsigned long flushed; - unsigned long no_desc; -}; - -/* - * LRO descriptor for a tcp session - */ -struct net_lro_desc { - struct sk_buff *parent; - struct sk_buff *last_skb; - struct skb_frag_struct *next_frag; - struct iphdr *iph; - struct tcphdr *tcph; - __wsum data_csum; - __be32 tcp_rcv_tsecr; - __be32 tcp_rcv_tsval; - __be32 tcp_ack; - u32 tcp_next_seq; - u32 skb_tot_frags_len; - u16 ip_tot_len; - u16 tcp_saw_tstamp; /* timestamps enabled */ - __be16 tcp_window; - int pkt_aggr_cnt; /* counts aggregated packets */ - int vlan_packet; - int mss; - int active; -}; - -/* - * Large Receive Offload (LRO) Manager - * - * Fields must be set by driver - */ - -struct net_lro_mgr { - struct net_device *dev; - struct net_lro_stats stats; - - /* LRO features */ - unsigned long features; -#define LRO_F_NAPI 1 /* Pass packets to stack via NAPI */ -#define LRO_F_EXTRACT_VLAN_ID 2 /* Set flag if VLAN IDs are extracted - from received packets and eth protocol - is still ETH_P_8021Q */ - - /* - * Set for generated SKBs that are not added to - * the frag list in fragmented mode - */ - u32 ip_summed; - u32 ip_summed_aggr; /* Set in aggregated SKBs: CHECKSUM_UNNECESSARY - * or CHECKSUM_NONE */ - - int max_desc; /* Max number of LRO descriptors */ - int max_aggr; /* Max number of LRO packets to be aggregated */ - - int frag_align_pad; /* Padding required to properly align layer 3 - * headers in generated skb when using frags */ - - struct net_lro_desc *lro_arr; /* Array of LRO descriptors */ - - /* - * Optimized driver functions - * - * get_skb_header: returns tcp and ip header for packet in SKB - */ - int (*get_skb_header)(struct sk_buff *skb, void **ip_hdr, - void **tcpudp_hdr, u64 *hdr_flags, void *priv); - - /* hdr_flags: */ -#define LRO_IPV4 1 /* ip_hdr is IPv4 header */ -#define LRO_TCP 2 /* tcpudp_hdr is TCP header */ - - /* - * get_frag_header: returns mac, tcp and ip header for packet in SKB - * - * @hdr_flags: Indicate what kind of LRO has to be done - * (IPv4/IPv6/TCP/UDP) - */ - int (*get_frag_header)(struct skb_frag_struct *frag, void **mac_hdr, - void **ip_hdr, void **tcpudp_hdr, u64 *hdr_flags, - void *priv); -}; - -/* - * Processes a SKB - * - * @lro_mgr: LRO manager to use - * @skb: SKB to aggregate - * @priv: Private data that may be used by driver functions - * (for example get_tcp_ip_hdr) - */ - -void lro_receive_skb(struct net_lro_mgr *lro_mgr, - struct sk_buff *skb, - void *priv); -/* - * Forward all aggregated SKBs held by lro_mgr to network stack - */ - -void lro_flush_all(struct net_lro_mgr *lro_mgr); - -#endif diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index 395d82754626..238225b0c970 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -406,14 +406,6 @@ config INET_XFRM_MODE_BEET If unsure, say Y. -config INET_LRO - tristate "Large Receive Offload (ipv4/tcp)" - default y - ---help--- - Support for Large Receive Offload (ipv4/tcp). - - If unsure, say Y. 
- config INET_DIAG tristate "INET: socket monitoring interface" default y diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile index 62c049b647e9..bfa133691cde 100644 --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile @@ -32,7 +32,6 @@ obj-$(CONFIG_INET_ESP) += esp4.o obj-$(CONFIG_INET_IPCOMP) += ipcomp.o obj-$(CONFIG_INET_XFRM_TUNNEL) += xfrm4_tunnel.o obj-$(CONFIG_INET_XFRM_MODE_BEET) += xfrm4_mode_beet.o -obj-$(CONFIG_INET_LRO) += inet_lro.o obj-$(CONFIG_INET_TUNNEL) += tunnel4.o obj-$(CONFIG_INET_XFRM_MODE_TRANSPORT) += xfrm4_mode_transport.o obj-$(CONFIG_INET_XFRM_MODE_TUNNEL) += xfrm4_mode_tunnel.o diff --git a/net/ipv4/inet_lro.c b/net/ipv4/inet_lro.c deleted file mode 100644 index f17ea49b28fb..000000000000 --- a/net/ipv4/inet_lro.c +++ /dev/null @@ -1,374 +0,0 @@ -/* - * linux/net/ipv4/inet_lro.c - * - * Large Receive Offload (ipv4 / tcp) - * - * (C) Copyright IBM Corp. 2007 - * - * Authors: - * Jan-Bernd Themann - * Christoph Raisch - * - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - - -#include -#include -#include -#include - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Jan-Bernd Themann "); -MODULE_DESCRIPTION("Large Receive Offload (ipv4 / tcp)"); - -#define TCP_HDR_LEN(tcph) (tcph->doff << 2) -#define IP_HDR_LEN(iph) (iph->ihl << 2) -#define TCP_PAYLOAD_LENGTH(iph, tcph) \ - (ntohs(iph->tot_len) - IP_HDR_LEN(iph) - TCP_HDR_LEN(tcph)) - -#define IPH_LEN_WO_OPTIONS 5 -#define TCPH_LEN_WO_OPTIONS 5 -#define TCPH_LEN_W_TIMESTAMP 8 - -#define LRO_MAX_PG_HLEN 64 - -#define LRO_INC_STATS(lro_mgr, attr) { lro_mgr->stats.attr++; } - -/* - * Basic tcp checks whether packet is suitable for LRO - */ - -static int lro_tcp_ip_check(const struct iphdr *iph, const struct tcphdr *tcph, - int len, const struct net_lro_desc *lro_desc) -{ - /* check ip header: don't aggregate padded frames */ - if (ntohs(iph->tot_len) != len) - return -1; - - if (TCP_PAYLOAD_LENGTH(iph, tcph) == 0) - return -1; - - if (iph->ihl != IPH_LEN_WO_OPTIONS) - return -1; - - if (tcph->cwr || tcph->ece || tcph->urg || !tcph->ack || - tcph->rst || tcph->syn || tcph->fin) - return -1; - - if (INET_ECN_is_ce(ipv4_get_dsfield(iph))) - return -1; - - if (tcph->doff != TCPH_LEN_WO_OPTIONS && - tcph->doff != TCPH_LEN_W_TIMESTAMP) - return -1; - - /* check tcp options (only timestamp allowed) */ - if (tcph->doff == TCPH_LEN_W_TIMESTAMP) { - __be32 *topt = (__be32 *)(tcph + 1); - - if (*topt != htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) - | (TCPOPT_TIMESTAMP << 8) - | TCPOLEN_TIMESTAMP)) - return -1; - - /* timestamp should be in right order */ - topt++; - if (lro_desc && after(ntohl(lro_desc->tcp_rcv_tsval), - ntohl(*topt))) - return -1; - - /* timestamp reply should not be zero */ - topt++; - if (*topt == 0) - return -1; - } - - return 0; -} - -static void lro_update_tcp_ip_header(struct net_lro_desc *lro_desc) -{ - struct iphdr *iph = lro_desc->iph; - struct tcphdr *tcph = lro_desc->tcph; 
- __be32 *p; - __wsum tcp_hdr_csum; - - tcph->ack_seq = lro_desc->tcp_ack; - tcph->window = lro_desc->tcp_window; - - if (lro_desc->tcp_saw_tstamp) { - p = (__be32 *)(tcph + 1); - *(p+2) = lro_desc->tcp_rcv_tsecr; - } - - csum_replace2(&iph->check, iph->tot_len, htons(lro_desc->ip_tot_len)); - iph->tot_len = htons(lro_desc->ip_tot_len); - - tcph->check = 0; - tcp_hdr_csum = csum_partial(tcph, TCP_HDR_LEN(tcph), 0); - lro_desc->data_csum = csum_add(lro_desc->data_csum, tcp_hdr_csum); - tcph->check = csum_tcpudp_magic(iph->saddr, iph->daddr, - lro_desc->ip_tot_len - - IP_HDR_LEN(iph), IPPROTO_TCP, - lro_desc->data_csum); -} - -static __wsum lro_tcp_data_csum(struct iphdr *iph, struct tcphdr *tcph, int len) -{ - __wsum tcp_csum; - __wsum tcp_hdr_csum; - __wsum tcp_ps_hdr_csum; - - tcp_csum = ~csum_unfold(tcph->check); - tcp_hdr_csum = csum_partial(tcph, TCP_HDR_LEN(tcph), tcp_csum); - - tcp_ps_hdr_csum = csum_tcpudp_nofold(iph->saddr, iph->daddr, - len + TCP_HDR_LEN(tcph), - IPPROTO_TCP, 0); - - return csum_sub(csum_sub(tcp_csum, tcp_hdr_csum), - tcp_ps_hdr_csum); -} - -static void lro_init_desc(struct net_lro_desc *lro_desc, struct sk_buff *skb, - struct iphdr *iph, struct tcphdr *tcph) -{ - int nr_frags; - __be32 *ptr; - u32 tcp_data_len = TCP_PAYLOAD_LENGTH(iph, tcph); - - nr_frags = skb_shinfo(skb)->nr_frags; - lro_desc->parent = skb; - lro_desc->next_frag = &(skb_shinfo(skb)->frags[nr_frags]); - lro_desc->iph = iph; - lro_desc->tcph = tcph; - lro_desc->tcp_next_seq = ntohl(tcph->seq) + tcp_data_len; - lro_desc->tcp_ack = tcph->ack_seq; - lro_desc->tcp_window = tcph->window; - - lro_desc->pkt_aggr_cnt = 1; - lro_desc->ip_tot_len = ntohs(iph->tot_len); - - if (tcph->doff == 8) { - ptr = (__be32 *)(tcph+1); - lro_desc->tcp_saw_tstamp = 1; - lro_desc->tcp_rcv_tsval = *(ptr+1); - lro_desc->tcp_rcv_tsecr = *(ptr+2); - } - - lro_desc->mss = tcp_data_len; - lro_desc->active = 1; - - lro_desc->data_csum = lro_tcp_data_csum(iph, tcph, - tcp_data_len); -} - -static inline void lro_clear_desc(struct net_lro_desc *lro_desc) -{ - memset(lro_desc, 0, sizeof(struct net_lro_desc)); -} - -static void lro_add_common(struct net_lro_desc *lro_desc, struct iphdr *iph, - struct tcphdr *tcph, int tcp_data_len) -{ - struct sk_buff *parent = lro_desc->parent; - __be32 *topt; - - lro_desc->pkt_aggr_cnt++; - lro_desc->ip_tot_len += tcp_data_len; - lro_desc->tcp_next_seq += tcp_data_len; - lro_desc->tcp_window = tcph->window; - lro_desc->tcp_ack = tcph->ack_seq; - - /* don't update tcp_rcv_tsval, would not work with PAWS */ - if (lro_desc->tcp_saw_tstamp) { - topt = (__be32 *) (tcph + 1); - lro_desc->tcp_rcv_tsecr = *(topt + 2); - } - - lro_desc->data_csum = csum_block_add(lro_desc->data_csum, - lro_tcp_data_csum(iph, tcph, - tcp_data_len), - parent->len); - - parent->len += tcp_data_len; - parent->data_len += tcp_data_len; - if (tcp_data_len > lro_desc->mss) - lro_desc->mss = tcp_data_len; -} - -static void lro_add_packet(struct net_lro_desc *lro_desc, struct sk_buff *skb, - struct iphdr *iph, struct tcphdr *tcph) -{ - struct sk_buff *parent = lro_desc->parent; - int tcp_data_len = TCP_PAYLOAD_LENGTH(iph, tcph); - - lro_add_common(lro_desc, iph, tcph, tcp_data_len); - - skb_pull(skb, (skb->len - tcp_data_len)); - parent->truesize += skb->truesize; - - if (lro_desc->last_skb) - lro_desc->last_skb->next = skb; - else - skb_shinfo(parent)->frag_list = skb; - - lro_desc->last_skb = skb; -} - - -static int lro_check_tcp_conn(struct net_lro_desc *lro_desc, - struct iphdr *iph, - struct tcphdr *tcph) -{ - if 
((lro_desc->iph->saddr != iph->saddr) || - (lro_desc->iph->daddr != iph->daddr) || - (lro_desc->tcph->source != tcph->source) || - (lro_desc->tcph->dest != tcph->dest)) - return -1; - return 0; -} - -static struct net_lro_desc *lro_get_desc(struct net_lro_mgr *lro_mgr, - struct net_lro_desc *lro_arr, - struct iphdr *iph, - struct tcphdr *tcph) -{ - struct net_lro_desc *lro_desc = NULL; - struct net_lro_desc *tmp; - int max_desc = lro_mgr->max_desc; - int i; - - for (i = 0; i < max_desc; i++) { - tmp = &lro_arr[i]; - if (tmp->active) - if (!lro_check_tcp_conn(tmp, iph, tcph)) { - lro_desc = tmp; - goto out; - } - } - - for (i = 0; i < max_desc; i++) { - if (!lro_arr[i].active) { - lro_desc = &lro_arr[i]; - goto out; - } - } - - LRO_INC_STATS(lro_mgr, no_desc); -out: - return lro_desc; -} - -static void lro_flush(struct net_lro_mgr *lro_mgr, - struct net_lro_desc *lro_desc) -{ - if (lro_desc->pkt_aggr_cnt > 1) - lro_update_tcp_ip_header(lro_desc); - - skb_shinfo(lro_desc->parent)->gso_size = lro_desc->mss; - - if (lro_mgr->features & LRO_F_NAPI) - netif_receive_skb(lro_desc->parent); - else - netif_rx(lro_desc->parent); - - LRO_INC_STATS(lro_mgr, flushed); - lro_clear_desc(lro_desc); -} - -static int __lro_proc_skb(struct net_lro_mgr *lro_mgr, struct sk_buff *skb, - void *priv) -{ - struct net_lro_desc *lro_desc; - struct iphdr *iph; - struct tcphdr *tcph; - u64 flags; - int vlan_hdr_len = 0; - - if (!lro_mgr->get_skb_header || - lro_mgr->get_skb_header(skb, (void *)&iph, (void *)&tcph, - &flags, priv)) - goto out; - - if (!(flags & LRO_IPV4) || !(flags & LRO_TCP)) - goto out; - - lro_desc = lro_get_desc(lro_mgr, lro_mgr->lro_arr, iph, tcph); - if (!lro_desc) - goto out; - - if ((skb->protocol == htons(ETH_P_8021Q)) && - !(lro_mgr->features & LRO_F_EXTRACT_VLAN_ID)) - vlan_hdr_len = VLAN_HLEN; - - if (!lro_desc->active) { /* start new lro session */ - if (lro_tcp_ip_check(iph, tcph, skb->len - vlan_hdr_len, NULL)) - goto out; - - skb->ip_summed = lro_mgr->ip_summed_aggr; - lro_init_desc(lro_desc, skb, iph, tcph); - LRO_INC_STATS(lro_mgr, aggregated); - return 0; - } - - if (lro_desc->tcp_next_seq != ntohl(tcph->seq)) - goto out2; - - if (lro_tcp_ip_check(iph, tcph, skb->len, lro_desc)) - goto out2; - - lro_add_packet(lro_desc, skb, iph, tcph); - LRO_INC_STATS(lro_mgr, aggregated); - - if ((lro_desc->pkt_aggr_cnt >= lro_mgr->max_aggr) || - lro_desc->parent->len > (0xFFFF - lro_mgr->dev->mtu)) - lro_flush(lro_mgr, lro_desc); - - return 0; - -out2: /* send aggregated SKBs to stack */ - lro_flush(lro_mgr, lro_desc); - -out: - return 1; -} - -void lro_receive_skb(struct net_lro_mgr *lro_mgr, - struct sk_buff *skb, - void *priv) -{ - if (__lro_proc_skb(lro_mgr, skb, priv)) { - if (lro_mgr->features & LRO_F_NAPI) - netif_receive_skb(skb); - else - netif_rx(skb); - } -} -EXPORT_SYMBOL(lro_receive_skb); - -void lro_flush_all(struct net_lro_mgr *lro_mgr) -{ - int i; - struct net_lro_desc *lro_desc = lro_mgr->lro_arr; - - for (i = 0; i < lro_mgr->max_desc; i++) { - if (lro_desc[i].active) - lro_flush(lro_mgr, &lro_desc[i]); - } -} -EXPORT_SYMBOL(lro_flush_all); -- cgit v1.2.3 From 63c17fb8e5a46a16e10e82005748837fd11a2024 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Fri, 12 Feb 2016 13:02:08 -0800 Subject: mm/core, x86/mm/pkeys: Store protection bits in high VMA flags vma->vm_flags is an 'unsigned long', so has space for 32 flags on 32-bit architectures. The high 32 bits are unused on 64-bit platforms. 
We've steered away from using the unused high VMA bits for things because we would have difficulty supporting it on 32-bit. Protection Keys are not available in 32-bit mode, so there is no concern about supporting this feature in 32-bit mode or on 32-bit CPUs. This patch carves out 4 bits from the high half of vma->vm_flags and allows architectures to set config option to make them available. Sparse complains about these constants unless we explicitly call them "UL". Signed-off-by: Dave Hansen Reviewed-by: Thomas Gleixner Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Dan Williams Cc: Dave Hansen Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Jan Kara Cc: Kirill A. Shutemov Cc: Konstantin Khlebnikov Cc: Linus Torvalds Cc: Mel Gorman Cc: Michal Hocko Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Rik van Riel Cc: Sasha Levin Cc: Valentin Rothberg Cc: Vladimir Davydov Cc: Vlastimil Babka Cc: Xie XiuQi Cc: linux-kernel@vger.kernel.org Cc: linux-mm@kvack.org Link: http://lkml.kernel.org/r/20160212210208.81AF00D5@viggo.jf.intel.com Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 1 + include/linux/mm.h | 11 +++++++++++ mm/Kconfig | 3 +++ 3 files changed, 15 insertions(+) (limited to 'include/linux') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 3632cdd03201..fb2ebeb9a692 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -155,6 +155,7 @@ config X86 select VIRT_TO_BUS select X86_DEV_DMA_OPS if X86_64 select X86_FEATURE_NAMES if PROC_FS + select ARCH_USES_HIGH_VMA_FLAGS if X86_INTEL_MEMORY_PROTECTION_KEYS config INSTRUCTION_DECODER def_bool y diff --git a/include/linux/mm.h b/include/linux/mm.h index 4c7317828fda..54d173bcc327 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -170,6 +170,17 @@ extern unsigned int kobjsize(const void *objp); #define VM_NOHUGEPAGE 0x40000000 /* MADV_NOHUGEPAGE marked this vma */ #define VM_MERGEABLE 0x80000000 /* KSM may merge identical pages */ +#ifdef CONFIG_ARCH_USES_HIGH_VMA_FLAGS +#define VM_HIGH_ARCH_BIT_0 32 /* bit only usable on 64-bit architectures */ +#define VM_HIGH_ARCH_BIT_1 33 /* bit only usable on 64-bit architectures */ +#define VM_HIGH_ARCH_BIT_2 34 /* bit only usable on 64-bit architectures */ +#define VM_HIGH_ARCH_BIT_3 35 /* bit only usable on 64-bit architectures */ +#define VM_HIGH_ARCH_0 BIT(VM_HIGH_ARCH_BIT_0) +#define VM_HIGH_ARCH_1 BIT(VM_HIGH_ARCH_BIT_1) +#define VM_HIGH_ARCH_2 BIT(VM_HIGH_ARCH_BIT_2) +#define VM_HIGH_ARCH_3 BIT(VM_HIGH_ARCH_BIT_3) +#endif /* CONFIG_ARCH_USES_HIGH_VMA_FLAGS */ + #if defined(CONFIG_X86) # define VM_PAT VM_ARCH_1 /* PAT reserves whole VMA at once (x86) */ #elif defined(CONFIG_PPC) diff --git a/mm/Kconfig b/mm/Kconfig index 03cbfa072f42..6cf439948297 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -669,3 +669,6 @@ config ZONE_DEVICE config FRAME_VECTOR bool + +config ARCH_USES_HIGH_VMA_FLAGS + bool -- cgit v1.2.3 From 8f62c883222c9e3c06d60b5e55e307a3d1f18257 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Fri, 12 Feb 2016 13:02:10 -0800 Subject: x86/mm/pkeys: Add arch-specific VMA protection bits Lots of things seem to do: vma->vm_page_prot = vm_get_page_prot(flags); and the ptes get created right from things we pull out of ->vm_page_prot. So it is very convenient if we can store the protection key in flags and vm_page_prot, just like the existing permission bits (_PAGE_RW/PRESENT). It greatly reduces the amount of plumbing and arch-specific hacking we have to do in generic code. 
This also takes the new PROT_PKEY{0,1,2,3} flags and turns *those* in to VM_ flags for vma->vm_flags. The protection key values are stored in 4 places: 1. "prot" argument to system calls 2. vma->vm_flags, filled from the mmap "prot" 3. vma->vm_page prot, filled from vma->vm_flags 4. the PTE itself. The pseudocode for these for steps are as follows: mmap(PROT_PKEY*) vma->vm_flags = ... | arch_calc_vm_prot_bits(mmap_prot); vma->vm_page_prot = ... | arch_vm_get_page_prot(vma->vm_flags); pte = pfn | vma->vm_page_prot Note that this provides a new definitions for x86: arch_vm_get_page_prot() Signed-off-by: Dave Hansen Reviewed-by: Thomas Gleixner Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Rik van Riel Cc: linux-mm@kvack.org Link: http://lkml.kernel.org/r/20160212210210.FE483A42@viggo.jf.intel.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/mmu_context.h | 11 +++++++++++ arch/x86/include/asm/pgtable_types.h | 12 ++++++++++-- arch/x86/include/uapi/asm/mman.h | 16 ++++++++++++++++ include/linux/mm.h | 7 +++++++ 4 files changed, 44 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h index bfd9b2a35a0b..94c4c8b5cb8f 100644 --- a/arch/x86/include/asm/mmu_context.h +++ b/arch/x86/include/asm/mmu_context.h @@ -275,4 +275,15 @@ static inline void arch_unmap(struct mm_struct *mm, struct vm_area_struct *vma, mpx_notify_unmap(mm, vma, start, end); } +static inline int vma_pkey(struct vm_area_struct *vma) +{ + u16 pkey = 0; +#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS + unsigned long vma_pkey_mask = VM_PKEY_BIT0 | VM_PKEY_BIT1 | + VM_PKEY_BIT2 | VM_PKEY_BIT3; + pkey = (vma->vm_flags & vma_pkey_mask) >> VM_PKEY_SHIFT; +#endif + return pkey; +} + #endif /* _ASM_X86_MMU_CONTEXT_H */ diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index cae10ba3c975..8c35cf0cc2ef 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h @@ -115,7 +115,12 @@ #define _KERNPG_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | \ _PAGE_DIRTY) -/* Set of bits not changed in pte_modify */ +/* + * Set of bits not changed in pte_modify. The pte's + * protection key is treated like _PAGE_RW, for + * instance, and is *not* included in this mask since + * pte_modify() does modify it. + */ #define _PAGE_CHG_MASK (PTE_PFN_MASK | _PAGE_PCD | _PAGE_PWT | \ _PAGE_SPECIAL | _PAGE_ACCESSED | _PAGE_DIRTY | \ _PAGE_SOFT_DIRTY) @@ -231,7 +236,10 @@ enum page_cache_mode { /* Extracts the PFN from a (pte|pmd|pud|pgd)val_t of a 4KB page */ #define PTE_PFN_MASK ((pteval_t)PHYSICAL_PAGE_MASK) -/* Extracts the flags from a (pte|pmd|pud|pgd)val_t of a 4KB page */ +/* + * Extracts the flags from a (pte|pmd|pud|pgd)val_t + * This includes the protection key value. + */ #define PTE_FLAGS_MASK (~PTE_PFN_MASK) typedef struct pgprot { pgprotval_t pgprot; } pgprot_t; diff --git a/arch/x86/include/uapi/asm/mman.h b/arch/x86/include/uapi/asm/mman.h index 513b05f15bb4..e8562e0a2993 100644 --- a/arch/x86/include/uapi/asm/mman.h +++ b/arch/x86/include/uapi/asm/mman.h @@ -6,6 +6,22 @@ #define MAP_HUGE_2MB (21 << MAP_HUGE_SHIFT) #define MAP_HUGE_1GB (30 << MAP_HUGE_SHIFT) +#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS +/* + * Take the 4 protection key bits out of the vma->vm_flags + * value and turn them in to the bits that we can put in + * to a pte. 
+ * + * Only override these if Protection Keys are available + * (which is only on 64-bit). + */ +#define arch_vm_get_page_prot(vm_flags) __pgprot( \ + ((vm_flags) & VM_PKEY_BIT0 ? _PAGE_PKEY_BIT0 : 0) | \ + ((vm_flags) & VM_PKEY_BIT1 ? _PAGE_PKEY_BIT1 : 0) | \ + ((vm_flags) & VM_PKEY_BIT2 ? _PAGE_PKEY_BIT2 : 0) | \ + ((vm_flags) & VM_PKEY_BIT3 ? _PAGE_PKEY_BIT3 : 0)) +#endif + #include #endif /* _ASM_X86_MMAN_H */ diff --git a/include/linux/mm.h b/include/linux/mm.h index 54d173bcc327..3056369bab1d 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -183,6 +183,13 @@ extern unsigned int kobjsize(const void *objp); #if defined(CONFIG_X86) # define VM_PAT VM_ARCH_1 /* PAT reserves whole VMA at once (x86) */ +#if defined (CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS) +# define VM_PKEY_SHIFT VM_HIGH_ARCH_BIT_0 +# define VM_PKEY_BIT0 VM_HIGH_ARCH_0 /* A protection key is a 4-bit value */ +# define VM_PKEY_BIT1 VM_HIGH_ARCH_1 +# define VM_PKEY_BIT2 VM_HIGH_ARCH_2 +# define VM_PKEY_BIT3 VM_HIGH_ARCH_3 +#endif #elif defined(CONFIG_PPC) # define VM_SAO VM_ARCH_1 /* Strong Access Ordering (powerpc) */ #elif defined(CONFIG_PARISC) -- cgit v1.2.3 From 905f0a739ad82c6371fb0cb0e71db14a750702ad Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 18 Feb 2016 15:03:27 +0100 Subject: nfnetlink: remove nfnetlink_alloc_skb Following mmapped netlink removal this code can be simplified by removing the alloc wrapper. Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- include/linux/netfilter/nfnetlink.h | 2 -- net/netfilter/nfnetlink.c | 7 ------- net/netfilter/nfnetlink_log.c | 5 ++--- 3 files changed, 2 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/nfnetlink.h b/include/linux/netfilter/nfnetlink.h index ba0d9789eb6e..1d82dd5e9a08 100644 --- a/include/linux/netfilter/nfnetlink.h +++ b/include/linux/netfilter/nfnetlink.h @@ -34,8 +34,6 @@ int nfnetlink_subsys_register(const struct nfnetlink_subsystem *n); int nfnetlink_subsys_unregister(const struct nfnetlink_subsystem *n); int nfnetlink_has_listeners(struct net *net, unsigned int group); -struct sk_buff *nfnetlink_alloc_skb(struct net *net, unsigned int size, - u32 dst_portid, gfp_t gfp_mask); int nfnetlink_send(struct sk_buff *skb, struct net *net, u32 portid, unsigned int group, int echo, gfp_t flags); int nfnetlink_set_err(struct net *net, u32 portid, u32 group, int error); diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index a7ba23353dab..9a99f686d06f 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -127,13 +127,6 @@ int nfnetlink_has_listeners(struct net *net, unsigned int group) } EXPORT_SYMBOL_GPL(nfnetlink_has_listeners); -struct sk_buff *nfnetlink_alloc_skb(struct net *net, unsigned int size, - u32 dst_portid, gfp_t gfp_mask) -{ - return netlink_alloc_skb(net->nfnl, size, dst_portid, gfp_mask); -} -EXPORT_SYMBOL_GPL(nfnetlink_alloc_skb); - int nfnetlink_send(struct sk_buff *skb, struct net *net, u32 portid, unsigned int group, int echo, gfp_t flags) { diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index 8ca932057c13..11f81c8385fc 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -330,14 +330,13 @@ nfulnl_alloc_skb(struct net *net, u32 peer_portid, unsigned int inst_size, * message. 
WARNING: has to be <= 128k due to slab restrictions */ n = max(inst_size, pkt_size); - skb = nfnetlink_alloc_skb(net, n, peer_portid, GFP_ATOMIC); + skb = alloc_skb(n, GFP_ATOMIC); if (!skb) { if (n > pkt_size) { /* try to allocate only as much as we need for current * packet */ - skb = nfnetlink_alloc_skb(net, pkt_size, - peer_portid, GFP_ATOMIC); + skb = alloc_skb(pkt_size, GFP_ATOMIC); } } -- cgit v1.2.3 From c5b0db3263b92526bc0c1b6380c0c99f91f069fc Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 18 Feb 2016 15:03:28 +0100 Subject: nfnetlink: Revert "nfnetlink: add support for memory mapped netlink" reverts commit 3ab1f683bf8b ("nfnetlink: add support for memory mapped netlink")' Like previous commits in the series, remove wrappers that are not needed after mmapped netlink removal. Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- include/linux/netlink.h | 10 ---------- net/netfilter/nfnetlink_queue.c | 6 ++---- net/netlink/af_netlink.c | 20 ++++---------------- 3 files changed, 6 insertions(+), 30 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netlink.h b/include/linux/netlink.h index 0b41959aab9f..da14ab61f363 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -69,16 +69,6 @@ extern void __netlink_clear_multicast_users(struct sock *sk, unsigned int group) extern void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err); extern int netlink_has_listeners(struct sock *sk, unsigned int group); -extern struct sk_buff *__netlink_alloc_skb(struct sock *ssk, unsigned int size, - unsigned int ldiff, u32 dst_portid, - gfp_t gfp_mask); -static inline struct sk_buff * -netlink_alloc_skb(struct sock *ssk, unsigned int size, u32 dst_portid, - gfp_t gfp_mask) -{ - return __netlink_alloc_skb(ssk, size, 0, dst_portid, gfp_mask); -} - extern int netlink_unicast(struct sock *ssk, struct sk_buff *skb, __u32 portid, int nonblock); extern int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, __u32 portid, __u32 group, gfp_t allocation); diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index 1d3936587ace..75429997ed41 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -301,7 +301,7 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, __be32 **packet_id_ptr) { size_t size; - size_t data_len = 0, cap_len = 0, rem_len = 0; + size_t data_len = 0, cap_len = 0; unsigned int hlen = 0; struct sk_buff *skb; struct nlattr *nla; @@ -361,7 +361,6 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, hlen = min_t(unsigned int, hlen, data_len); size += sizeof(struct nlattr) + hlen; cap_len = entskb->len; - rem_len = data_len - hlen; break; } @@ -386,8 +385,7 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, size += nla_total_size(seclen); } - skb = __netlink_alloc_skb(net->nfnl, size, rem_len, queue->peer_portid, - GFP_ATOMIC); + skb = alloc_skb(size, GFP_ATOMIC); if (!skb) { skb_tx_error(entskb); return NULL; diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 85aa6ef86dfd..c8416792cce0 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1248,14 +1248,6 @@ retry: } EXPORT_SYMBOL(netlink_unicast); -struct sk_buff *__netlink_alloc_skb(struct sock *ssk, unsigned int size, - unsigned int ldiff, u32 dst_portid, - gfp_t gfp_mask) -{ - return alloc_skb(size, gfp_mask); -} -EXPORT_SYMBOL_GPL(__netlink_alloc_skb); - int netlink_has_listeners(struct sock *sk, 
unsigned int group) { int res = 0; @@ -2082,15 +2074,12 @@ static int netlink_dump(struct sock *sk) if (alloc_min_size < nlk->max_recvmsg_len) { alloc_size = nlk->max_recvmsg_len; - skb = netlink_alloc_skb(sk, alloc_size, nlk->portid, - GFP_KERNEL | - __GFP_NOWARN | - __GFP_NORETRY); + skb = alloc_skb(alloc_size, GFP_KERNEL | + __GFP_NOWARN | __GFP_NORETRY); } if (!skb) { alloc_size = alloc_min_size; - skb = netlink_alloc_skb(sk, alloc_size, nlk->portid, - GFP_KERNEL); + skb = alloc_skb(alloc_size, GFP_KERNEL); } if (!skb) goto errout_skb; @@ -2230,8 +2219,7 @@ void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err) if (!(nlk->flags & NETLINK_F_CAP_ACK) && err) payload += nlmsg_len(nlh); - skb = netlink_alloc_skb(in_skb->sk, nlmsg_total_size(payload), - NETLINK_CB(in_skb).portid, GFP_KERNEL); + skb = nlmsg_new(payload, GFP_KERNEL); if (!skb) { struct sock *sk; -- cgit v1.2.3 From d6497816836da321a46c0e8575c4fa3d0c672bda Mon Sep 17 00:00:00 2001 From: Martin Sperl Date: Thu, 18 Feb 2016 15:53:10 +0000 Subject: spi: docbook: fix parsing error Fixes docbook parsing error because documentation is not directly followed by the structure, but typedef used in structure. Reordering should solve that issue. Signed-off-by: Martin Sperl Signed-off-by: Mark Brown --- include/linux/spi/spi.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index 38204b584dc5..5396ee5fc51f 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -586,6 +586,10 @@ extern struct spi_master *spi_busnum_to_master(u16 busnum); * SPI resource management while processing a SPI message */ +typedef void (*spi_res_release_t)(struct spi_master *master, + struct spi_message *msg, + void *res); + /** * struct spi_res - spi resource management structure * @entry: list entry @@ -595,9 +599,6 @@ extern struct spi_master *spi_busnum_to_master(u16 busnum); * this is based on ideas from devres, but focused on life-cycle * management during spi_message processing */ -typedef void (*spi_res_release_t)(struct spi_master *master, - struct spi_message *msg, - void *res); struct spi_res { struct list_head entry; spi_res_release_t release; -- cgit v1.2.3 From 1b2ee1266ea647713dbaf44825967c180dfc8d76 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Fri, 12 Feb 2016 13:02:21 -0800 Subject: mm/core: Do not enforce PKEY permissions on remote mm access We try to enforce protection keys in software the same way that we do in hardware. (See long example below). But, we only want to do this when accessing our *own* process's memory. If GDB set PKRU[6].AD=1 (disable access to PKEY 6), then tried to PTRACE_POKE a target process which just happened to have some mprotect_pkey(pkey=6) memory, we do *not* want to deny the debugger access to that memory. PKRU is fundamentally a thread-local structure and we do not want to enforce it on access to _another_ thread's data. This gets especially tricky when we have workqueues or other delayed-work mechanisms that might run in a random process's context. We can check that we only enforce pkeys when operating on our *own* mm, but delayed work gets performed when a random user context is active. We might end up with a situation where a delayed-work gup fails when running randomly under its "own" task but succeeds when running under another process. We want to avoid that. To avoid that, we use the new GUP flag: FOLL_REMOTE and add a fault flag: FAULT_FLAG_REMOTE. 
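[Editor's note] A hedged sketch of the translation this patch adds in faultin_page(); the flag values below are stand-ins for illustration, not the kernel's real constants:

#define DEMO_FOLL_WRITE        0x01   /* stand-in for FOLL_WRITE */
#define DEMO_FOLL_REMOTE       0x2000 /* stand-in for FOLL_REMOTE */
#define DEMO_FAULT_FLAG_WRITE  0x01   /* stand-in for FAULT_FLAG_WRITE */
#define DEMO_FAULT_FLAG_REMOTE 0x80   /* stand-in for FAULT_FLAG_REMOTE */

/* Mirror faultin_page(): translate gup flags into fault flags. */
static unsigned int demo_gup_to_fault_flags(unsigned int gup_flags)
{
	unsigned int fault_flags = 0;

	if (gup_flags & DEMO_FOLL_WRITE)
		fault_flags |= DEMO_FAULT_FLAG_WRITE;
	if (gup_flags & DEMO_FOLL_REMOTE)
		fault_flags |= DEMO_FAULT_FLAG_REMOTE;
	return fault_flags;
}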
They indicate that we are walking an mm which is not guaranteed to be the same as current->mm and should not be subject to protection key enforcement. Thanks to Jerome Glisse for pointing out this scenario. Signed-off-by: Dave Hansen Reviewed-by: Thomas Gleixner Cc: Alexey Kardashevskiy Cc: Andrea Arcangeli Cc: Andrew Morton Cc: Andy Lutomirski Cc: Andy Lutomirski Cc: Arnd Bergmann Cc: Benjamin Herrenschmidt Cc: Boaz Harrosh Cc: Borislav Petkov Cc: Brian Gerst Cc: Dan Williams Cc: Dave Chinner Cc: Dave Hansen Cc: David Gibson Cc: Denys Vlasenko Cc: Dominik Dingel Cc: Dominik Vogt Cc: Eric B Munson Cc: Geliang Tang Cc: Guan Xuetao Cc: H. Peter Anvin Cc: Heiko Carstens Cc: Hugh Dickins Cc: Jan Kara Cc: Jason Low Cc: Jerome Marchand Cc: Joerg Roedel Cc: Kirill A. Shutemov Cc: Konstantin Khlebnikov Cc: Laurent Dufour Cc: Linus Torvalds Cc: Martin Schwidefsky Cc: Matthew Wilcox Cc: Mel Gorman Cc: Michael Ellerman Cc: Michal Hocko Cc: Mikulas Patocka Cc: Minchan Kim Cc: Oleg Nesterov Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Rik van Riel Cc: Sasha Levin Cc: Shachar Raindel Cc: Vlastimil Babka Cc: Xie XiuQi Cc: iommu@lists.linux-foundation.org Cc: linux-arch@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: linux-mm@kvack.org Cc: linux-s390@vger.kernel.org Cc: linuxppc-dev@lists.ozlabs.org Signed-off-by: Ingo Molnar --- arch/powerpc/include/asm/mmu_context.h | 3 ++- arch/s390/include/asm/mmu_context.h | 3 ++- arch/unicore32/include/asm/mmu_context.h | 3 ++- arch/x86/include/asm/mmu_context.h | 5 +++-- drivers/iommu/amd_iommu_v2.c | 1 + include/asm-generic/mm_hooks.h | 3 ++- include/linux/mm.h | 1 + mm/gup.c | 15 ++++++++++----- mm/ksm.c | 10 ++++++++-- mm/memory.c | 3 ++- 10 files changed, 33 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h index a0f1838c8e78..df9bf3ed025b 100644 --- a/arch/powerpc/include/asm/mmu_context.h +++ b/arch/powerpc/include/asm/mmu_context.h @@ -148,7 +148,8 @@ static inline void arch_bprm_mm_init(struct mm_struct *mm, { } -static inline bool arch_vma_access_permitted(struct vm_area_struct *vma, bool write) +static inline bool arch_vma_access_permitted(struct vm_area_struct *vma, + bool write, bool foreign) { /* by default, allow everything */ return true; diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h index 2627b338382c..8906600922ce 100644 --- a/arch/s390/include/asm/mmu_context.h +++ b/arch/s390/include/asm/mmu_context.h @@ -130,7 +130,8 @@ static inline void arch_bprm_mm_init(struct mm_struct *mm, { } -static inline bool arch_vma_access_permitted(struct vm_area_struct *vma, bool write) +static inline bool arch_vma_access_permitted(struct vm_area_struct *vma, + bool write, bool foreign) { /* by default, allow everything */ return true; diff --git a/arch/unicore32/include/asm/mmu_context.h b/arch/unicore32/include/asm/mmu_context.h index 3133f947ade2..e35632ef23c7 100644 --- a/arch/unicore32/include/asm/mmu_context.h +++ b/arch/unicore32/include/asm/mmu_context.h @@ -97,7 +97,8 @@ static inline void arch_bprm_mm_init(struct mm_struct *mm, { } -static inline bool arch_vma_access_permitted(struct vm_area_struct *vma, bool write) +static inline bool arch_vma_access_permitted(struct vm_area_struct *vma, + bool write, bool foreign) { /* by default, allow everything */ return true; diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h index 19036cdbed8f..b4d939a17e60 100644 ---
a/arch/x86/include/asm/mmu_context.h +++ b/arch/x86/include/asm/mmu_context.h @@ -322,10 +322,11 @@ static inline bool vma_is_foreign(struct vm_area_struct *vma) return false; } -static inline bool arch_vma_access_permitted(struct vm_area_struct *vma, bool write) +static inline bool arch_vma_access_permitted(struct vm_area_struct *vma, + bool write, bool foreign) { /* allow access if the VMA is not one from this process */ - if (vma_is_foreign(vma)) + if (foreign || vma_is_foreign(vma)) return true; return __pkru_allows_pkey(vma_pkey(vma), write); } diff --git a/drivers/iommu/amd_iommu_v2.c b/drivers/iommu/amd_iommu_v2.c index c865737326e1..56999d2fac07 100644 --- a/drivers/iommu/amd_iommu_v2.c +++ b/drivers/iommu/amd_iommu_v2.c @@ -526,6 +526,7 @@ static void do_fault(struct work_struct *work) flags |= FAULT_FLAG_USER; if (fault->flags & PPR_FAULT_WRITE) flags |= FAULT_FLAG_WRITE; + flags |= FAULT_FLAG_REMOTE; down_read(&mm->mmap_sem); vma = find_extend_vma(mm, address); diff --git a/include/asm-generic/mm_hooks.h b/include/asm-generic/mm_hooks.h index c1fc5af3c384..d5c9633bd955 100644 --- a/include/asm-generic/mm_hooks.h +++ b/include/asm-generic/mm_hooks.h @@ -26,7 +26,8 @@ static inline void arch_bprm_mm_init(struct mm_struct *mm, { } -static inline bool arch_vma_access_permitted(struct vm_area_struct *vma, bool write) +static inline bool arch_vma_access_permitted(struct vm_area_struct *vma, + bool write, bool foreign) { /* by default, allow everything */ return true; diff --git a/include/linux/mm.h b/include/linux/mm.h index 3056369bab1d..2aaa0f0d67ea 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -251,6 +251,7 @@ extern pgprot_t protection_map[16]; #define FAULT_FLAG_KILLABLE 0x10 /* The fault task is in SIGKILL killable region */ #define FAULT_FLAG_TRIED 0x20 /* Second try */ #define FAULT_FLAG_USER 0x40 /* The fault originated in userspace */ +#define FAULT_FLAG_REMOTE 0x80 /* faulting for non current tsk/mm */ /* * vm_fault is filled by the the pagefault handler and passed to the vma's diff --git a/mm/gup.c b/mm/gup.c index e0f5f3574d16..d276760163b3 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -365,6 +365,8 @@ static int faultin_page(struct task_struct *tsk, struct vm_area_struct *vma, return -ENOENT; if (*flags & FOLL_WRITE) fault_flags |= FAULT_FLAG_WRITE; + if (*flags & FOLL_REMOTE) + fault_flags |= FAULT_FLAG_REMOTE; if (nonblocking) fault_flags |= FAULT_FLAG_ALLOW_RETRY; if (*flags & FOLL_NOWAIT) @@ -415,11 +417,13 @@ static int faultin_page(struct task_struct *tsk, struct vm_area_struct *vma, static int check_vma_flags(struct vm_area_struct *vma, unsigned long gup_flags) { vm_flags_t vm_flags = vma->vm_flags; + int write = (gup_flags & FOLL_WRITE); + int foreign = (gup_flags & FOLL_REMOTE); if (vm_flags & (VM_IO | VM_PFNMAP)) return -EFAULT; - if (gup_flags & FOLL_WRITE) { + if (write) { if (!(vm_flags & VM_WRITE)) { if (!(gup_flags & FOLL_FORCE)) return -EFAULT; @@ -445,7 +449,7 @@ static int check_vma_flags(struct vm_area_struct *vma, unsigned long gup_flags) if (!(vm_flags & VM_MAYREAD)) return -EFAULT; } - if (!arch_vma_access_permitted(vma, (gup_flags & FOLL_WRITE))) + if (!arch_vma_access_permitted(vma, write, foreign)) return -EFAULT; return 0; } @@ -615,7 +619,8 @@ EXPORT_SYMBOL(__get_user_pages); bool vma_permits_fault(struct vm_area_struct *vma, unsigned int fault_flags) { - bool write = !!(fault_flags & FAULT_FLAG_WRITE); + bool write = !!(fault_flags & FAULT_FLAG_WRITE); + bool foreign = !!(fault_flags & FAULT_FLAG_REMOTE); vm_flags_t vm_flags = write ? 
VM_WRITE : VM_READ; if (!(vm_flags & vma->vm_flags)) @@ -623,9 +628,9 @@ bool vma_permits_fault(struct vm_area_struct *vma, unsigned int fault_flags) /* * The architecture might have a hardware protection - * mechanism other than read/write that can deny access + * mechanism other than read/write that can deny access. */ - if (!arch_vma_access_permitted(vma, write)) + if (!arch_vma_access_permitted(vma, write, foreign)) return false; return true; diff --git a/mm/ksm.c b/mm/ksm.c index c2013f638d11..b99e828172f6 100644 --- a/mm/ksm.c +++ b/mm/ksm.c @@ -359,6 +359,10 @@ static inline bool ksm_test_exit(struct mm_struct *mm) * in case the application has unmapped and remapped mm,addr meanwhile. * Could a ksm page appear anywhere else? Actually yes, in a VM_PFNMAP * mmap of /dev/mem or /dev/kmem, where we would not want to touch it. + * + * FAULT_FLAG/FOLL_REMOTE are because we do this outside the context + * of the process that owns 'vma'. We also do not want to enforce + * protection keys here anyway. */ static int break_ksm(struct vm_area_struct *vma, unsigned long addr) { @@ -367,12 +371,14 @@ static int break_ksm(struct vm_area_struct *vma, unsigned long addr) do { cond_resched(); - page = follow_page(vma, addr, FOLL_GET | FOLL_MIGRATION); + page = follow_page(vma, addr, + FOLL_GET | FOLL_MIGRATION | FOLL_REMOTE); if (IS_ERR_OR_NULL(page)) break; if (PageKsm(page)) ret = handle_mm_fault(vma->vm_mm, vma, addr, - FAULT_FLAG_WRITE); + FAULT_FLAG_WRITE | + FAULT_FLAG_REMOTE); else ret = VM_FAULT_WRITE; put_page(page); diff --git a/mm/memory.c b/mm/memory.c index d7e84fe6504d..76c44e5dffa2 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3379,7 +3379,8 @@ static int __handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma, pmd_t *pmd; pte_t *pte; - if (!arch_vma_access_permitted(vma, flags & FAULT_FLAG_WRITE)) + if (!arch_vma_access_permitted(vma, flags & FAULT_FLAG_WRITE, + flags & FAULT_FLAG_REMOTE)) return VM_FAULT_SIGSEGV; if (unlikely(is_vm_hugetlb_page(vma))) -- cgit v1.2.3 From d61172b4b695b821388cdb6088a41d431bcbb93b Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Fri, 12 Feb 2016 13:02:24 -0800 Subject: mm/core, x86/mm/pkeys: Differentiate instruction fetches As discussed earlier, we attempt to enforce protection keys in software. However, the code checks all faults to ensure that they are not violating protection key permissions. It was assumed that all faults are either write faults where we check PKRU[key].WD (write disable) or read faults where we check the AD (access disable) bit. But, there is a third category of faults for protection keys: instruction faults. Instruction faults never run afoul of protection keys because they do not affect instruction fetches. So, plumb the PF_INSTR bit down in to the arch_vma_access_permitted() function where we do the protection key checks. We also add a new FAULT_FLAG_INSTRUCTION. This is because handle_mm_fault() is not passed the architecture-specific error_code where we keep PF_INSTR, so we need to encode the instruction fetch information in to the arch-generic fault flags. Signed-off-by: Dave Hansen Reviewed-by: Thomas Gleixner Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Denys Vlasenko Cc: H. 
Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Rik van Riel Cc: linux-mm@kvack.org Link: http://lkml.kernel.org/r/20160212210224.96928009@viggo.jf.intel.com Signed-off-by: Ingo Molnar --- arch/powerpc/include/asm/mmu_context.h | 2 +- arch/s390/include/asm/mmu_context.h | 2 +- arch/x86/include/asm/mmu_context.h | 5 ++++- arch/x86/mm/fault.c | 8 ++++++-- include/asm-generic/mm_hooks.h | 2 +- include/linux/mm.h | 1 + mm/gup.c | 11 +++++++++-- mm/memory.c | 1 + 8 files changed, 24 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h index df9bf3ed025b..4eaab40e3ade 100644 --- a/arch/powerpc/include/asm/mmu_context.h +++ b/arch/powerpc/include/asm/mmu_context.h @@ -149,7 +149,7 @@ static inline void arch_bprm_mm_init(struct mm_struct *mm, } static inline bool arch_vma_access_permitted(struct vm_area_struct *vma, - bool write, bool foreign) + bool write, bool execute, bool foreign) { /* by default, allow everything */ return true; diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h index 8906600922ce..fa66b6dfa97a 100644 --- a/arch/s390/include/asm/mmu_context.h +++ b/arch/s390/include/asm/mmu_context.h @@ -131,7 +131,7 @@ static inline void arch_bprm_mm_init(struct mm_struct *mm, } static inline bool arch_vma_access_permitted(struct vm_area_struct *vma, - bool write, bool foreign) + bool write, bool execute, bool foreign) { /* by default, allow everything */ return true; diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h index b4d939a17e60..6572b949cbca 100644 --- a/arch/x86/include/asm/mmu_context.h +++ b/arch/x86/include/asm/mmu_context.h @@ -323,8 +323,11 @@ static inline bool vma_is_foreign(struct vm_area_struct *vma) } static inline bool arch_vma_access_permitted(struct vm_area_struct *vma, - bool write, bool foreign) + bool write, bool execute, bool foreign) { + /* pkeys never affect instruction fetches */ + if (execute) + return true; /* allow access if the VMA is not one from this process */ if (foreign || vma_is_foreign(vma)) return true; diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 68ecdffe284e..d81744e6f39f 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -908,7 +908,8 @@ static inline bool bad_area_access_from_pkeys(unsigned long error_code, if (error_code & PF_PK) return true; /* this checks permission keys on the VMA: */ - if (!arch_vma_access_permitted(vma, (error_code & PF_WRITE), foreign)) + if (!arch_vma_access_permitted(vma, (error_code & PF_WRITE), + (error_code & PF_INSTR), foreign)) return true; return false; } @@ -1112,7 +1113,8 @@ access_error(unsigned long error_code, struct vm_area_struct *vma) * faults just to hit a PF_PK as soon as we fill in a * page. 
*/ - if (!arch_vma_access_permitted(vma, (error_code & PF_WRITE), foreign)) + if (!arch_vma_access_permitted(vma, (error_code & PF_WRITE), + (error_code & PF_INSTR), foreign)) return 1; if (error_code & PF_WRITE) { @@ -1267,6 +1269,8 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code, if (error_code & PF_WRITE) flags |= FAULT_FLAG_WRITE; + if (error_code & PF_INSTR) + flags |= FAULT_FLAG_INSTRUCTION; /* * When running in the kernel we expect faults to occur only to diff --git a/include/asm-generic/mm_hooks.h b/include/asm-generic/mm_hooks.h index d5c9633bd955..cc5d9a1405df 100644 --- a/include/asm-generic/mm_hooks.h +++ b/include/asm-generic/mm_hooks.h @@ -27,7 +27,7 @@ static inline void arch_bprm_mm_init(struct mm_struct *mm, } static inline bool arch_vma_access_permitted(struct vm_area_struct *vma, - bool write, bool foreign) + bool write, bool execute, bool foreign) { /* by default, allow everything */ return true; diff --git a/include/linux/mm.h b/include/linux/mm.h index 2aaa0f0d67ea..7955c3eb83db 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -252,6 +252,7 @@ extern pgprot_t protection_map[16]; #define FAULT_FLAG_TRIED 0x20 /* Second try */ #define FAULT_FLAG_USER 0x40 /* The fault originated in userspace */ #define FAULT_FLAG_REMOTE 0x80 /* faulting for non current tsk/mm */ +#define FAULT_FLAG_INSTRUCTION 0x100 /* The fault was during an instruction fetch */ /* * vm_fault is filled by the the pagefault handler and passed to the vma's diff --git a/mm/gup.c b/mm/gup.c index d276760163b3..7f1c4fb77cfa 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -449,7 +449,11 @@ static int check_vma_flags(struct vm_area_struct *vma, unsigned long gup_flags) if (!(vm_flags & VM_MAYREAD)) return -EFAULT; } - if (!arch_vma_access_permitted(vma, write, foreign)) + /* + * gups are always data accesses, not instruction + * fetches, so execute=false here + */ + if (!arch_vma_access_permitted(vma, write, false, foreign)) return -EFAULT; return 0; } @@ -629,8 +633,11 @@ bool vma_permits_fault(struct vm_area_struct *vma, unsigned int fault_flags) /* * The architecture might have a hardware protection * mechanism other than read/write that can deny access. + * + * gup always represents data access, not instruction + * fetches, so execute=false here: */ - if (!arch_vma_access_permitted(vma, write, foreign)) + if (!arch_vma_access_permitted(vma, write, false, foreign)) return false; return true; diff --git a/mm/memory.c b/mm/memory.c index 76c44e5dffa2..99e9f928264a 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3380,6 +3380,7 @@ static int __handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma, pte_t *pte; if (!arch_vma_access_permitted(vma, flags & FAULT_FLAG_WRITE, + flags & FAULT_FLAG_INSTRUCTION, flags & FAULT_FLAG_REMOTE)) return VM_FAULT_SIGSEGV; -- cgit v1.2.3 From e6bfb70959a0ca6ddedb29e779a293c6f71ed0e7 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Fri, 12 Feb 2016 13:02:31 -0800 Subject: mm/core, arch, powerpc: Pass a protection key in to calc_vm_flag_bits() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This plumbs a protection key through calc_vm_flag_bits(). We could have done this in calc_vm_prot_bits(), but I did not feel super strongly which way to go. It was pretty arbitrary which one to use. 
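[Editor's note] A hedged, self-contained sketch of the calc_vm_prot_bits() shape after this change; the constants and the arch hook body are illustrative stand-ins, and the real _calc_vm_trans() also rescales bits when the PROT_* and VM_* values differ:

#define DEMO_PROT_READ  0x1
#define DEMO_PROT_WRITE 0x2
#define DEMO_VM_READ    0x1
#define DEMO_VM_WRITE   0x2

/* Simplified _calc_vm_trans(): assumes the two bit values are equal. */
#define demo_calc_vm_trans(x, bit1, bit2) ((x) & (bit1) ? (bit2) : 0)

/* Stand-in arch hook; on x86 this is where the pkey bits get folded in. */
static unsigned long demo_arch_calc_vm_prot_bits(unsigned long prot,
						 unsigned long pkey)
{
	return pkey << 4; /* illustrative bit placement only */
}

static unsigned long demo_calc_vm_prot_bits(unsigned long prot,
					    unsigned long pkey)
{
	return demo_calc_vm_trans(prot, DEMO_PROT_READ, DEMO_VM_READ) |
	       demo_calc_vm_trans(prot, DEMO_PROT_WRITE, DEMO_VM_WRITE) |
	       demo_arch_calc_vm_prot_bits(prot, pkey);
}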
Signed-off-by: Dave Hansen Reviewed-by: Thomas Gleixner Cc: Andrea Arcangeli Cc: Andrew Morton Cc: Andy Lutomirski Cc: Arve HjønnevÃ¥g Cc: Benjamin Herrenschmidt Cc: Borislav Petkov Cc: Brian Gerst Cc: Chen Gang Cc: Dan Williams Cc: Dave Chinner Cc: Dave Hansen Cc: David Airlie Cc: Denys Vlasenko Cc: Eric W. Biederman Cc: Geliang Tang Cc: Greg Kroah-Hartman Cc: H. Peter Anvin Cc: Kirill A. Shutemov Cc: Konstantin Khlebnikov Cc: Leon Romanovsky Cc: Linus Torvalds Cc: Masahiro Yamada Cc: Maxime Coquelin Cc: Mel Gorman Cc: Michael Ellerman Cc: Oleg Nesterov Cc: Paul Gortmaker Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Rik van Riel Cc: Riley Andrews Cc: Vladimir Davydov Cc: devel@driverdev.osuosl.org Cc: linux-api@vger.kernel.org Cc: linux-arch@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: linux-mm@kvack.org Cc: linuxppc-dev@lists.ozlabs.org Link: http://lkml.kernel.org/r/20160212210231.E6F1F0D6@viggo.jf.intel.com Signed-off-by: Ingo Molnar --- arch/powerpc/include/asm/mman.h | 5 +++-- drivers/char/agp/frontend.c | 2 +- drivers/staging/android/ashmem.c | 4 ++-- include/linux/mman.h | 6 +++--- mm/mmap.c | 2 +- mm/mprotect.c | 2 +- mm/nommu.c | 2 +- 7 files changed, 12 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/arch/powerpc/include/asm/mman.h b/arch/powerpc/include/asm/mman.h index 8565c254151a..2563c435a4b1 100644 --- a/arch/powerpc/include/asm/mman.h +++ b/arch/powerpc/include/asm/mman.h @@ -18,11 +18,12 @@ * This file is included by linux/mman.h, so we can't use cacl_vm_prot_bits() * here. How important is the optimization? */ -static inline unsigned long arch_calc_vm_prot_bits(unsigned long prot) +static inline unsigned long arch_calc_vm_prot_bits(unsigned long prot, + unsigned long pkey) { return (prot & PROT_SAO) ? VM_SAO : 0; } -#define arch_calc_vm_prot_bits(prot) arch_calc_vm_prot_bits(prot) +#define arch_calc_vm_prot_bits(prot, pkey) arch_calc_vm_prot_bits(prot, pkey) static inline pgprot_t arch_vm_get_page_prot(unsigned long vm_flags) { diff --git a/drivers/char/agp/frontend.c b/drivers/char/agp/frontend.c index 09f17eb73486..0f64d149c98d 100644 --- a/drivers/char/agp/frontend.c +++ b/drivers/char/agp/frontend.c @@ -156,7 +156,7 @@ static pgprot_t agp_convert_mmap_flags(int prot) { unsigned long prot_bits; - prot_bits = calc_vm_prot_bits(prot) | VM_SHARED; + prot_bits = calc_vm_prot_bits(prot, 0) | VM_SHARED; return vm_get_page_prot(prot_bits); } diff --git a/drivers/staging/android/ashmem.c b/drivers/staging/android/ashmem.c index 5bb1283d19cd..2695ff121b04 100644 --- a/drivers/staging/android/ashmem.c +++ b/drivers/staging/android/ashmem.c @@ -372,8 +372,8 @@ static int ashmem_mmap(struct file *file, struct vm_area_struct *vma) } /* requested protection bits must match our allowed protection mask */ - if (unlikely((vma->vm_flags & ~calc_vm_prot_bits(asma->prot_mask)) & - calc_vm_prot_bits(PROT_MASK))) { + if (unlikely((vma->vm_flags & ~calc_vm_prot_bits(asma->prot_mask, 0)) & + calc_vm_prot_bits(PROT_MASK, 0))) { ret = -EPERM; goto out; } diff --git a/include/linux/mman.h b/include/linux/mman.h index 16373c8f5f57..33e17f6a327a 100644 --- a/include/linux/mman.h +++ b/include/linux/mman.h @@ -35,7 +35,7 @@ static inline void vm_unacct_memory(long pages) */ #ifndef arch_calc_vm_prot_bits -#define arch_calc_vm_prot_bits(prot) 0 +#define arch_calc_vm_prot_bits(prot, pkey) 0 #endif #ifndef arch_vm_get_page_prot @@ -70,12 +70,12 @@ static inline int arch_validate_prot(unsigned long prot) * Combine the mmap "prot" argument into "vm_flags" used internally. 
*/ static inline unsigned long -calc_vm_prot_bits(unsigned long prot) +calc_vm_prot_bits(unsigned long prot, unsigned long pkey) { return _calc_vm_trans(prot, PROT_READ, VM_READ ) | _calc_vm_trans(prot, PROT_WRITE, VM_WRITE) | _calc_vm_trans(prot, PROT_EXEC, VM_EXEC) | - arch_calc_vm_prot_bits(prot); + arch_calc_vm_prot_bits(prot, pkey); } /* diff --git a/mm/mmap.c b/mm/mmap.c index e2e9f48b06c2..784d2d6142a2 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -1313,7 +1313,7 @@ unsigned long do_mmap(struct file *file, unsigned long addr, * to. we assume access permissions have been handled by the open * of the memory object, so we don't do any here. */ - vm_flags |= calc_vm_prot_bits(prot) | calc_vm_flag_bits(flags) | + vm_flags |= calc_vm_prot_bits(prot, 0) | calc_vm_flag_bits(flags) | mm->def_flags | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC; if (flags & MAP_LOCKED) diff --git a/mm/mprotect.c b/mm/mprotect.c index f7cb3d4d9c2e..3790c8bee380 100644 --- a/mm/mprotect.c +++ b/mm/mprotect.c @@ -380,7 +380,7 @@ SYSCALL_DEFINE3(mprotect, unsigned long, start, size_t, len, if ((prot & PROT_READ) && (current->personality & READ_IMPLIES_EXEC)) prot |= PROT_EXEC; - vm_flags = calc_vm_prot_bits(prot); + vm_flags = calc_vm_prot_bits(prot, 0); down_write(¤t->mm->mmap_sem); diff --git a/mm/nommu.c b/mm/nommu.c index b64d04d19702..5ba39b82f241 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -1082,7 +1082,7 @@ static unsigned long determine_vm_flags(struct file *file, { unsigned long vm_flags; - vm_flags = calc_vm_prot_bits(prot) | calc_vm_flag_bits(flags); + vm_flags = calc_vm_prot_bits(prot, 0) | calc_vm_flag_bits(flags); /* vm_flags |= mm->def_flags; */ if (!(capabilities & NOMMU_MAP_DIRECT)) { -- cgit v1.2.3 From 66d375709d2c891acc639538fd3179fa0cbb0daf Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Fri, 12 Feb 2016 13:02:32 -0800 Subject: mm/core, x86/mm/pkeys: Add arch_validate_pkey() The syscall-level code is passed a protection key and need to return an appropriate error code if the protection key is bogus. We will be using this in subsequent patches. Note that this also begins a series of arch-specific calls that we need to expose in otherwise arch-independent code. We create a linux/pkeys.h header where we will put *all* the stubs for these functions. Signed-off-by: Dave Hansen Reviewed-by: Thomas Gleixner Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Denys Vlasenko Cc: H. 
Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Rik van Riel Cc: linux-mm@kvack.org Link: http://lkml.kernel.org/r/20160212210232.774EEAAB@viggo.jf.intel.com Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 1 + arch/x86/include/asm/pkeys.h | 6 ++++++ include/linux/pkeys.h | 25 +++++++++++++++++++++++++ mm/Kconfig | 2 ++ 4 files changed, 34 insertions(+) create mode 100644 arch/x86/include/asm/pkeys.h create mode 100644 include/linux/pkeys.h (limited to 'include/linux') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index b8754348de4d..eda18cecdbbd 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -156,6 +156,7 @@ config X86 select X86_DEV_DMA_OPS if X86_64 select X86_FEATURE_NAMES if PROC_FS select ARCH_USES_HIGH_VMA_FLAGS if X86_INTEL_MEMORY_PROTECTION_KEYS + select ARCH_HAS_PKEYS if X86_INTEL_MEMORY_PROTECTION_KEYS config INSTRUCTION_DECODER def_bool y diff --git a/arch/x86/include/asm/pkeys.h b/arch/x86/include/asm/pkeys.h new file mode 100644 index 000000000000..04243c23380c --- /dev/null +++ b/arch/x86/include/asm/pkeys.h @@ -0,0 +1,6 @@ +#ifndef _ASM_X86_PKEYS_H +#define _ASM_X86_PKEYS_H + +#define arch_max_pkey() (boot_cpu_has(X86_FEATURE_OSPKE) ? 16 : 1) + +#endif /*_ASM_X86_PKEYS_H */ diff --git a/include/linux/pkeys.h b/include/linux/pkeys.h new file mode 100644 index 000000000000..55e465f93a28 --- /dev/null +++ b/include/linux/pkeys.h @@ -0,0 +1,25 @@ +#ifndef _LINUX_PKEYS_H +#define _LINUX_PKEYS_H + +#include +#include + +#ifdef CONFIG_ARCH_HAS_PKEYS +#include +#else /* ! CONFIG_ARCH_HAS_PKEYS */ +#define arch_max_pkey() (1) +#endif /* ! CONFIG_ARCH_HAS_PKEYS */ + +/* + * This is called from mprotect_pkey(). + * + * Returns true if the protection keys is valid. + */ +static inline bool validate_pkey(int pkey) +{ + if (pkey < 0) + return false; + return (pkey < arch_max_pkey()); +} + +#endif /* _LINUX_PKEYS_H */ diff --git a/mm/Kconfig b/mm/Kconfig index 6cf439948297..2702bb6c21df 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -672,3 +672,5 @@ config FRAME_VECTOR config ARCH_USES_HIGH_VMA_FLAGS bool +config ARCH_HAS_PKEYS + bool -- cgit v1.2.3 From 8459429693395ca9e8d18101300b120ad9171795 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Fri, 12 Feb 2016 13:02:36 -0800 Subject: x86/mm/pkeys: Allow kernel to modify user pkey rights register The Protection Key Rights for User memory (PKRU) is a 32-bit user-accessible register. It contains two bits for each protection key: one to write-disable (WD) access to memory covered by the key and another to access-disable (AD). Userspace can read/write the register with the RDPKRU and WRPKRU instructions. But, the register is saved and restored with the XSAVE family of instructions, which means we have to treat it like a floating point register. The kernel needs to write to the register if it wants to implement execute-only memory or if it implements a system call to change PKRU. To do this, we need to create a 'pkru_state' buffer, read the old contents in to it, modify it, and then tell the FPU code that there is modified data in there so it can (possibly) move the buffer back in to the registers. This uses the fpu__xfeature_set_state() function that we defined in the previous patch. Signed-off-by: Dave Hansen Reviewed-by: Thomas Gleixner Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Denys Vlasenko Cc: H. 
Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Rik van Riel Cc: linux-mm@kvack.org Link: http://lkml.kernel.org/r/20160212210236.0BE13217@viggo.jf.intel.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/pgtable.h | 5 +-- arch/x86/include/asm/pkeys.h | 3 ++ arch/x86/kernel/fpu/xstate.c | 74 ++++++++++++++++++++++++++++++++++++++++++ include/linux/pkeys.h | 5 +++ 4 files changed, 85 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 3cbfae80abb2..1ff49ec29ece 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -921,16 +921,17 @@ static inline pte_t pte_swp_clear_soft_dirty(pte_t pte) #define PKRU_AD_BIT 0x1 #define PKRU_WD_BIT 0x2 +#define PKRU_BITS_PER_PKEY 2 static inline bool __pkru_allows_read(u32 pkru, u16 pkey) { - int pkru_pkey_bits = pkey * 2; + int pkru_pkey_bits = pkey * PKRU_BITS_PER_PKEY; return !(pkru & (PKRU_AD_BIT << pkru_pkey_bits)); } static inline bool __pkru_allows_write(u32 pkru, u16 pkey) { - int pkru_pkey_bits = pkey * 2; + int pkru_pkey_bits = pkey * PKRU_BITS_PER_PKEY; /* * Access-disable disables writes too so we need to check * both bits here. diff --git a/arch/x86/include/asm/pkeys.h b/arch/x86/include/asm/pkeys.h index 04243c23380c..5061aec2ed5e 100644 --- a/arch/x86/include/asm/pkeys.h +++ b/arch/x86/include/asm/pkeys.h @@ -3,4 +3,7 @@ #define arch_max_pkey() (boot_cpu_has(X86_FEATURE_OSPKE) ? 16 : 1) +extern int arch_set_user_pkey_access(struct task_struct *tsk, int pkey, + unsigned long init_val); + #endif /*_ASM_X86_PKEYS_H */ diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 30d144f01eb9..50813c35e9d9 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -5,6 +5,7 @@ */ #include #include +#include #include #include @@ -855,3 +856,76 @@ out: */ fpu__current_fpstate_write_end(); } + +#define NR_VALID_PKRU_BITS (CONFIG_NR_PROTECTION_KEYS * 2) +#define PKRU_VALID_MASK (NR_VALID_PKRU_BITS - 1) + +/* + * This will go out and modify the XSAVE buffer so that PKRU is + * set to a particular state for access to 'pkey'. + * + * PKRU state does affect kernel access to user memory. We do + * not modfiy PKRU *itself* here, only the XSAVE state that will + * be restored in to PKRU when we return back to userspace. + */ +int arch_set_user_pkey_access(struct task_struct *tsk, int pkey, + unsigned long init_val) +{ + struct xregs_state *xsave = &tsk->thread.fpu.state.xsave; + struct pkru_state *old_pkru_state; + struct pkru_state new_pkru_state; + int pkey_shift = (pkey * PKRU_BITS_PER_PKEY); + u32 new_pkru_bits = 0; + + if (!validate_pkey(pkey)) + return -EINVAL; + /* + * This check implies XSAVE support. OSPKE only gets + * set if we enable XSAVE and we enable PKU in XCR0. + */ + if (!boot_cpu_has(X86_FEATURE_OSPKE)) + return -EINVAL; + + /* Set the bits we need in PKRU */ + if (init_val & PKEY_DISABLE_ACCESS) + new_pkru_bits |= PKRU_AD_BIT; + if (init_val & PKEY_DISABLE_WRITE) + new_pkru_bits |= PKRU_WD_BIT; + + /* Shift the bits in to the correct place in PKRU for pkey. */ + new_pkru_bits <<= pkey_shift; + + /* Locate old copy of the state in the xsave buffer */ + old_pkru_state = get_xsave_addr(xsave, XFEATURE_MASK_PKRU); + + /* + * When state is not in the buffer, it is in the init + * state, set it manually. Otherwise, copy out the old + * state. 
+ */ + if (!old_pkru_state) + new_pkru_state.pkru = 0; + else + new_pkru_state.pkru = old_pkru_state->pkru; + + /* mask off any old bits in place */ + new_pkru_state.pkru &= ~((PKRU_AD_BIT|PKRU_WD_BIT) << pkey_shift); + /* Set the newly-requested bits */ + new_pkru_state.pkru |= new_pkru_bits; + + /* + * We could theoretically live without zeroing pkru.pad. + * The current XSAVE feature state definition says that + * only bytes 0->3 are used. But we do not want to + * chance leaking kernel stack out to userspace in case a + * memcpy() of the whole xsave buffer was done. + * + * They're in the same cacheline anyway. + */ + new_pkru_state.pad = 0; + + fpu__xfeature_set_state(XFEATURE_MASK_PKRU, &new_pkru_state, + sizeof(new_pkru_state)); + + return 0; +} diff --git a/include/linux/pkeys.h b/include/linux/pkeys.h index 55e465f93a28..fc325b367bd0 100644 --- a/include/linux/pkeys.h +++ b/include/linux/pkeys.h @@ -4,6 +4,11 @@ #include #include +#define PKEY_DISABLE_ACCESS 0x1 +#define PKEY_DISABLE_WRITE 0x2 +#define PKEY_ACCESS_MASK (PKEY_DISABLE_ACCESS |\ + PKEY_DISABLE_WRITE) + #ifdef CONFIG_ARCH_HAS_PKEYS #include #else /* ! CONFIG_ARCH_HAS_PKEYS */ -- cgit v1.2.3 From 62b5f7d013fc455b8db26cf01e421f4c0d264b92 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Fri, 12 Feb 2016 13:02:40 -0800 Subject: mm/core, x86/mm/pkeys: Add execute-only protection keys support Protection keys provide new page-based protection in hardware. But, they have an interesting attribute: they only affect data accesses and never affect instruction fetches. That means that if we set up some memory which is set as "access-disabled" via protection keys, we can still execute from it. This patch uses protection keys to set up mappings to do just that. If a user calls: mmap(..., PROT_EXEC); or mprotect(ptr, sz, PROT_EXEC); (note PROT_EXEC-only without PROT_READ/WRITE), the kernel will notice this, and set a special protection key on the memory. It also sets the appropriate bits in the Protection Keys User Rights (PKRU) register so that the memory becomes unreadable and unwritable. I haven't found any userspace that does this today. With this facility in place, we expect userspace to move to use it eventually. Userspace _could_ start doing this today. Any PROT_EXEC calls get converted to PROT_READ inside the kernel, and would transparently be upgraded to "true" PROT_EXEC with this code. IOW, userspace never has to do any PROT_EXEC runtime detection. This feature provides enhanced protection against leaking executable memory contents. This helps thwart attacks which are attempting to find ROP gadgets on the fly. But, the security provided by this approach is not comprehensive. The PKRU register which controls access permissions is a normal user register writable from unprivileged userspace. An attacker who can execute the 'wrpkru' instruction can easily disable the protection provided by this feature. The protection key that is used for execute-only support is permanently dedicated at compile time. This is fine for now because there is currently no API to set a protection key other than this one. Despite there being a constant PKRU value across the entire system, we do not set it unless this feature is in use in a process. That is to preserve the PKRU XSAVE 'init state', which can lead to faster context switches. PKRU *is* a user register and the kernel is modifying it. 
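[Editor's note] Since PKRU dedicates two bits to each key, the access checks reduce to simple bit tests. A self-contained sketch of those semantics, mirroring the __pkru_allows_read()/__pkru_allows_write() helpers shown in the pgtable.h hunk of the previous patch:

#include <stdbool.h>
#include <stdint.h>

#define PKRU_AD_BIT        0x1 /* access disable */
#define PKRU_WD_BIT        0x2 /* write disable */
#define PKRU_BITS_PER_PKEY 2

static bool demo_pkru_allows_read(uint32_t pkru, int pkey)
{
	int shift = pkey * PKRU_BITS_PER_PKEY;

	return !(pkru & (PKRU_AD_BIT << shift));
}

static bool demo_pkru_allows_write(uint32_t pkru, int pkey)
{
	int shift = pkey * PKRU_BITS_PER_PKEY;

	/* Access-disable forbids writes too, so both bits are checked. */
	return !(pkru & ((PKRU_AD_BIT | PKRU_WD_BIT) << shift));
}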
That means that code doing: pkru = rdpkru() pkru |= 0x100; mmap(..., PROT_EXEC); wrpkru(pkru); could lose the bits in PKRU that enforce execute-only permissions. To avoid this, we suggest avoiding ever calling mmap() or mprotect() when the PKRU value is expected to be unstable. Signed-off-by: Dave Hansen Reviewed-by: Thomas Gleixner Cc: Andrea Arcangeli Cc: Andrew Morton Cc: Andy Lutomirski Cc: Andy Lutomirski Cc: Aneesh Kumar K.V Cc: Borislav Petkov Cc: Borislav Petkov Cc: Brian Gerst Cc: Chen Gang Cc: Dan Williams Cc: Dave Chinner Cc: Dave Hansen Cc: David Hildenbrand Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Kees Cook Cc: Kirill A. Shutemov Cc: Konstantin Khlebnikov Cc: Linus Torvalds Cc: Mel Gorman Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Piotr Kwapulinski Cc: Rik van Riel Cc: Stephen Smalley Cc: Vladimir Murzin Cc: Will Deacon Cc: keescook@google.com Cc: linux-kernel@vger.kernel.org Cc: linux-mm@kvack.org Link: http://lkml.kernel.org/r/20160212210240.CB4BB5CA@viggo.jf.intel.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/pkeys.h | 25 +++++++++++ arch/x86/kernel/fpu/xstate.c | 2 - arch/x86/mm/Makefile | 2 + arch/x86/mm/fault.c | 10 +++++ arch/x86/mm/pkeys.c | 101 +++++++++++++++++++++++++++++++++++++++++++ include/linux/pkeys.h | 3 ++ mm/mmap.c | 10 ++++- mm/mprotect.c | 8 ++-- 8 files changed, 154 insertions(+), 7 deletions(-) create mode 100644 arch/x86/mm/pkeys.c (limited to 'include/linux') diff --git a/arch/x86/include/asm/pkeys.h b/arch/x86/include/asm/pkeys.h index 5061aec2ed5e..7b84565c916c 100644 --- a/arch/x86/include/asm/pkeys.h +++ b/arch/x86/include/asm/pkeys.h @@ -6,4 +6,29 @@ extern int arch_set_user_pkey_access(struct task_struct *tsk, int pkey, unsigned long init_val); +/* + * Try to dedicate one of the protection keys to be used as an + * execute-only protection key. + */ +#define PKEY_DEDICATED_EXECUTE_ONLY 15 +extern int __execute_only_pkey(struct mm_struct *mm); +static inline int execute_only_pkey(struct mm_struct *mm) +{ + if (!boot_cpu_has(X86_FEATURE_OSPKE)) + return 0; + + return __execute_only_pkey(mm); +} + +extern int __arch_override_mprotect_pkey(struct vm_area_struct *vma, + int prot, int pkey); +static inline int arch_override_mprotect_pkey(struct vm_area_struct *vma, + int prot, int pkey) +{ + if (!boot_cpu_has(X86_FEATURE_OSPKE)) + return 0; + + return __arch_override_mprotect_pkey(vma, prot, pkey); +} + #endif /*_ASM_X86_PKEYS_H */ diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 50813c35e9d9..1b1981812bb6 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -877,8 +877,6 @@ int arch_set_user_pkey_access(struct task_struct *tsk, int pkey, int pkey_shift = (pkey * PKRU_BITS_PER_PKEY); u32 new_pkru_bits = 0; - if (!validate_pkey(pkey)) - return -EINVAL; /* * This check implies XSAVE support. OSPKE only gets * set if we enable XSAVE and we enable PKU in XCR0. 
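/*
 * [Editor's sketch, not part of the patch] The PKRU update performed by
 * arch_set_user_pkey_access() above, reduced to its bit arithmetic:
 * clear the two bits belonging to 'pkey', then set the requested AD/WD
 * bits in their place. The constant values match the definitions
 * introduced earlier in this series.
 */
#include <stdint.h>

#define PKEY_DISABLE_ACCESS 0x1
#define PKEY_DISABLE_WRITE  0x2
#define PKRU_AD_BIT         0x1
#define PKRU_WD_BIT         0x2
#define PKRU_BITS_PER_PKEY  2

static uint32_t demo_update_pkru(uint32_t pkru, int pkey, unsigned long init_val)
{
	int shift = pkey * PKRU_BITS_PER_PKEY;
	uint32_t new_bits = 0;

	if (init_val & PKEY_DISABLE_ACCESS)
		new_bits |= PKRU_AD_BIT;
	if (init_val & PKEY_DISABLE_WRITE)
		new_bits |= PKRU_WD_BIT;

	/* Mask off any old bits in place, then set the new ones. */
	pkru &= ~((PKRU_AD_BIT | PKRU_WD_BIT) << shift);
	return pkru | (new_bits << shift);
}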
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile index f9d38a48e3c8..67cf2e1e557b 100644 --- a/arch/x86/mm/Makefile +++ b/arch/x86/mm/Makefile @@ -34,3 +34,5 @@ obj-$(CONFIG_ACPI_NUMA) += srat.o obj-$(CONFIG_NUMA_EMU) += numa_emulation.o obj-$(CONFIG_X86_INTEL_MPX) += mpx.o +obj-$(CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS) += pkeys.o + diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index d81744e6f39f..5877b92ab6f1 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -1108,6 +1108,16 @@ access_error(unsigned long error_code, struct vm_area_struct *vma) */ if (error_code & PF_PK) return 1; + + if (!(error_code & PF_INSTR)) { + /* + * Assume all accesses require either read or execute + * permissions. This is not an instruction access, so + * it requires read permissions. + */ + if (!(vma->vm_flags & VM_READ)) + return 1; + } /* * Make sure to check the VMA so that we do not perform * faults just to hit a PF_PK as soon as we fill in a diff --git a/arch/x86/mm/pkeys.c b/arch/x86/mm/pkeys.c new file mode 100644 index 000000000000..e8c474451928 --- /dev/null +++ b/arch/x86/mm/pkeys.c @@ -0,0 +1,101 @@ +/* + * Intel Memory Protection Keys management + * Copyright (c) 2015, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ +#include /* mm_struct, vma, etc... */ +#include /* PKEY_* */ +#include + +#include /* boot_cpu_has, ... */ +#include /* vma_pkey() */ +#include /* fpregs_active() */ + +int __execute_only_pkey(struct mm_struct *mm) +{ + int ret; + + /* + * We do not want to go through the relatively costly + * dance to set PKRU if we do not need to. Check it + * first and assume that if the execute-only pkey is + * write-disabled that we do not have to set it + * ourselves. We need preempt off so that nobody + * can make fpregs inactive. + */ + preempt_disable(); + if (fpregs_active() && + !__pkru_allows_read(read_pkru(), PKEY_DEDICATED_EXECUTE_ONLY)) { + preempt_enable(); + return PKEY_DEDICATED_EXECUTE_ONLY; + } + preempt_enable(); + ret = arch_set_user_pkey_access(current, PKEY_DEDICATED_EXECUTE_ONLY, + PKEY_DISABLE_ACCESS); + /* + * If the PKRU-set operation failed somehow, just return + * 0 and effectively disable execute-only support. + */ + if (ret) + return 0; + + return PKEY_DEDICATED_EXECUTE_ONLY; +} + +static inline bool vma_is_pkey_exec_only(struct vm_area_struct *vma) +{ + /* Do this check first since the vm_flags should be hot */ + if ((vma->vm_flags & (VM_READ | VM_WRITE | VM_EXEC)) != VM_EXEC) + return false; + if (vma_pkey(vma) != PKEY_DEDICATED_EXECUTE_ONLY) + return false; + + return true; +} + +/* + * This is only called for *plain* mprotect calls. + */ +int __arch_override_mprotect_pkey(struct vm_area_struct *vma, int prot, int pkey) +{ + /* + * Is this an mprotect_pkey() call? If so, never + * override the value that came from the user. + */ + if (pkey != -1) + return pkey; + /* + * Look for a protection-key-drive execute-only mapping + * which is now being given permissions that are not + * execute-only. Move it back to the default pkey. 
+ */ + if (vma_is_pkey_exec_only(vma) && + (prot & (PROT_READ|PROT_WRITE))) { + return 0; + } + /* + * The mapping is execute-only. Go try to get the + * execute-only protection key. If we fail to do that, + * fall through as if we do not have execute-only + * support. + */ + if (prot == PROT_EXEC) { + pkey = execute_only_pkey(vma->vm_mm); + if (pkey > 0) + return pkey; + } + /* + * This is a vanilla, non-pkey mprotect (or we failed to + * setup execute-only), inherit the pkey from the VMA we + * are working on. + */ + return vma_pkey(vma); +} diff --git a/include/linux/pkeys.h b/include/linux/pkeys.h index fc325b367bd0..1d405a2b7272 100644 --- a/include/linux/pkeys.h +++ b/include/linux/pkeys.h @@ -13,6 +13,9 @@ #include #else /* ! CONFIG_ARCH_HAS_PKEYS */ #define arch_max_pkey() (1) +#define execute_only_pkey(mm) (0) +#define arch_override_mprotect_pkey(vma, prot, pkey) (0) +#define PKEY_DEDICATED_EXECUTE_ONLY 0 #endif /* ! CONFIG_ARCH_HAS_PKEYS */ /* diff --git a/mm/mmap.c b/mm/mmap.c index 784d2d6142a2..0175b7d055f0 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -43,6 +43,7 @@ #include #include #include +#include #include #include @@ -1270,6 +1271,7 @@ unsigned long do_mmap(struct file *file, unsigned long addr, unsigned long pgoff, unsigned long *populate) { struct mm_struct *mm = current->mm; + int pkey = 0; *populate = 0; @@ -1309,11 +1311,17 @@ unsigned long do_mmap(struct file *file, unsigned long addr, if (offset_in_page(addr)) return addr; + if (prot == PROT_EXEC) { + pkey = execute_only_pkey(mm); + if (pkey < 0) + pkey = 0; + } + /* Do simple checking here so the lower-level routines won't have * to. we assume access permissions have been handled by the open * of the memory object, so we don't do any here. */ - vm_flags |= calc_vm_prot_bits(prot, 0) | calc_vm_flag_bits(flags) | + vm_flags |= calc_vm_prot_bits(prot, pkey) | calc_vm_flag_bits(flags) | mm->def_flags | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC; if (flags & MAP_LOCKED) diff --git a/mm/mprotect.c b/mm/mprotect.c index 3790c8bee380..fa37c4cd973a 100644 --- a/mm/mprotect.c +++ b/mm/mprotect.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include @@ -354,7 +355,7 @@ fail: SYSCALL_DEFINE3(mprotect, unsigned long, start, size_t, len, unsigned long, prot) { - unsigned long vm_flags, nstart, end, tmp, reqprot; + unsigned long nstart, end, tmp, reqprot; struct vm_area_struct *vma, *prev; int error = -EINVAL; const int grows = prot & (PROT_GROWSDOWN|PROT_GROWSUP); @@ -380,8 +381,6 @@ SYSCALL_DEFINE3(mprotect, unsigned long, start, size_t, len, if ((prot & PROT_READ) && (current->personality & READ_IMPLIES_EXEC)) prot |= PROT_EXEC; - vm_flags = calc_vm_prot_bits(prot, 0); - down_write(¤t->mm->mmap_sem); vma = find_vma(current->mm, start); @@ -411,10 +410,11 @@ SYSCALL_DEFINE3(mprotect, unsigned long, start, size_t, len, for (nstart = start ; ; ) { unsigned long newflags; + int pkey = arch_override_mprotect_pkey(vma, prot, -1); /* Here we know that vma->vm_start <= nstart < vma->vm_end. 
*/ - newflags = vm_flags; + newflags = calc_vm_prot_bits(prot, pkey); newflags |= (vma->vm_flags & ~(VM_READ | VM_WRITE | VM_EXEC)); /* newflags >> 4 shift VM_MAY% in place of VM_% */ -- cgit v1.2.3 From 9e74a6dadbbf31ac18a2712048bf866c8e32aab2 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Wed, 17 Feb 2016 11:23:55 -0800 Subject: net: Optimize local checksum offload This patch takes advantage of several assumptions we can make about the headers of the frame in order to reduce overall processing overhead for computing the outer header checksum. First we can assume the entire header is in the region pointed to by skb->head as this is what csum_start is based on. Second, as a result of our first assumption, we can just call csum_partial instead of making a call to skb_checksum which would end up having to configure things so that we could walk through the frags list. Signed-off-by: Alexander Duyck Signed-off-by: David S. Miller --- include/linux/skbuff.h | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 39206751463e..89b536796e53 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -3713,19 +3713,18 @@ static inline unsigned int skb_gso_network_seglen(const struct sk_buff *skb) */ static inline __wsum lco_csum(struct sk_buff *skb) { - char *inner_csum_field; - __wsum csum; + unsigned char *csum_start = skb_checksum_start(skb); + unsigned char *l4_hdr = skb_transport_header(skb); + __wsum partial; /* Start with complement of inner checksum adjustment */ - inner_csum_field = skb->data + skb_checksum_start_offset(skb) + - skb->csum_offset; - csum = ~csum_unfold(*(__force __sum16 *)inner_csum_field); + partial = ~csum_unfold(*(__force __sum16 *)(csum_start + + skb->csum_offset)); + /* Add in checksum of our headers (incl. outer checksum - * adjustment filled in by caller) + * adjustment filled in by caller) and return result. */ - csum = skb_checksum(skb, 0, skb_checksum_start_offset(skb), csum); - /* The result is the checksum from skb->data to end of packet */ - return csum; + return csum_partial(l4_hdr, csum_start - l4_hdr, partial); } #endif /* __KERNEL__ */ -- cgit v1.2.3 From 3f9b4a6972d50562613daa649ed064244e6bc7bb Mon Sep 17 00:00:00 2001 From: Yuval Mintz Date: Thu, 18 Feb 2016 17:00:39 +0200 Subject: qed: Lay infrastructure for vlan filtering offload Today, interfaces are working in vlan-promisc mode; But once vlan filtering offloaded would be supported, we'll need a method to control it directly [e.g., when setting device to PROMISC, or when running out of vlan credits]. This adds the necessary API for L2 client to manually choose whether to accept all vlans or only those for which filters were configured. Signed-off-by: Yuval Mintz Signed-off-by: David S. 
Miller --- drivers/net/ethernet/qlogic/qed/qed_l2.c | 23 +++++++++++++++++++---- include/linux/qed/qed_eth_if.h | 2 ++ 2 files changed, 21 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/qlogic/qed/qed_l2.c b/drivers/net/ethernet/qlogic/qed/qed_l2.c index 978d07a61bbf..73feaf7eedb8 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_l2.c +++ b/drivers/net/ethernet/qlogic/qed/qed_l2.c @@ -124,6 +124,8 @@ struct qed_sp_vport_update_params { u8 update_vport_active_tx_flg; u8 vport_active_tx_flg; u8 update_approx_mcast_flg; + u8 update_accept_any_vlan_flg; + u8 accept_any_vlan; unsigned long bins[8]; struct qed_rss_params *rss_params; struct qed_filter_accept_flags accept_flags; @@ -393,7 +395,9 @@ qed_sp_vport_update(struct qed_hwfn *p_hwfn, p_cmn->update_rx_active_flg = p_params->update_vport_active_rx_flg; p_cmn->tx_active_flg = p_params->vport_active_tx_flg; p_cmn->update_tx_active_flg = p_params->update_vport_active_tx_flg; - + p_cmn->accept_any_vlan = p_params->accept_any_vlan; + p_cmn->update_accept_any_vlan_flg = + p_params->update_accept_any_vlan_flg; rc = qed_sp_vport_update_rss(p_hwfn, p_ramrod, p_rss_params); if (rc) { /* Return spq entry which is taken in qed_sp_init_request()*/ @@ -444,8 +448,10 @@ static int qed_sp_vport_stop(struct qed_hwfn *p_hwfn, static int qed_filter_accept_cmd(struct qed_dev *cdev, u8 vport, struct qed_filter_accept_flags accept_flags, - enum spq_mode comp_mode, - struct qed_spq_comp_cb *p_comp_data) + u8 update_accept_any_vlan, + u8 accept_any_vlan, + enum spq_mode comp_mode, + struct qed_spq_comp_cb *p_comp_data) { struct qed_sp_vport_update_params vport_update_params; int i, rc; @@ -454,6 +460,8 @@ static int qed_filter_accept_cmd(struct qed_dev *cdev, memset(&vport_update_params, 0, sizeof(vport_update_params)); vport_update_params.vport_id = vport; vport_update_params.accept_flags = accept_flags; + vport_update_params.update_accept_any_vlan_flg = update_accept_any_vlan; + vport_update_params.accept_any_vlan = accept_any_vlan; for_each_hwfn(cdev, i) { struct qed_hwfn *p_hwfn = &cdev->hwfns[i]; @@ -471,6 +479,10 @@ static int qed_filter_accept_cmd(struct qed_dev *cdev, "Accept filter configured, flags = [Rx]%x [Tx]%x\n", accept_flags.rx_accept_filter, accept_flags.tx_accept_filter); + if (update_accept_any_vlan) + DP_VERBOSE(p_hwfn, QED_MSG_SP, + "accept_any_vlan=%d configured\n", + accept_any_vlan); } return 0; @@ -1347,6 +1359,9 @@ static int qed_update_vport(struct qed_dev *cdev, params->update_vport_active_flg; sp_params.vport_active_rx_flg = params->vport_active_flg; sp_params.vport_active_tx_flg = params->vport_active_flg; + sp_params.accept_any_vlan = params->accept_any_vlan; + sp_params.update_accept_any_vlan_flg = + params->update_accept_any_vlan_flg; /* RSS - is a bit tricky, since upper-layer isn't familiar with hwfns. * We need to re-fix the rss values per engine for CMT. 
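/*
 * [Editor's sketch, not part of the patch] The new accept_any_vlan knob
 * follows qed's sparse-update convention: every settable field travels
 * with an update_*_flg companion, so a caller can change one attribute
 * without clobbering the rest. A self-contained distillation of that
 * pattern; callers zero the update structure and set only the pair of
 * fields they mean to change:
 */
struct demo_vport_state {
	unsigned char accept_any_vlan;
	unsigned char active;
};

struct demo_vport_update {
	unsigned char update_accept_any_vlan_flg;
	unsigned char accept_any_vlan;
	unsigned char update_active_flg;
	unsigned char active;
};

static void demo_apply_vport_update(struct demo_vport_state *st,
				    const struct demo_vport_update *up)
{
	/* Only fields whose update flag is set are touched. */
	if (up->update_accept_any_vlan_flg)
		st->accept_any_vlan = up->accept_any_vlan;
	if (up->update_active_flg)
		st->active = up->active;
}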
@@ -1566,7 +1581,7 @@ static int qed_configure_filter_rx_mode(struct qed_dev *cdev, else if (type == QED_FILTER_RX_MODE_TYPE_MULTI_PROMISC) accept_flags.rx_accept_filter |= QED_ACCEPT_MCAST_UNMATCHED; - return qed_filter_accept_cmd(cdev, 0, accept_flags, + return qed_filter_accept_cmd(cdev, 0, accept_flags, false, false, QED_SPQ_MODE_CB, NULL); } diff --git a/include/linux/qed/qed_eth_if.h b/include/linux/qed/qed_eth_if.h index 81ab178e31c1..e53b0ca49e41 100644 --- a/include/linux/qed/qed_eth_if.h +++ b/include/linux/qed/qed_eth_if.h @@ -33,6 +33,8 @@ struct qed_update_vport_params { u8 vport_id; u8 update_vport_active_flg; u8 vport_active_flg; + u8 update_accept_any_vlan_flg; + u8 accept_any_vlan; u8 update_rss_flg; struct qed_update_vport_rss_params rss_params; }; -- cgit v1.2.3 From b44a7dfc6fa16e01f2497c9fa62c3926f94be174 Mon Sep 17 00:00:00 2001 From: Mimi Zohar Date: Mon, 28 Dec 2015 16:02:29 -0500 Subject: vfs: define a generic function to read a file from the kernel For a while it was looked down upon to directly read files from Linux. These days there exists a few mechanisms in the kernel that do just this though to load a file into a local buffer. There are minor but important checks differences on each. This patch set is the first attempt at resolving some of these differences. This patch introduces a common function for reading files from the kernel with the corresponding security post-read hook and function. Changelog v4+: - export security_kernel_post_read_file() - Fengguang Wu v3: - additional bounds checking - Luis v2: - To simplify patch review, re-ordered patches Signed-off-by: Mimi Zohar Reviewed-by: Luis R. Rodriguez Acked-by: Kees Cook Cc: Al Viro --- fs/exec.c | 53 +++++++++++++++++++++++++++++++++++++++++++++++ include/linux/fs.h | 1 + include/linux/lsm_hooks.h | 9 ++++++++ include/linux/security.h | 7 +++++++ security/security.c | 8 +++++++ 5 files changed, 78 insertions(+) (limited to 'include/linux') diff --git a/fs/exec.c b/fs/exec.c index dcd4ac7d3f1e..6b6668baa44a 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -56,6 +56,7 @@ #include #include #include +#include #include #include @@ -831,6 +832,58 @@ int kernel_read(struct file *file, loff_t offset, EXPORT_SYMBOL(kernel_read); +int kernel_read_file(struct file *file, void **buf, loff_t *size, + loff_t max_size) +{ + loff_t i_size, pos; + ssize_t bytes = 0; + int ret; + + if (!S_ISREG(file_inode(file)->i_mode) || max_size < 0) + return -EINVAL; + + i_size = i_size_read(file_inode(file)); + if (max_size > 0 && i_size > max_size) + return -EFBIG; + if (i_size <= 0) + return -EINVAL; + + *buf = vmalloc(i_size); + if (!*buf) + return -ENOMEM; + + pos = 0; + while (pos < i_size) { + bytes = kernel_read(file, pos, (char *)(*buf) + pos, + i_size - pos); + if (bytes < 0) { + ret = bytes; + goto out; + } + + if (bytes == 0) + break; + pos += bytes; + } + + if (pos != i_size) { + ret = -EIO; + goto out; + } + + ret = security_kernel_post_read_file(file, *buf, i_size); + if (!ret) + *size = pos; + +out: + if (ret < 0) { + vfree(*buf); + *buf = NULL; + } + return ret; +} +EXPORT_SYMBOL_GPL(kernel_read_file); + ssize_t read_code(struct file *file, unsigned long addr, loff_t pos, size_t len) { ssize_t res = vfs_read(file, (void __user *)addr, len, &pos); diff --git a/include/linux/fs.h b/include/linux/fs.h index ae681002100a..9a83d82b61ac 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2577,6 +2577,7 @@ static inline void i_readcount_inc(struct inode *inode) extern int do_pipe_flags(int *, int); extern int 
kernel_read(struct file *, loff_t, char *, unsigned long); +extern int kernel_read_file(struct file *, void **, loff_t *, loff_t); extern ssize_t kernel_write(struct file *, const char *, size_t, loff_t); extern ssize_t __kernel_write(struct file *, const char *, size_t, loff_t *); extern struct file * open_exec(const char *); diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index 71969de4058c..f82631cc7248 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -561,6 +561,13 @@ * the kernel module to load. If the module is being loaded from a blob, * this argument will be NULL. * Return 0 if permission is granted. + * @kernel_post_read_file: + * Read a file specified by userspace. + * @file contains the file structure pointing to the file being read + * by the kernel. + * @buf pointer to buffer containing the file contents. + * @size length of the file contents. + * Return 0 if permission is granted. * @task_fix_setuid: * Update the module's state after setting one or more of the user * identity attributes of the current process. The @flags parameter @@ -1457,6 +1464,7 @@ union security_list_options { int (*kernel_fw_from_file)(struct file *file, char *buf, size_t size); int (*kernel_module_request)(char *kmod_name); int (*kernel_module_from_file)(struct file *file); + int (*kernel_post_read_file)(struct file *file, char *buf, loff_t size); int (*task_fix_setuid)(struct cred *new, const struct cred *old, int flags); int (*task_setpgid)(struct task_struct *p, pid_t pgid); @@ -1716,6 +1724,7 @@ struct security_hook_heads { struct list_head kernel_act_as; struct list_head kernel_create_files_as; struct list_head kernel_fw_from_file; + struct list_head kernel_post_read_file; struct list_head kernel_module_request; struct list_head kernel_module_from_file; struct list_head task_fix_setuid; diff --git a/include/linux/security.h b/include/linux/security.h index 4824a4ccaf1c..f30f5647d1e1 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -301,6 +301,7 @@ int security_kernel_create_files_as(struct cred *new, struct inode *inode); int security_kernel_fw_from_file(struct file *file, char *buf, size_t size); int security_kernel_module_request(char *kmod_name); int security_kernel_module_from_file(struct file *file); +int security_kernel_post_read_file(struct file *file, char *buf, loff_t size); int security_task_fix_setuid(struct cred *new, const struct cred *old, int flags); int security_task_setpgid(struct task_struct *p, pid_t pgid); @@ -866,6 +867,12 @@ static inline int security_kernel_module_from_file(struct file *file) return 0; } +static inline int security_kernel_post_read_file(struct file *file, + char *buf, loff_t size) +{ + return 0; +} + static inline int security_task_fix_setuid(struct cred *new, const struct cred *old, int flags) diff --git a/security/security.c b/security/security.c index e8ffd92ae2eb..c98dd6bf4ebd 100644 --- a/security/security.c +++ b/security/security.c @@ -910,6 +910,12 @@ int security_kernel_module_from_file(struct file *file) return ima_module_check(file); } +int security_kernel_post_read_file(struct file *file, char *buf, loff_t size) +{ + return call_int_hook(kernel_post_read_file, 0, file, buf, size); +} +EXPORT_SYMBOL_GPL(security_kernel_post_read_file); + int security_task_fix_setuid(struct cred *new, const struct cred *old, int flags) { @@ -1697,6 +1703,8 @@ struct security_hook_heads security_hook_heads = { LIST_HEAD_INIT(security_hook_heads.kernel_module_request), .kernel_module_from_file = 
LIST_HEAD_INIT(security_hook_heads.kernel_module_from_file), + .kernel_post_read_file = + LIST_HEAD_INIT(security_hook_heads.kernel_post_read_file), .task_fix_setuid = LIST_HEAD_INIT(security_hook_heads.task_fix_setuid), .task_setpgid = LIST_HEAD_INIT(security_hook_heads.task_setpgid), -- cgit v1.2.3 From bc8ca5b92d54f6f005fa73ad546f02fca26ddd85 Mon Sep 17 00:00:00 2001 From: Mimi Zohar Date: Sun, 24 Jan 2016 10:07:32 -0500 Subject: vfs: define kernel_read_file_id enumeration To differentiate between the kernel_read_file() callers, this patch defines a new enumeration named kernel_read_file_id and includes the caller identifier as an argument. Subsequent patches define READING_KEXEC_IMAGE, READING_KEXEC_INITRAMFS, READING_FIRMWARE, READING_MODULE, and READING_POLICY. Changelog v3: - Replace the IMA specific enumeration with a generic one. Signed-off-by: Mimi Zohar Acked-by: Kees Cook Acked-by: Luis R. Rodriguez Cc: Al Viro --- fs/exec.c | 4 ++-- include/linux/fs.h | 7 ++++++- include/linux/lsm_hooks.h | 4 +++- include/linux/security.h | 7 +++++-- security/security.c | 5 +++-- 5 files changed, 19 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/fs/exec.c b/fs/exec.c index 6b6668baa44a..1138dc502c77 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -833,7 +833,7 @@ int kernel_read(struct file *file, loff_t offset, EXPORT_SYMBOL(kernel_read); int kernel_read_file(struct file *file, void **buf, loff_t *size, - loff_t max_size) + loff_t max_size, enum kernel_read_file_id id) { loff_t i_size, pos; ssize_t bytes = 0; @@ -871,7 +871,7 @@ int kernel_read_file(struct file *file, void **buf, loff_t *size, goto out; } - ret = security_kernel_post_read_file(file, *buf, i_size); + ret = security_kernel_post_read_file(file, *buf, i_size, id); if (!ret) *size = pos; diff --git a/include/linux/fs.h b/include/linux/fs.h index 9a83d82b61ac..aa84bcb9c368 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2576,8 +2576,13 @@ static inline void i_readcount_inc(struct inode *inode) #endif extern int do_pipe_flags(int *, int); +enum kernel_read_file_id { + READING_MAX_ID +}; + extern int kernel_read(struct file *, loff_t, char *, unsigned long); -extern int kernel_read_file(struct file *, void **, loff_t *, loff_t); +extern int kernel_read_file(struct file *, void **, loff_t *, loff_t, + enum kernel_read_file_id); extern ssize_t kernel_write(struct file *, const char *, size_t, loff_t); extern ssize_t __kernel_write(struct file *, const char *, size_t, loff_t *); extern struct file * open_exec(const char *); diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index f82631cc7248..2337f33913c1 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -567,6 +567,7 @@ * by the kernel. * @buf pointer to buffer containing the file contents. * @size length of the file contents. + * @id kernel read file identifier * Return 0 if permission is granted. 
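To make the hook shape concrete, a minimal sketch of a hypothetical LSM registering against the new hook; all demo_* names are invented, only LSM_HOOK_INIT and the hook signature come from the patches above.

	static int demo_post_read_file(struct file *file, char *buf,
				       loff_t size, enum kernel_read_file_id id)
	{
		/* inspect the buffer the kernel just read, e.g. measure it */
		return 0;	/* 0 grants permission, a negative errno denies */
	}

	static struct security_hook_list demo_hooks[] = {
		LSM_HOOK_INIT(kernel_post_read_file, demo_post_read_file),
	};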
* @task_fix_setuid: * Update the module's state after setting one or more of the user @@ -1464,7 +1465,8 @@ union security_list_options { int (*kernel_fw_from_file)(struct file *file, char *buf, size_t size); int (*kernel_module_request)(char *kmod_name); int (*kernel_module_from_file)(struct file *file); - int (*kernel_post_read_file)(struct file *file, char *buf, loff_t size); + int (*kernel_post_read_file)(struct file *file, char *buf, loff_t size, + enum kernel_read_file_id id); int (*task_fix_setuid)(struct cred *new, const struct cred *old, int flags); int (*task_setpgid)(struct task_struct *p, pid_t pgid); diff --git a/include/linux/security.h b/include/linux/security.h index f30f5647d1e1..b68ce94e4e00 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -28,6 +28,7 @@ #include #include #include +#include struct linux_binprm; struct cred; @@ -301,7 +302,8 @@ int security_kernel_create_files_as(struct cred *new, struct inode *inode); int security_kernel_fw_from_file(struct file *file, char *buf, size_t size); int security_kernel_module_request(char *kmod_name); int security_kernel_module_from_file(struct file *file); -int security_kernel_post_read_file(struct file *file, char *buf, loff_t size); +int security_kernel_post_read_file(struct file *file, char *buf, loff_t size, + enum kernel_read_file_id id); int security_task_fix_setuid(struct cred *new, const struct cred *old, int flags); int security_task_setpgid(struct task_struct *p, pid_t pgid); @@ -868,7 +870,8 @@ static inline int security_kernel_module_from_file(struct file *file) } static inline int security_kernel_post_read_file(struct file *file, - char *buf, loff_t size) + char *buf, loff_t size, + enum kernel_read_file_id id) { return 0; } diff --git a/security/security.c b/security/security.c index c98dd6bf4ebd..5b96eabaafd4 100644 --- a/security/security.c +++ b/security/security.c @@ -910,9 +910,10 @@ int security_kernel_module_from_file(struct file *file) return ima_module_check(file); } -int security_kernel_post_read_file(struct file *file, char *buf, loff_t size) +int security_kernel_post_read_file(struct file *file, char *buf, loff_t size, + enum kernel_read_file_id id) { - return call_int_hook(kernel_post_read_file, 0, file, buf, size); + return call_int_hook(kernel_post_read_file, 0, file, buf, size, id); } EXPORT_SYMBOL_GPL(security_kernel_post_read_file); -- cgit v1.2.3 From 9eb45d5cc541142896f5fe7896d824c1091197c9 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Mon, 15 Feb 2016 09:41:59 +0100 Subject: PCI: Update VPD definitions The 'end' tag is actually 0x0f; it's the representation as a small resource data type tag that's 0x78 (i.e., shifted by 3). Correct PCI_VPD_STIN_END and PCI_VPD_SRDT_END accordingly. Also, add helper functions to extract the resource data type tags for both large and small resource data types. 
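As a hedged sketch of how a VPD parser inside a loop would use the corrected constants and the new helpers (the pointer p is assumed to sit on a resource header; not from this patch):

	const u8 *p = vpd_buf;			/* assumed VPD buffer */

	if (p[0] & PCI_VPD_LRDT) {		/* large resource, 3-byte header */
		u16 tag = pci_vpd_lrdt_tag(p);	/* e.g. PCI_VPD_LTIN_ID_STRING */
		u16 len = pci_vpd_lrdt_size(p);	/* data bytes after the header */
		p += PCI_VPD_LRDT_TAG_SIZE + len;
	} else {				/* small resource, 1-byte header */
		u8 tag = pci_vpd_srdt_tag(p);
		u8 len = pci_vpd_srdt_size(p);
		if (tag == PCI_VPD_STIN_END)	/* 0x0f terminates the VPD */
			return 0;
		p += PCI_VPD_SRDT_TAG_SIZE + len;
	}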
[bhelgaas: changelog] Tested-by: Shane Seymour Tested-by: Babu Moger Signed-off-by: Hannes Reinecke Signed-off-by: Bjorn Helgaas Cc: Alexander Duyck --- include/linux/pci.h | 27 +++++++++++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index 27df4a6585da..49ad85c3e88e 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1834,12 +1834,13 @@ bool pci_acs_path_enabled(struct pci_dev *start, #define PCI_VPD_LRDT_RW_DATA PCI_VPD_LRDT_ID(PCI_VPD_LTIN_RW_DATA) /* Small Resource Data Type Tag Item Names */ -#define PCI_VPD_STIN_END 0x78 /* End */ +#define PCI_VPD_STIN_END 0x0f /* End */ -#define PCI_VPD_SRDT_END PCI_VPD_STIN_END +#define PCI_VPD_SRDT_END (PCI_VPD_STIN_END << 3) #define PCI_VPD_SRDT_TIN_MASK 0x78 #define PCI_VPD_SRDT_LEN_MASK 0x07 +#define PCI_VPD_LRDT_TIN_MASK 0x7f #define PCI_VPD_LRDT_TAG_SIZE 3 #define PCI_VPD_SRDT_TAG_SIZE 1 @@ -1862,6 +1863,17 @@ static inline u16 pci_vpd_lrdt_size(const u8 *lrdt) return (u16)lrdt[1] + ((u16)lrdt[2] << 8); } +/** + * pci_vpd_lrdt_tag - Extracts the Large Resource Data Type Tag Item + * @lrdt: Pointer to the beginning of the Large Resource Data Type tag + * + * Returns the extracted Large Resource Data Type Tag item. + */ +static inline u16 pci_vpd_lrdt_tag(const u8 *lrdt) +{ + return (u16)(lrdt[0] & PCI_VPD_LRDT_TIN_MASK); +} + /** * pci_vpd_srdt_size - Extracts the Small Resource Data Type length * @lrdt: Pointer to the beginning of the Small Resource Data Type tag @@ -1873,6 +1885,17 @@ static inline u8 pci_vpd_srdt_size(const u8 *srdt) return (*srdt) & PCI_VPD_SRDT_LEN_MASK; } +/** + * pci_vpd_srdt_tag - Extracts the Small Resource Data Type Tag Item + * @srdt: Pointer to the beginning of the Small Resource Data Type tag + * + * Returns the extracted Small Resource Data Type Tag Item. + */ +static inline u8 pci_vpd_srdt_tag(const u8 *srdt) +{ + return ((*srdt) & PCI_VPD_SRDT_TIN_MASK) >> 3; +} + /** * pci_vpd_info_field_size - Extracts the information field length * @lrdt: Pointer to the beginning of an information field header -- cgit v1.2.3 From ee2204a37957daed80418ea8ffc4f5c3146fb8e7 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 16 Feb 2016 16:40:35 +0100 Subject: gpio: remove broken irq_to_gpio() interface gpiolib removed the irq_to_gpio() API several years ago, but the global header still provided a non-working stub. To prevent new users of this broken function from showing up, let's remove the stubs as well. Signed-off-by: Arnd Bergmann Signed-off-by: Linus Walleij --- include/linux/gpio.h | 12 ------------ 1 file changed, 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/gpio.h b/include/linux/gpio.h index d12b5d566e4b..6fc1c9e74854 100644 --- a/include/linux/gpio.h +++ b/include/linux/gpio.h @@ -70,11 +70,6 @@ static inline int gpio_to_irq(unsigned int gpio) return __gpio_to_irq(gpio); } -static inline int irq_to_gpio(unsigned int irq) -{ - return -EINVAL; -} - #endif /* !
CONFIG_ARCH_HAVE_CUSTOM_GPIO_H */ /* CONFIG_GPIOLIB: bindings for managed devices that want to request gpios */ @@ -222,13 +217,6 @@ static inline void gpiochip_unlock_as_irq(struct gpio_chip *chip, WARN_ON(1); } -static inline int irq_to_gpio(unsigned irq) -{ - /* irq can never have been returned from gpio_to_irq() */ - WARN_ON(1); - return -EINVAL; -} - static inline int gpiochip_add_pin_range(struct gpio_chip *chip, const char *pinctl_name, unsigned int gpio_offset, unsigned int pin_offset, -- cgit v1.2.3 From df4878e969ccc047da45d2cd3af5d08031da1593 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Fri, 12 Feb 2016 14:48:23 +0100 Subject: gpio: store reflect the label to userspace The gpio_chip label is useful for userspace to understand what kind of GPIO chip it is dealing with. Let's store a copy of this label in the gpio_device, add it to the struct passed to userspace for GPIO_GET_CHIPINFO_IOCTL and modify lsgpio to show it. Signed-off-by: Linus Walleij --- drivers/gpio/gpiolib.c | 13 +++++++++++++ drivers/gpio/gpiolib.h | 3 +++ include/linux/gpio/driver.h | 3 ++- include/uapi/linux/gpio.h | 2 ++ tools/gpio/lsgpio.c | 4 ++-- 5 files changed, 22 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index 59f0045c5950..797c790aa750 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -342,6 +342,9 @@ static long gpio_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) strncpy(chipinfo.name, dev_name(&gdev->dev), sizeof(chipinfo.name)); chipinfo.name[sizeof(chipinfo.name)-1] = '\0'; + strncpy(chipinfo.label, gdev->label, + sizeof(chipinfo.label)); + chipinfo.label[sizeof(chipinfo.label)-1] = '\0'; chipinfo.lines = gdev->ngpio; if (copy_to_user(ip, &chipinfo, sizeof(chipinfo))) return -EFAULT; @@ -479,6 +482,16 @@ int gpiochip_add_data(struct gpio_chip *chip, void *data) status = -EINVAL; goto err_free_gdev; } + + if (chip->label) + gdev->label = devm_kstrdup(&gdev->dev, chip->label, GFP_KERNEL); + else + gdev->label = devm_kstrdup(&gdev->dev, "unknown", GFP_KERNEL); + if (!gdev->label) { + status = -ENOMEM; + goto err_free_gdev; + } + gdev->ngpio = chip->ngpio; gdev->data = data; diff --git a/drivers/gpio/gpiolib.h b/drivers/gpio/gpiolib.h index ddbe409ad48f..e30e5fdb1214 100644 --- a/drivers/gpio/gpiolib.h +++ b/drivers/gpio/gpiolib.h @@ -37,6 +37,8 @@ struct acpi_device; * of the @descs array. * @base: GPIO base in the DEPRECATED global Linux GPIO numberspace, assigned * at device creation time. + * @label: a descriptive name for the GPIO device, such as the part number + * or name of the IP component in a System on Chip. * @data: per-instance data assigned by the driver * @list: links gpio_device:s together for traversal * @@ -55,6 +57,7 @@ struct gpio_device { struct gpio_desc *descs; int base; u16 ngpio; + char *label; void *data; struct list_head list; diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index ff96d0f9fceb..639607658ed8 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -21,7 +21,8 @@ struct gpio_device; /** * struct gpio_chip - abstract a GPIO controller - * @label: for diagnostics + * @label: a functional name for the GPIO device, such as a part + * number or the name of the SoC IP-block implementing it. 
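By way of illustration (the chip name is invented), the label that surfaces through the new chipinfo field is simply whatever the driver put in its gpio_chip:

	static struct gpio_chip demo_chip = {
		.label = "acme-soc-gpio",	/* invented part/IP name */
		.ngpio = 8,
	};

	/* gpiochip_add_data(&demo_chip, NULL) duplicates the label into the
	 * gpio_device; GPIO_GET_CHIPINFO_IOCTL then reports it, and lsgpio
	 * prints it in quotes. */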
* @gpiodev: the internal state holder, opaque struct * @parent: optional parent device providing the GPIOs * @owner: helps prevent removal of modules exporting active GPIOs diff --git a/include/uapi/linux/gpio.h b/include/uapi/linux/gpio.h index 3188a87bdaa0..3f93e1bcd3dd 100644 --- a/include/uapi/linux/gpio.h +++ b/include/uapi/linux/gpio.h @@ -16,10 +16,12 @@ /** * struct gpiochip_info - Information about a certain GPIO chip * @name: the name of this GPIO chip + * @label: a functional name for this GPIO chip * @lines: number of GPIO lines on this chip */ struct gpiochip_info { char name[32]; + char label[32]; __u32 lines; }; diff --git a/tools/gpio/lsgpio.c b/tools/gpio/lsgpio.c index 4cfe29da279b..692233f561fb 100644 --- a/tools/gpio/lsgpio.c +++ b/tools/gpio/lsgpio.c @@ -54,8 +54,8 @@ int list_device(const char *device_name) goto free_chrdev_name; } - fprintf(stdout, "GPIO chip: %s, %u GPIO lines\n", - cinfo.name, cinfo.lines); + fprintf(stdout, "GPIO chip: %s, \"%s\", %u GPIO lines\n", + cinfo.name, cinfo.label, cinfo.lines); if (close(fd) == -1) { ret = -errno; -- cgit v1.2.3 From d07ab6d114774d7fcb53c57d7474aef459713451 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Thu, 18 Feb 2016 10:54:11 +0200 Subject: block: Add blk_set_runtime_active() If block device is left runtime suspended during system suspend, resume hook of the driver typically corrects runtime PM status of the device back to "active" after it is resumed. However, this is not enough as queue's runtime PM status is still "suspended". As long as it is in this state blk_pm_peek_request() returns NULL and thus prevents new requests to be processed. Add new function blk_set_runtime_active() that can be used to force the queue status back to "active" as needed. Signed-off-by: Mika Westerberg Acked-by: Jens Axboe Signed-off-by: Tejun Heo --- block/blk-core.c | 24 ++++++++++++++++++++++++ include/linux/blkdev.h | 2 ++ 2 files changed, 26 insertions(+) (limited to 'include/linux') diff --git a/block/blk-core.c b/block/blk-core.c index ab51685988c2..11e371e76cb7 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -3527,6 +3527,30 @@ void blk_post_runtime_resume(struct request_queue *q, int err) spin_unlock_irq(q->queue_lock); } EXPORT_SYMBOL(blk_post_runtime_resume); + +/** + * blk_set_runtime_active - Force runtime status of the queue to be active + * @q: the queue of the device + * + * If the device is left runtime suspended during system suspend the resume + * hook typically resumes the device and corrects runtime status + * accordingly. However, that does not affect the queue runtime PM status + * which is still "suspended". This prevents processing requests from the + * queue. + * + * This function can be used in driver's resume hook to correct queue + * runtime PM status and re-enable peeking requests from the queue. It + * should be called before first request is added to the queue. 
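A hedged sketch of the intended call site, a driver's system-resume hook; the demo_* driver and its fields are assumptions, not from this patch:

	static int demo_blk_resume(struct device *dev)
	{
		struct demo_dev *ddev = dev_get_drvdata(dev);

		/* bring the device back up, then correct runtime PM status */
		pm_runtime_disable(dev);
		pm_runtime_set_active(dev);
		pm_runtime_enable(dev);

		/* the queue is still "suspended"; let it peek requests again */
		blk_set_runtime_active(ddev->queue);
		return 0;
	}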
+ */ +void blk_set_runtime_active(struct request_queue *q) +{ + spin_lock_irq(q->queue_lock); + q->rpm_status = RPM_ACTIVE; + pm_runtime_mark_last_busy(q->dev); + pm_request_autosuspend(q->dev); + spin_unlock_irq(q->queue_lock); +} +EXPORT_SYMBOL(blk_set_runtime_active); #endif int __init blk_dev_init(void) diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 29189aeace19..8a77c47f8d21 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1026,6 +1026,7 @@ extern int blk_pre_runtime_suspend(struct request_queue *q); extern void blk_post_runtime_suspend(struct request_queue *q, int err); extern void blk_pre_runtime_resume(struct request_queue *q); extern void blk_post_runtime_resume(struct request_queue *q, int err); +extern void blk_set_runtime_active(struct request_queue *q); #else static inline void blk_pm_runtime_init(struct request_queue *q, struct device *dev) {} @@ -1036,6 +1037,7 @@ static inline int blk_pre_runtime_suspend(struct request_queue *q) static inline void blk_post_runtime_suspend(struct request_queue *q, int err) {} static inline void blk_pre_runtime_resume(struct request_queue *q) {} static inline void blk_post_runtime_resume(struct request_queue *q, int err) {} +static inline void blk_set_runtime_active(struct request_queue *q) {} #endif /* -- cgit v1.2.3 From 91d31b9f8e7662726f273fc32b25f4099d78de4a Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Mon, 15 Feb 2016 05:22:18 +0000 Subject: regmap: add regmap_update_bits_base() Current regmap has many similar update functions like those below, but the differences between them are very small. regmap_update_bits() regmap_update_bits_async() regmap_update_bits_check() regmap_update_bits_check_async() Furthermore, we can add a *force* write option in the future. This patch adds a new regmap_update_bits_base() into which all of these features are merged. The functions above can then be implemented as macros on top of it. Signed-off-by: Kuninori Morimoto Signed-off-by: Mark Brown --- drivers/base/regmap/regmap.c | 40 ++++++++++++++++++++++++++++++++++++++++ include/linux/regmap.h | 11 +++++++++++ 2 files changed, 51 insertions(+) (limited to 'include/linux') diff --git a/drivers/base/regmap/regmap.c b/drivers/base/regmap/regmap.c index ee54e841de4a..4e35b2f41304 100644 --- a/drivers/base/regmap/regmap.c +++ b/drivers/base/regmap/regmap.c @@ -2647,6 +2647,46 @@ static int _regmap_update_bits(struct regmap *map, unsigned int reg, return ret; } +/** + * regmap_update_bits_base: + * Perform a read/modify/write cycle on the + * register map with change, async and force options + * + * @map: Register map to update + * @reg: Register to update + * @mask: Bitmask to change + * @val: New value for bitmask + * @change: Boolean indicating if a write was done + * @async: Boolean indicating whether the write should be asynchronous + * @force: Boolean indicating whether to write even if the value is unchanged + * + * If @async is true: with most buses the read must be done synchronously, + * so this is most useful for devices with a cache which do not need to + * interact with the hardware to determine the current register value. + * + * Returns zero for success, a negative number on error.
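To see why the follow-up patches in this series reduce to one-line macros, the mapping is as follows (reg/mask/val are placeholders):

	bool changed;

	/* regmap_update_bits(map, reg, mask, val) */
	regmap_update_bits_base(map, reg, mask, val, NULL, false, false);
	/* regmap_update_bits_async(map, reg, mask, val) */
	regmap_update_bits_base(map, reg, mask, val, NULL, true, false);
	/* regmap_update_bits_check(map, reg, mask, val, &changed) */
	regmap_update_bits_base(map, reg, mask, val, &changed, false, false);
	/* regmap_write_bits(map, reg, mask, val): forced, unconditional write */
	regmap_update_bits_base(map, reg, mask, val, NULL, false, true);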
+ */ +int regmap_update_bits_base(struct regmap *map, unsigned int reg, + unsigned int mask, unsigned int val, + bool *change, bool async, bool force) +{ + int ret; + + map->lock(map->lock_arg); + + map->async = async; + + ret = _regmap_update_bits(map, reg, mask, val, change, force); + + map->async = false; + + map->unlock(map->lock_arg); + + return ret; +} +EXPORT_SYMBOL_GPL(regmap_update_bits_base); + /** * regmap_update_bits: Perform a read/modify/write cycle on the register map * diff --git a/include/linux/regmap.h b/include/linux/regmap.h index 18394343f489..28e50a3d2872 100644 --- a/include/linux/regmap.h +++ b/include/linux/regmap.h @@ -691,6 +691,9 @@ int regmap_raw_read(struct regmap *map, unsigned int reg, void *val, size_t val_len); int regmap_bulk_read(struct regmap *map, unsigned int reg, void *val, size_t val_count); +int regmap_update_bits_base(struct regmap *map, unsigned int reg, + unsigned int mask, unsigned int val, + bool *change, bool async, bool force); int regmap_update_bits(struct regmap *map, unsigned int reg, unsigned int mask, unsigned int val); int regmap_write_bits(struct regmap *map, unsigned int reg, @@ -937,6 +940,14 @@ static inline int regmap_bulk_read(struct regmap *map, unsigned int reg, return -EINVAL; } +static inline int regmap_update_bits_base(struct regmap *map, unsigned int reg, + unsigned int mask, unsigned int val, + bool *change, bool async, bool force) +{ + WARN_ONCE(1, "regmap API is disabled"); + return -EINVAL; +} + static inline int regmap_update_bits(struct regmap *map, unsigned int reg, unsigned int mask, unsigned int val) { -- cgit v1.2.3 From ca7a94464b5457a8dc5add19f6fc3bea59d6193f Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Mon, 15 Feb 2016 05:22:42 +0000 Subject: regmap: merge regmap_update_bits() into macro Current regmap has many similar update functions like below, but the difference is very few. regmap_update_bits() regmap_update_bits_async() regmap_update_bits_check() regmap_update_bits_check_async() Furthermore, we can add *force* write option too in the future. This patch merges regmap_update_bits() into macro by using regmap_update_bits_base(). Signed-off-by: Kuninori Morimoto Signed-off-by: Mark Brown --- drivers/base/regmap/regmap.c | 23 ----------------------- include/linux/regmap.h | 12 +++--------- 2 files changed, 3 insertions(+), 32 deletions(-) (limited to 'include/linux') diff --git a/drivers/base/regmap/regmap.c b/drivers/base/regmap/regmap.c index 4e35b2f41304..281898a97e8f 100644 --- a/drivers/base/regmap/regmap.c +++ b/drivers/base/regmap/regmap.c @@ -2687,29 +2687,6 @@ int regmap_update_bits_base(struct regmap *map, unsigned int reg, } EXPORT_SYMBOL_GPL(regmap_update_bits_base); -/** - * regmap_update_bits: Perform a read/modify/write cycle on the register map - * - * @map: Register map to update - * @reg: Register to update - * @mask: Bitmask to change - * @val: New value for bitmask - * - * Returns zero for success, a negative number on error. 
- */ -int regmap_update_bits(struct regmap *map, unsigned int reg, - unsigned int mask, unsigned int val) -{ - int ret; - - map->lock(map->lock_arg); - ret = _regmap_update_bits(map, reg, mask, val, NULL, false); - map->unlock(map->lock_arg); - - return ret; -} -EXPORT_SYMBOL_GPL(regmap_update_bits); - /** * regmap_write_bits: Perform a read/modify/write cycle on the register map * diff --git a/include/linux/regmap.h b/include/linux/regmap.h index 28e50a3d2872..500b36cbc7aa 100644 --- a/include/linux/regmap.h +++ b/include/linux/regmap.h @@ -65,6 +65,9 @@ struct reg_sequence { unsigned int delay_us; }; +#define regmap_update_bits(map, reg, mask, val) \ + regmap_update_bits_base(map, reg, mask, val, NULL, false, false) + #ifdef CONFIG_REGMAP enum regmap_endian { @@ -694,8 +697,6 @@ int regmap_bulk_read(struct regmap *map, unsigned int reg, void *val, int regmap_update_bits_base(struct regmap *map, unsigned int reg, unsigned int mask, unsigned int val, bool *change, bool async, bool force); -int regmap_update_bits(struct regmap *map, unsigned int reg, - unsigned int mask, unsigned int val); int regmap_write_bits(struct regmap *map, unsigned int reg, unsigned int mask, unsigned int val); int regmap_update_bits_async(struct regmap *map, unsigned int reg, @@ -948,13 +949,6 @@ static inline int regmap_update_bits_base(struct regmap *map, unsigned int reg, return -EINVAL; } -static inline int regmap_update_bits(struct regmap *map, unsigned int reg, - unsigned int mask, unsigned int val) -{ - WARN_ONCE(1, "regmap API is disabled"); - return -EINVAL; -} - static inline int regmap_write_bits(struct regmap *map, unsigned int reg, unsigned int mask, unsigned int val) { -- cgit v1.2.3 From 30ed9cb7a49b499ebc6061e4ff38e88cb4857cad Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Mon, 15 Feb 2016 05:23:01 +0000 Subject: regmap: merge regmap_update_bits_async() into macro Current regmap has many similar update functions like below, but the difference is very few. regmap_update_bits() regmap_update_bits_async() regmap_update_bits_check() regmap_update_bits_check_async() Furthermore, we can add *force* write option too in the future. This patch merges regmap_update_bits_async() into macro by using regmap_update_bits_base(). Signed-off-by: Kuninori Morimoto Signed-off-by: Mark Brown --- drivers/base/regmap/regmap.c | 34 ---------------------------------- include/linux/regmap.h | 12 ++---------- 2 files changed, 2 insertions(+), 44 deletions(-) (limited to 'include/linux') diff --git a/drivers/base/regmap/regmap.c b/drivers/base/regmap/regmap.c index 281898a97e8f..c2255f604c03 100644 --- a/drivers/base/regmap/regmap.c +++ b/drivers/base/regmap/regmap.c @@ -2710,40 +2710,6 @@ int regmap_write_bits(struct regmap *map, unsigned int reg, } EXPORT_SYMBOL_GPL(regmap_write_bits); -/** - * regmap_update_bits_async: Perform a read/modify/write cycle on the register - * map asynchronously - * - * @map: Register map to update - * @reg: Register to update - * @mask: Bitmask to change - * @val: New value for bitmask - * - * With most buses the read must be done synchronously so this is most - * useful for devices with a cache which do not need to interact with - * the hardware to determine the current register value. - * - * Returns zero for success, a negative number on error. 
- */ -int regmap_update_bits_async(struct regmap *map, unsigned int reg, - unsigned int mask, unsigned int val) -{ - int ret; - - map->lock(map->lock_arg); - - map->async = true; - - ret = _regmap_update_bits(map, reg, mask, val, NULL, false); - - map->async = false; - - map->unlock(map->lock_arg); - - return ret; -} -EXPORT_SYMBOL_GPL(regmap_update_bits_async); - /** * regmap_update_bits_check: Perform a read/modify/write cycle on the * register map and report if updated diff --git a/include/linux/regmap.h b/include/linux/regmap.h index 500b36cbc7aa..90c8b0e99f9d 100644 --- a/include/linux/regmap.h +++ b/include/linux/regmap.h @@ -67,6 +67,8 @@ struct reg_sequence { #define regmap_update_bits(map, reg, mask, val) \ regmap_update_bits_base(map, reg, mask, val, NULL, false, false) +#define regmap_update_bits_async(map, reg, mask, val)\ + regmap_update_bits_base(map, reg, mask, val, NULL, true, false) #ifdef CONFIG_REGMAP @@ -699,8 +701,6 @@ int regmap_update_bits_base(struct regmap *map, unsigned int reg, bool *change, bool async, bool force); int regmap_write_bits(struct regmap *map, unsigned int reg, unsigned int mask, unsigned int val); -int regmap_update_bits_async(struct regmap *map, unsigned int reg, - unsigned int mask, unsigned int val); int regmap_update_bits_check(struct regmap *map, unsigned int reg, unsigned int mask, unsigned int val, bool *change); @@ -956,14 +956,6 @@ static inline int regmap_write_bits(struct regmap *map, unsigned int reg, return -EINVAL; } -static inline int regmap_update_bits_async(struct regmap *map, - unsigned int reg, - unsigned int mask, unsigned int val) -{ - WARN_ONCE(1, "regmap API is disabled"); - return -EINVAL; -} - static inline int regmap_update_bits_check(struct regmap *map, unsigned int reg, unsigned int mask, unsigned int val, -- cgit v1.2.3 From 98c2dc48694a47109fff430a216fc13a9b45a4a1 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Mon, 15 Feb 2016 05:23:17 +0000 Subject: regmap: merge regmap_update_bits_check() into macro Current regmap has many similar update functions like below, but the difference is very few. regmap_update_bits() regmap_update_bits_async() regmap_update_bits_check() regmap_update_bits_check_async() Furthermore, we can add *force* write option too in the future. This patch merges regmap_update_bits_check() into macro by using regmap_update_bits_base(). Signed-off-by: Kuninori Morimoto Signed-off-by: Mark Brown --- drivers/base/regmap/regmap.c | 25 ------------------------- include/linux/regmap.h | 14 ++------------ 2 files changed, 2 insertions(+), 37 deletions(-) (limited to 'include/linux') diff --git a/drivers/base/regmap/regmap.c b/drivers/base/regmap/regmap.c index c2255f604c03..ce24e9688b05 100644 --- a/drivers/base/regmap/regmap.c +++ b/drivers/base/regmap/regmap.c @@ -2710,31 +2710,6 @@ int regmap_write_bits(struct regmap *map, unsigned int reg, } EXPORT_SYMBOL_GPL(regmap_write_bits); -/** - * regmap_update_bits_check: Perform a read/modify/write cycle on the - * register map and report if updated - * - * @map: Register map to update - * @reg: Register to update - * @mask: Bitmask to change - * @val: New value for bitmask - * @change: Boolean indicating if a write was done - * - * Returns zero for success, a negative number on error. 
- */ -int regmap_update_bits_check(struct regmap *map, unsigned int reg, - unsigned int mask, unsigned int val, - bool *change) -{ - int ret; - - map->lock(map->lock_arg); - ret = _regmap_update_bits(map, reg, mask, val, change, false); - map->unlock(map->lock_arg); - return ret; -} -EXPORT_SYMBOL_GPL(regmap_update_bits_check); - /** * regmap_update_bits_check_async: Perform a read/modify/write cycle on the * register map asynchronously and report if diff --git a/include/linux/regmap.h b/include/linux/regmap.h index 90c8b0e99f9d..dd227dd5e5f8 100644 --- a/include/linux/regmap.h +++ b/include/linux/regmap.h @@ -69,6 +69,8 @@ struct reg_sequence { regmap_update_bits_base(map, reg, mask, val, NULL, false, false) #define regmap_update_bits_async(map, reg, mask, val)\ regmap_update_bits_base(map, reg, mask, val, NULL, true, false) +#define regmap_update_bits_check(map, reg, mask, val, change)\ + regmap_update_bits_base(map, reg, mask, val, change, false, false) #ifdef CONFIG_REGMAP @@ -701,9 +703,6 @@ int regmap_update_bits_base(struct regmap *map, unsigned int reg, bool *change, bool async, bool force); int regmap_write_bits(struct regmap *map, unsigned int reg, unsigned int mask, unsigned int val); -int regmap_update_bits_check(struct regmap *map, unsigned int reg, - unsigned int mask, unsigned int val, - bool *change); int regmap_update_bits_check_async(struct regmap *map, unsigned int reg, unsigned int mask, unsigned int val, bool *change); @@ -956,15 +955,6 @@ static inline int regmap_write_bits(struct regmap *map, unsigned int reg, return -EINVAL; } -static inline int regmap_update_bits_check(struct regmap *map, - unsigned int reg, - unsigned int mask, unsigned int val, - bool *change) -{ - WARN_ONCE(1, "regmap API is disabled"); - return -EINVAL; -} - static inline int regmap_update_bits_check_async(struct regmap *map, unsigned int reg, unsigned int mask, -- cgit v1.2.3 From 89d8d4b833b0b29e0e95bd0cd51e80f5ee7a6b0a Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Mon, 15 Feb 2016 05:23:37 +0000 Subject: regmap: merge regmap_update_bits_check_async() into macro Current regmap has many similar update functions like below, but the difference is very few. regmap_update_bits() regmap_update_bits_async() regmap_update_bits_check() regmap_update_bits_check_async() Furthermore, we can add *force* write option too in the future. This patch merges regmap_update_bits_check_async() into macro by using regmap_update_bits_base(). 
Signed-off-by: Kuninori Morimoto Signed-off-by: Mark Brown --- drivers/base/regmap/regmap.c | 37 ------------------------------------- include/linux/regmap.h | 15 ++------------- 2 files changed, 2 insertions(+), 50 deletions(-) (limited to 'include/linux') diff --git a/drivers/base/regmap/regmap.c b/drivers/base/regmap/regmap.c index ce24e9688b05..015135a656b7 100644 --- a/drivers/base/regmap/regmap.c +++ b/drivers/base/regmap/regmap.c @@ -2710,43 +2710,6 @@ int regmap_write_bits(struct regmap *map, unsigned int reg, } EXPORT_SYMBOL_GPL(regmap_write_bits); -/** - * regmap_update_bits_check_async: Perform a read/modify/write cycle on the - * register map asynchronously and report if - * updated - * - * @map: Register map to update - * @reg: Register to update - * @mask: Bitmask to change - * @val: New value for bitmask - * @change: Boolean indicating if a write was done - * - * With most buses the read must be done synchronously so this is most - * useful for devices with a cache which do not need to interact with - * the hardware to determine the current register value. - * - * Returns zero for success, a negative number on error. - */ -int regmap_update_bits_check_async(struct regmap *map, unsigned int reg, - unsigned int mask, unsigned int val, - bool *change) -{ - int ret; - - map->lock(map->lock_arg); - - map->async = true; - - ret = _regmap_update_bits(map, reg, mask, val, change, false); - - map->async = false; - - map->unlock(map->lock_arg); - - return ret; -} -EXPORT_SYMBOL_GPL(regmap_update_bits_check_async); - void regmap_async_complete_cb(struct regmap_async *async, int ret) { struct regmap *map = async->map; diff --git a/include/linux/regmap.h b/include/linux/regmap.h index dd227dd5e5f8..4d8e1edb4407 100644 --- a/include/linux/regmap.h +++ b/include/linux/regmap.h @@ -71,6 +71,8 @@ struct reg_sequence { regmap_update_bits_base(map, reg, mask, val, NULL, true, false) #define regmap_update_bits_check(map, reg, mask, val, change)\ regmap_update_bits_base(map, reg, mask, val, change, false, false) +#define regmap_update_bits_check_async(map, reg, mask, val, change)\ + regmap_update_bits_base(map, reg, mask, val, change, true, false) #ifdef CONFIG_REGMAP @@ -703,9 +705,6 @@ int regmap_update_bits_base(struct regmap *map, unsigned int reg, bool *change, bool async, bool force); int regmap_write_bits(struct regmap *map, unsigned int reg, unsigned int mask, unsigned int val); -int regmap_update_bits_check_async(struct regmap *map, unsigned int reg, - unsigned int mask, unsigned int val, - bool *change); int regmap_get_val_bytes(struct regmap *map); int regmap_get_max_register(struct regmap *map); int regmap_get_reg_stride(struct regmap *map); @@ -955,16 +954,6 @@ static inline int regmap_write_bits(struct regmap *map, unsigned int reg, return -EINVAL; } -static inline int regmap_update_bits_check_async(struct regmap *map, - unsigned int reg, - unsigned int mask, - unsigned int val, - bool *change) -{ - WARN_ONCE(1, "regmap API is disabled"); - return -EINVAL; -} - static inline int regmap_get_val_bytes(struct regmap *map) { WARN_ONCE(1, "regmap API is disabled"); -- cgit v1.2.3 From 28972eaa34f384eef5e33f36e00d8fa21ca44375 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Mon, 15 Feb 2016 05:23:55 +0000 Subject: regmap: add regmap_field_update_bits_base() This patch adds new regmap_field_update_bits_base() which is using regmap_update_bits_base(). Current regmap_field_xxx() can be merged into it by macro. 
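A hedged usage sketch for the field API; the register layout is invented and error handling is omitted:

	/* a 3-bit field occupying bits 6:4 of register 0x10 */
	static const struct reg_field demo_field = REG_FIELD(0x10, 4, 6);
	struct regmap_field *f = regmap_field_alloc(map, demo_field);

	/* read/modify/write bits 6:4 only; the helper applies shift and mask */
	regmap_field_update_bits_base(f, 0x7, 0x5, NULL, false, false);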
Signed-off-by: Kuninori Morimoto Signed-off-by: Mark Brown --- drivers/base/regmap/regmap.c | 27 +++++++++++++++++++++++++++ include/linux/regmap.h | 12 +++++++++++- 2 files changed, 38 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/base/regmap/regmap.c b/drivers/base/regmap/regmap.c index 015135a656b7..e534105f47f6 100644 --- a/drivers/base/regmap/regmap.c +++ b/drivers/base/regmap/regmap.c @@ -1689,6 +1689,33 @@ int regmap_raw_write(struct regmap *map, unsigned int reg, } EXPORT_SYMBOL_GPL(regmap_raw_write); +/** + * regmap_field_update_bits_base(): + * Perform a read/modify/write cycle on the register field + * with change, async, force option + * + * @field: Register field to write to + * @mask: Bitmask to change + * @val: Value to be written + * @change: Boolean indicating if a write was done + * @async: Boolean indicating asynchronously + * @force: Boolean indicating use force update + * + * A value of zero will be returned on success, a negative errno will + * be returned in error cases. + */ +int regmap_field_update_bits_base(struct regmap_field *field, + unsigned int mask, unsigned int val, + bool *change, bool async, bool force) +{ + mask = (mask << field->shift) & field->mask; + + return regmap_update_bits_base(field->regmap, field->reg, + mask, val << field->shift, + change, async, force); +} +EXPORT_SYMBOL_GPL(regmap_field_update_bits_base); + /** * regmap_field_write(): Write a value to a single register field * diff --git a/include/linux/regmap.h b/include/linux/regmap.h index 4d8e1edb4407..23bf7657e485 100644 --- a/include/linux/regmap.h +++ b/include/linux/regmap.h @@ -773,9 +773,11 @@ void devm_regmap_field_free(struct device *dev, struct regmap_field *field); int regmap_field_read(struct regmap_field *field, unsigned int *val); int regmap_field_write(struct regmap_field *field, unsigned int val); +int regmap_field_update_bits_base(struct regmap_field *field, + unsigned int mask, unsigned int val, + bool *change, bool async, bool force); int regmap_field_update_bits(struct regmap_field *field, unsigned int mask, unsigned int val); - int regmap_fields_write(struct regmap_field *field, unsigned int id, unsigned int val); int regmap_fields_force_write(struct regmap_field *field, unsigned int id, @@ -954,6 +956,14 @@ static inline int regmap_write_bits(struct regmap *map, unsigned int reg, return -EINVAL; } +static inline int regmap_field_update_bits_base(struct regmap_field *field, + unsigned int mask, unsigned int val, + bool *change, bool async, bool force) +{ + WARN_ONCE(1, "regmap API is disabled"); + return -EINVAL; +} + static inline int regmap_get_val_bytes(struct regmap *map) { WARN_ONCE(1, "regmap API is disabled"); -- cgit v1.2.3 From 3674124b35894631f8f4d33ab041e713078bfd4b Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Mon, 15 Feb 2016 05:24:15 +0000 Subject: regmap: merge regmap_field_write() into macro This patch merges regmap_field_write() into macro by using regmap_field_update_bits_base(). 
Signed-off-by: Kuninori Morimoto Signed-off-by: Mark Brown --- drivers/base/regmap/regmap.c | 16 ---------------- include/linux/regmap.h | 4 +++- 2 files changed, 3 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/drivers/base/regmap/regmap.c b/drivers/base/regmap/regmap.c index e534105f47f6..228dce237658 100644 --- a/drivers/base/regmap/regmap.c +++ b/drivers/base/regmap/regmap.c @@ -1716,22 +1716,6 @@ int regmap_field_update_bits_base(struct regmap_field *field, } EXPORT_SYMBOL_GPL(regmap_field_update_bits_base); -/** - * regmap_field_write(): Write a value to a single register field - * - * @field: Register field to write to - * @val: Value to be written - * - * A value of zero will be returned on success, a negative errno will - * be returned in error cases. - */ -int regmap_field_write(struct regmap_field *field, unsigned int val) -{ - return regmap_update_bits(field->regmap, field->reg, - field->mask, val << field->shift); -} -EXPORT_SYMBOL_GPL(regmap_field_write); - /** * regmap_field_update_bits(): Perform a read/modify/write cycle * on the register field diff --git a/include/linux/regmap.h b/include/linux/regmap.h index 23bf7657e485..13e9ebdea1c7 100644 --- a/include/linux/regmap.h +++ b/include/linux/regmap.h @@ -74,6 +74,9 @@ struct reg_sequence { #define regmap_update_bits_check_async(map, reg, mask, val, change)\ regmap_update_bits_base(map, reg, mask, val, change, true, false) +#define regmap_field_write(field, val) \ + regmap_field_update_bits_base(field, ~0, val, NULL, false, false) + #ifdef CONFIG_REGMAP enum regmap_endian { @@ -772,7 +775,6 @@ struct regmap_field *devm_regmap_field_alloc(struct device *dev, void devm_regmap_field_free(struct device *dev, struct regmap_field *field); int regmap_field_read(struct regmap_field *field, unsigned int *val); -int regmap_field_write(struct regmap_field *field, unsigned int val); int regmap_field_update_bits_base(struct regmap_field *field, unsigned int mask, unsigned int val, bool *change, bool async, bool force); -- cgit v1.2.3 From 721ed64dda3774c619874866ca4f9a38ae6750af Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Mon, 15 Feb 2016 05:24:33 +0000 Subject: regmap: merge regmap_field_update_bits() into macro This patch merges regmap_field_update_bits() into macro by using regmap_field_update_bits_base(). Signed-off-by: Kuninori Morimoto Signed-off-by: Mark Brown --- drivers/base/regmap/regmap.c | 20 -------------------- include/linux/regmap.h | 4 ++-- 2 files changed, 2 insertions(+), 22 deletions(-) (limited to 'include/linux') diff --git a/drivers/base/regmap/regmap.c b/drivers/base/regmap/regmap.c index 228dce237658..606c9b53526b 100644 --- a/drivers/base/regmap/regmap.c +++ b/drivers/base/regmap/regmap.c @@ -1716,26 +1716,6 @@ int regmap_field_update_bits_base(struct regmap_field *field, } EXPORT_SYMBOL_GPL(regmap_field_update_bits_base); -/** - * regmap_field_update_bits(): Perform a read/modify/write cycle - * on the register field - * - * @field: Register field to write to - * @mask: Bitmask to change - * @val: Value to be written - * - * A value of zero will be returned on success, a negative errno will - * be returned in error cases. 
- */ -int regmap_field_update_bits(struct regmap_field *field, unsigned int mask, unsigned int val) -{ - mask = (mask << field->shift) & field->mask; - - return regmap_update_bits(field->regmap, field->reg, - mask, val << field->shift); -} -EXPORT_SYMBOL_GPL(regmap_field_update_bits); - /** * regmap_fields_write(): Write a value to a single register field with port ID * diff --git a/include/linux/regmap.h b/include/linux/regmap.h index 13e9ebdea1c7..e525beeaa2c6 100644 --- a/include/linux/regmap.h +++ b/include/linux/regmap.h @@ -76,6 +76,8 @@ struct reg_sequence { #define regmap_field_write(field, val) \ regmap_field_update_bits_base(field, ~0, val, NULL, false, false) +#define regmap_field_update_bits(field, mask, val)\ + regmap_field_update_bits_base(field, mask, val, NULL, false, false) #ifdef CONFIG_REGMAP @@ -778,8 +780,6 @@ int regmap_field_read(struct regmap_field *field, unsigned int *val); int regmap_field_update_bits_base(struct regmap_field *field, unsigned int mask, unsigned int val, bool *change, bool async, bool force); -int regmap_field_update_bits(struct regmap_field *field, - unsigned int mask, unsigned int val); int regmap_fields_write(struct regmap_field *field, unsigned int id, unsigned int val); int regmap_fields_force_write(struct regmap_field *field, unsigned int id, -- cgit v1.2.3 From e126edec184ea3049cc1f8b652c6eeb06aa65fda Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Mon, 15 Feb 2016 05:24:51 +0000 Subject: regmap: add regmap_fields_update_bits_base() This patch adds new regmap_fields_update_bits_base() which is using regmap_update_bits_base(). Current regmap_fields_xxx() can be merged into it by macro. Signed-off-by: Kuninori Morimoto Signed-off-by: Mark Brown --- drivers/base/regmap/regmap.c | 32 ++++++++++++++++++++++++++++++++ include/linux/regmap.h | 12 ++++++++++++ 2 files changed, 44 insertions(+) (limited to 'include/linux') diff --git a/drivers/base/regmap/regmap.c b/drivers/base/regmap/regmap.c index 606c9b53526b..0c7773fadd48 100644 --- a/drivers/base/regmap/regmap.c +++ b/drivers/base/regmap/regmap.c @@ -1716,6 +1716,38 @@ int regmap_field_update_bits_base(struct regmap_field *field, } EXPORT_SYMBOL_GPL(regmap_field_update_bits_base); +/** + * regmap_fields_update_bits_base(): + * Perform a read/modify/write cycle on the register field + * with change, async, force option + * + * @field: Register field to write to + * @id: port ID + * @mask: Bitmask to change + * @val: Value to be written + * @change: Boolean indicating if a write was done + * @async: Boolean indicating asynchronously + * @force: Boolean indicating use force update + * + * A value of zero will be returned on success, a negative errno will + * be returned in error cases. 
+ */ +int regmap_fields_update_bits_base(struct regmap_field *field, unsigned int id, + unsigned int mask, unsigned int val, + bool *change, bool async, bool force) +{ + if (id >= field->id_size) + return -EINVAL; + + mask = (mask << field->shift) & field->mask; + + return regmap_update_bits_base(field->regmap, + field->reg + (field->id_offset * id), + mask, val << field->shift, + change, async, force); +} +EXPORT_SYMBOL_GPL(regmap_fields_update_bits_base); + /** * regmap_fields_write(): Write a value to a single register field with port ID * diff --git a/include/linux/regmap.h b/include/linux/regmap.h index e525beeaa2c6..2735a3df7eab 100644 --- a/include/linux/regmap.h +++ b/include/linux/regmap.h @@ -788,6 +788,9 @@ int regmap_fields_read(struct regmap_field *field, unsigned int id, unsigned int *val); int regmap_fields_update_bits(struct regmap_field *field, unsigned int id, unsigned int mask, unsigned int val); +int regmap_fields_update_bits_base(struct regmap_field *field, unsigned int id, + unsigned int mask, unsigned int val, + bool *change, bool async, bool force); /** * Description of an IRQ for the generic regmap irq_chip. @@ -966,6 +969,15 @@ static inline int regmap_field_update_bits_base(struct regmap_field *field, return -EINVAL; } +static inline int regmap_fields_update_bits_base(struct regmap_field *field, + unsigned int id, + unsigned int mask, unsigned int val, + bool *change, bool async, bool force) +{ + WARN_ONCE(1, "regmap API is disabled"); + return -EINVAL; +} + static inline int regmap_get_val_bytes(struct regmap *map) { WARN_ONCE(1, "regmap API is disabled"); -- cgit v1.2.3 From bbf2c46f46e23a496337e143cd012c013c6c7910 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Mon, 15 Feb 2016 05:25:15 +0000 Subject: regmap: merge regmap_fields_write() into macro This patch merges regmap_fields_write() into macro by using regmap_fields_update_bits_base(). Signed-off-by: Kuninori Morimoto Signed-off-by: Mark Brown --- drivers/base/regmap/regmap.c | 22 ---------------------- include/linux/regmap.h | 5 +++-- 2 files changed, 3 insertions(+), 24 deletions(-) (limited to 'include/linux') diff --git a/drivers/base/regmap/regmap.c b/drivers/base/regmap/regmap.c index 0c7773fadd48..4b14745249ba 100644 --- a/drivers/base/regmap/regmap.c +++ b/drivers/base/regmap/regmap.c @@ -1748,28 +1748,6 @@ int regmap_fields_update_bits_base(struct regmap_field *field, unsigned int id, } EXPORT_SYMBOL_GPL(regmap_fields_update_bits_base); -/** - * regmap_fields_write(): Write a value to a single register field with port ID - * - * @field: Register field to write to - * @id: port ID - * @val: Value to be written - * - * A value of zero will be returned on success, a negative errno will - * be returned in error cases. 
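For the ports variant, a sketch under the same caveats (layout invented): one field replicated across several ports, id_offset registers apart.

	static const struct reg_field demo_ports = {
		.reg = 0x10, .lsb = 4, .msb = 6,
		.id_size = 4,		/* four ports */
		.id_offset = 0x20,	/* register stride between ports */
	};
	struct regmap_field *f = regmap_field_alloc(map, demo_ports);

	/* rewrites bits 6:4 of register 0x10 + 2 * 0x20, i.e. port 2;
	 * regmap_fields_write(f, 2, val) becomes this call with mask ~0 */
	regmap_fields_update_bits_base(f, 2, 0x7, 0x5, NULL, false, false);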
- */ -int regmap_fields_write(struct regmap_field *field, unsigned int id, - unsigned int val) -{ - if (id >= field->id_size) - return -EINVAL; - - return regmap_update_bits(field->regmap, - field->reg + (field->id_offset * id), - field->mask, val << field->shift); -} -EXPORT_SYMBOL_GPL(regmap_fields_write); - int regmap_fields_force_write(struct regmap_field *field, unsigned int id, unsigned int val) { diff --git a/include/linux/regmap.h b/include/linux/regmap.h index 2735a3df7eab..5f438a4df5e6 100644 --- a/include/linux/regmap.h +++ b/include/linux/regmap.h @@ -79,6 +79,9 @@ struct reg_sequence { #define regmap_field_update_bits(field, mask, val)\ regmap_field_update_bits_base(field, mask, val, NULL, false, false) +#define regmap_fields_write(field, id, val) \ + regmap_fields_update_bits_base(field, id, ~0, val, NULL, false, false) + #ifdef CONFIG_REGMAP enum regmap_endian { @@ -780,8 +783,6 @@ int regmap_field_read(struct regmap_field *field, unsigned int *val); int regmap_field_update_bits_base(struct regmap_field *field, unsigned int mask, unsigned int val, bool *change, bool async, bool force); -int regmap_fields_write(struct regmap_field *field, unsigned int id, - unsigned int val); int regmap_fields_force_write(struct regmap_field *field, unsigned int id, unsigned int val); int regmap_fields_read(struct regmap_field *field, unsigned int id, -- cgit v1.2.3 From 48138609135fc9c363f034596e14bff5dbf9f33f Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Mon, 15 Feb 2016 05:25:32 +0000 Subject: regmap: merge regmap_fields_update_bits() into macro This patch merges regmap_fields_update_bits() into macro by using regmap_field_update_bits_base(). Signed-off-by: Kuninori Morimoto Signed-off-by: Mark Brown --- drivers/base/regmap/regmap.c | 26 -------------------------- include/linux/regmap.h | 4 ++-- 2 files changed, 2 insertions(+), 28 deletions(-) (limited to 'include/linux') diff --git a/drivers/base/regmap/regmap.c b/drivers/base/regmap/regmap.c index 4b14745249ba..79d7f51019d7 100644 --- a/drivers/base/regmap/regmap.c +++ b/drivers/base/regmap/regmap.c @@ -1760,32 +1760,6 @@ int regmap_fields_force_write(struct regmap_field *field, unsigned int id, } EXPORT_SYMBOL_GPL(regmap_fields_force_write); -/** - * regmap_fields_update_bits(): Perform a read/modify/write cycle - * on the register field - * - * @field: Register field to write to - * @id: port ID - * @mask: Bitmask to change - * @val: Value to be written - * - * A value of zero will be returned on success, a negative errno will - * be returned in error cases. 
- */ -int regmap_fields_update_bits(struct regmap_field *field, unsigned int id, - unsigned int mask, unsigned int val) -{ - if (id >= field->id_size) - return -EINVAL; - - mask = (mask << field->shift) & field->mask; - - return regmap_update_bits(field->regmap, - field->reg + (field->id_offset * id), - mask, val << field->shift); -} -EXPORT_SYMBOL_GPL(regmap_fields_update_bits); - /* * regmap_bulk_write(): Write multiple registers to the device * diff --git a/include/linux/regmap.h b/include/linux/regmap.h index 5f438a4df5e6..7d3d498fd3e8 100644 --- a/include/linux/regmap.h +++ b/include/linux/regmap.h @@ -81,6 +81,8 @@ struct reg_sequence { #define regmap_fields_write(field, id, val) \ regmap_fields_update_bits_base(field, id, ~0, val, NULL, false, false) +#define regmap_fields_update_bits(field, id, mask, val)\ + regmap_fields_update_bits_base(field, id, mask, val, NULL, false, false) #ifdef CONFIG_REGMAP @@ -787,8 +789,6 @@ int regmap_fields_force_write(struct regmap_field *field, unsigned int id, unsigned int val); int regmap_fields_read(struct regmap_field *field, unsigned int id, unsigned int *val); -int regmap_fields_update_bits(struct regmap_field *field, unsigned int id, - unsigned int mask, unsigned int val); int regmap_fields_update_bits_base(struct regmap_field *field, unsigned int id, unsigned int mask, unsigned int val, bool *change, bool async, bool force); -- cgit v1.2.3 From e52bc7c28ac9f54db6f86b19ed65c599def18c98 Mon Sep 17 00:00:00 2001 From: David Decotigny Date: Fri, 19 Feb 2016 09:23:59 -0500 Subject: lib/bitmap.c: conversion routines to/from u32 array Aimed at transferring bitmaps to/from user-space in a 32/64-bit agnostic way. Tested: unit tests (next patch) on qemu i386, x86_64, ppc, ppc64 BE and LE, ARM. Signed-off-by: David Decotigny Reviewed-by: Ben Hutchings Signed-off-by: David S. 
Miller --- include/linux/bitmap.h | 10 ++++++ lib/bitmap.c | 89 ++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 99 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h index 9653fdb76a42..e9b0b9ab07e5 100644 --- a/include/linux/bitmap.h +++ b/include/linux/bitmap.h @@ -59,6 +59,8 @@ * bitmap_find_free_region(bitmap, bits, order) Find and allocate bit region * bitmap_release_region(bitmap, pos, order) Free specified bit region * bitmap_allocate_region(bitmap, pos, order) Allocate specified bit region + * bitmap_from_u32array(dst, nbits, buf, nwords) *dst = *buf (nwords 32b words) + * bitmap_to_u32array(buf, nwords, src, nbits) *buf = *dst (nwords 32b words) */ /* @@ -163,6 +165,14 @@ extern void bitmap_fold(unsigned long *dst, const unsigned long *orig, extern int bitmap_find_free_region(unsigned long *bitmap, unsigned int bits, int order); extern void bitmap_release_region(unsigned long *bitmap, unsigned int pos, int order); extern int bitmap_allocate_region(unsigned long *bitmap, unsigned int pos, int order); +extern unsigned int bitmap_from_u32array(unsigned long *bitmap, + unsigned int nbits, + const u32 *buf, + unsigned int nwords); +extern unsigned int bitmap_to_u32array(u32 *buf, + unsigned int nwords, + const unsigned long *bitmap, + unsigned int nbits); #ifdef __BIG_ENDIAN extern void bitmap_copy_le(unsigned long *dst, const unsigned long *src, unsigned int nbits); #else diff --git a/lib/bitmap.c b/lib/bitmap.c index 814814397cce..c66da508cbf7 100644 --- a/lib/bitmap.c +++ b/lib/bitmap.c @@ -12,6 +12,8 @@ #include #include #include +#include +#include #include #include @@ -1059,6 +1061,93 @@ int bitmap_allocate_region(unsigned long *bitmap, unsigned int pos, int order) } EXPORT_SYMBOL(bitmap_allocate_region); +/** + * bitmap_from_u32array - copy the contents of a u32 array of bits to bitmap + * @bitmap: array of unsigned longs, the destination bitmap, non NULL + * @nbits: number of bits in @bitmap + * @buf: array of u32 (in host byte order), the source bitmap, non NULL + * @nwords: number of u32 words in @buf + * + * copy min(nbits, 32*nwords) bits from @buf to @bitmap, remaining + * bits between nword and nbits in @bitmap (if any) are cleared. In + * last word of @bitmap, the bits beyond nbits (if any) are kept + * unchanged. + * + * Return the number of bits effectively copied. + */ +unsigned int +bitmap_from_u32array(unsigned long *bitmap, unsigned int nbits, + const u32 *buf, unsigned int nwords) +{ + unsigned int dst_idx, src_idx; + + for (src_idx = dst_idx = 0; dst_idx < BITS_TO_LONGS(nbits); ++dst_idx) { + unsigned long part = 0; + + if (src_idx < nwords) + part = buf[src_idx++]; + +#if BITS_PER_LONG == 64 + if (src_idx < nwords) + part |= ((unsigned long) buf[src_idx++]) << 32; +#endif + + if (dst_idx < nbits/BITS_PER_LONG) + bitmap[dst_idx] = part; + else { + unsigned long mask = BITMAP_LAST_WORD_MASK(nbits); + + bitmap[dst_idx] = (bitmap[dst_idx] & ~mask) + | (part & mask); + } + } + + return min_t(unsigned int, nbits, 32*nwords); +} +EXPORT_SYMBOL(bitmap_from_u32array); + +/** + * bitmap_to_u32array - copy the contents of bitmap to a u32 array of bits + * @buf: array of u32 (in host byte order), the dest bitmap, non NULL + * @nwords: number of u32 words in @buf + * @bitmap: array of unsigned longs, the source bitmap, non NULL + * @nbits: number of bits in @bitmap + * + * copy min(nbits, 32*nwords) bits from @bitmap to @buf. Remaining + * bits after nbits in @buf (if any) are cleared. 
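To illustrate the round-trip semantics described above (sizes chosen arbitrarily):

	DECLARE_BITMAP(bm, 40);
	u32 words[2];

	bitmap_zero(bm, 40);
	bitmap_set(bm, 0, 3);			/* set bits 0..2 */
	bitmap_to_u32array(words, 2, bm, 40);	/* words[0] == 0x7, words[1] == 0 */
	bitmap_from_u32array(bm, 40, words, 2);	/* same result on 32- and 64-bit */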
+ * + * Return the number of bits effectively copied. + */ +unsigned int +bitmap_to_u32array(u32 *buf, unsigned int nwords, + const unsigned long *bitmap, unsigned int nbits) +{ + unsigned int dst_idx = 0, src_idx = 0; + + while (dst_idx < nwords) { + unsigned long part = 0; + + if (src_idx < BITS_TO_LONGS(nbits)) { + part = bitmap[src_idx]; + if (src_idx >= nbits/BITS_PER_LONG) + part &= BITMAP_LAST_WORD_MASK(nbits); + src_idx++; + } + + buf[dst_idx++] = part & 0xffffffffUL; + +#if BITS_PER_LONG == 64 + if (dst_idx < nwords) { + part >>= 32; + buf[dst_idx++] = part & 0xffffffffUL; + } +#endif + } + + return min_t(unsigned int, nbits, 32*nwords); +} +EXPORT_SYMBOL(bitmap_to_u32array); + /** * bitmap_copy_le - copy a bitmap, putting the bits into little-endian order. * @dst: destination buffer -- cgit v1.2.3 From 421797b1aa363cb897f29f7d365e068dc9d9db81 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Fri, 19 Feb 2016 09:24:02 -0500 Subject: net/ethtool: support get coalesce per queue This patch implements the sub-command ETHTOOL_GCOALESCE for the ETHTOOL_PERQUEUE ioctl. It introduces an interface, get_per_queue_coalesce, to read the coalescing parameters of each masked queue from the device driver. The interrupt coalescing parameters are then copied back to user space one by one. Signed-off-by: Kan Liang Reviewed-by: Ben Hutchings Signed-off-by: David S. Miller --- include/linux/ethtool.h | 8 +++++++- net/core/ethtool.c | 35 ++++++++++++++++++++++++++++++++++- 2 files changed, 41 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 653dc9c4ebac..de56600023a7 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -201,6 +201,11 @@ static inline u32 ethtool_rxfh_indir_default(u32 index, u32 n_rx_rings) * @get_module_eeprom: Get the eeprom information from the plug-in module * @get_eee: Get Energy-Efficient (EEE) supported and status. * @set_eee: Set EEE status (enable/disable) as well as LPI timers. + * @get_per_queue_coalesce: Get interrupt coalescing parameters per queue. + * It must check that the given queue number is valid. If neither an RX nor + * a TX queue has this number, return -EINVAL. If only an RX queue or a TX + * queue has this number, set the inapplicable fields to ~0 and return 0. + * Returns a negative error code or zero. * * All operations are optional (i.e. the function pointer may be set * to %NULL) and callers must take this into account. 
Callers must @@ -279,7 +284,8 @@ struct ethtool_ops { const struct ethtool_tunable *, void *); int (*set_tunable)(struct net_device *, const struct ethtool_tunable *, const void *); - + int (*get_per_queue_coalesce)(struct net_device *, u32, + struct ethtool_coalesce *); }; #endif /* _LINUX_ETHTOOL_H */ diff --git a/net/core/ethtool.c b/net/core/ethtool.c index d640ecf71e74..2a6c3a26f63f 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -1888,6 +1888,38 @@ out: return ret; } +static int ethtool_get_per_queue_coalesce(struct net_device *dev, + void __user *useraddr, + struct ethtool_per_queue_op *per_queue_opt) +{ + u32 bit; + int ret; + DECLARE_BITMAP(queue_mask, MAX_NUM_QUEUE); + + if (!dev->ethtool_ops->get_per_queue_coalesce) + return -EOPNOTSUPP; + + useraddr += sizeof(*per_queue_opt); + + bitmap_from_u32array(queue_mask, + MAX_NUM_QUEUE, + per_queue_opt->queue_mask, + DIV_ROUND_UP(MAX_NUM_QUEUE, 32)); + + for_each_set_bit(bit, queue_mask, MAX_NUM_QUEUE) { + struct ethtool_coalesce coalesce = { .cmd = ETHTOOL_GCOALESCE }; + + ret = dev->ethtool_ops->get_per_queue_coalesce(dev, bit, &coalesce); + if (ret != 0) + return ret; + if (copy_to_user(useraddr, &coalesce, sizeof(coalesce))) + return -EFAULT; + useraddr += sizeof(coalesce); + } + + return 0; +} + static int ethtool_set_per_queue(struct net_device *dev, void __user *useraddr) { struct ethtool_per_queue_op per_queue_opt; @@ -1896,7 +1928,8 @@ static int ethtool_set_per_queue(struct net_device *dev, void __user *useraddr) return -EFAULT; switch (per_queue_opt.sub_command) { - + case ETHTOOL_GCOALESCE: + return ethtool_get_per_queue_coalesce(dev, useraddr, &per_queue_opt); default: return -EOPNOTSUPP; }; -- cgit v1.2.3 From f38d138a7da6510a1184e3bc5f425deb187c3265 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Fri, 19 Feb 2016 09:24:03 -0500 Subject: net/ethtool: support set coalesce per queue This patch implements the sub-command ETHTOOL_SCOALESCE for the ETHTOOL_PERQUEUE ioctl. It introduces an interface, set_per_queue_coalesce, to pass the coalescing parameters of each masked queue to the device driver. The requested coalescing settings are stored in "data" for each masked queue and are copied from user space. If setting the parameters for a queue fails, the values already applied to the preceding queues are rolled back. Signed-off-by: Kan Liang Reviewed-by: Ben Hutchings Signed-off-by: David S. Miller --- include/linux/ethtool.h | 7 ++++++ net/core/ethtool.c | 61 +++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 68 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index de56600023a7..472d7d7b01c2 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -206,6 +206,11 @@ static inline u32 ethtool_rxfh_indir_default(u32 index, u32 n_rx_rings) * a TX queue has this number, return -EINVAL. If only an RX queue or a TX * queue has this number, set the inapplicable fields to ~0 and return 0. * Returns a negative error code or zero. + * @set_per_queue_coalesce: Set interrupt coalescing parameters per queue. + * It must check that the given queue number is valid. If neither an RX nor + * a TX queue has this number, return -EINVAL. If only an RX queue or a TX + * queue has this number, ignore the inapplicable fields. + * Returns a negative error code or zero. * * All operations are optional (i.e. the function pointer may be set * to %NULL) and callers must take this into account. 
Callers must @@ -286,6 +291,8 @@ struct ethtool_ops { const struct ethtool_tunable *, const void *); int (*get_per_queue_coalesce)(struct net_device *, u32, struct ethtool_coalesce *); + int (*set_per_queue_coalesce)(struct net_device *, u32, + struct ethtool_coalesce *); }; #endif /* _LINUX_ETHTOOL_H */ diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 2a6c3a26f63f..2406101002b1 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -1920,6 +1920,65 @@ static int ethtool_get_per_queue_coalesce(struct net_device *dev, return 0; } +static int ethtool_set_per_queue_coalesce(struct net_device *dev, + void __user *useraddr, + struct ethtool_per_queue_op *per_queue_opt) +{ + u32 bit; + int i, ret = 0; + int n_queue; + struct ethtool_coalesce *backup = NULL, *tmp = NULL; + DECLARE_BITMAP(queue_mask, MAX_NUM_QUEUE); + + if ((!dev->ethtool_ops->set_per_queue_coalesce) || + (!dev->ethtool_ops->get_per_queue_coalesce)) + return -EOPNOTSUPP; + + useraddr += sizeof(*per_queue_opt); + + bitmap_from_u32array(queue_mask, + MAX_NUM_QUEUE, + per_queue_opt->queue_mask, + DIV_ROUND_UP(MAX_NUM_QUEUE, 32)); + n_queue = bitmap_weight(queue_mask, MAX_NUM_QUEUE); + tmp = backup = kmalloc_array(n_queue, sizeof(*backup), GFP_KERNEL); + if (!backup) + return -ENOMEM; + + for_each_set_bit(bit, queue_mask, MAX_NUM_QUEUE) { + struct ethtool_coalesce coalesce; + + ret = dev->ethtool_ops->get_per_queue_coalesce(dev, bit, tmp); + if (ret != 0) + goto roll_back; + + tmp++; + + if (copy_from_user(&coalesce, useraddr, sizeof(coalesce))) { + ret = -EFAULT; + goto roll_back; + } + + ret = dev->ethtool_ops->set_per_queue_coalesce(dev, bit, &coalesce); + if (ret != 0) + goto roll_back; + + useraddr += sizeof(coalesce); + } + +roll_back: + if (ret != 0) { + tmp = backup; + for_each_set_bit(i, queue_mask, bit) { + dev->ethtool_ops->set_per_queue_coalesce(dev, i, tmp); + tmp++; + } + } + kfree(backup); + + return ret; +} + static int ethtool_set_per_queue(struct net_device *dev, void __user *useraddr) { struct ethtool_per_queue_op per_queue_opt; @@ -1930,6 +1989,8 @@ static int ethtool_set_per_queue(struct net_device *dev, void __user *useraddr) switch (per_queue_opt.sub_command) { case ETHTOOL_GCOALESCE: return ethtool_get_per_queue_coalesce(dev, useraddr, &per_queue_opt); + case ETHTOOL_SCOALESCE: + return ethtool_set_per_queue_coalesce(dev, useraddr, &per_queue_opt); default: return -EOPNOTSUPP; }; -- cgit v1.2.3 From 568b329a02f75ed3aaae5eb2cca384cb9e09cb29 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Wed, 17 Feb 2016 19:58:57 -0800 Subject: perf: generalize perf_callchain . avoid walking the stack when there is no room left in the buffer . generalize get_perf_callchain() to be called from bpf helper Signed-off-by: Alexei Starovoitov Signed-off-by: David S. 
Miller --- arch/x86/include/asm/stacktrace.h | 2 +- arch/x86/kernel/cpu/perf_event.c | 4 ++-- arch/x86/kernel/dumpstack.c | 6 ++++-- arch/x86/kernel/stacktrace.c | 18 +++++++++++------- arch/x86/oprofile/backtrace.c | 3 ++- include/linux/perf_event.h | 13 +++++++++++-- kernel/events/callchain.c | 32 ++++++++++++++++++++------------ kernel/events/internal.h | 2 -- 8 files changed, 51 insertions(+), 29 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/include/asm/stacktrace.h b/arch/x86/include/asm/stacktrace.h index 70bbe39043a9..7c247e7404be 100644 --- a/arch/x86/include/asm/stacktrace.h +++ b/arch/x86/include/asm/stacktrace.h @@ -37,7 +37,7 @@ print_context_stack_bp(struct thread_info *tinfo, /* Generic stack tracer with callbacks */ struct stacktrace_ops { - void (*address)(void *data, unsigned long address, int reliable); + int (*address)(void *data, unsigned long address, int reliable); /* On negative return stop dumping */ int (*stack)(void *data, char *name); walk_stack_t walk_stack; diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 1b443db2db50..d276b31ca473 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -2180,11 +2180,11 @@ static int backtrace_stack(void *data, char *name) return 0; } -static void backtrace_address(void *data, unsigned long addr, int reliable) +static int backtrace_address(void *data, unsigned long addr, int reliable) { struct perf_callchain_entry *entry = data; - perf_callchain_store(entry, addr); + return perf_callchain_store(entry, addr); } static const struct stacktrace_ops backtrace_ops = { diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index 9c30acfadae2..0d1ff4b407d4 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -135,7 +135,8 @@ print_context_stack_bp(struct thread_info *tinfo, if (!__kernel_text_address(addr)) break; - ops->address(data, addr, 1); + if (ops->address(data, addr, 1)) + break; frame = frame->next_frame; ret_addr = &frame->return_address; print_ftrace_graph_addr(addr, data, ops, tinfo, graph); @@ -154,10 +155,11 @@ static int print_trace_stack(void *data, char *name) /* * Print one address/symbol entries per line. 
*/ -static void print_trace_address(void *data, unsigned long addr, int reliable) +static int print_trace_address(void *data, unsigned long addr, int reliable) { touch_nmi_watchdog(); printk_stack_address(addr, reliable, data); + return 0; } static const struct stacktrace_ops print_trace_ops = { diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c index fdd0c6430e5a..9ee98eefc44d 100644 --- a/arch/x86/kernel/stacktrace.c +++ b/arch/x86/kernel/stacktrace.c @@ -14,30 +14,34 @@ static int save_stack_stack(void *data, char *name) return 0; } -static void +static int __save_stack_address(void *data, unsigned long addr, bool reliable, bool nosched) { struct stack_trace *trace = data; #ifdef CONFIG_FRAME_POINTER if (!reliable) - return; + return 0; #endif if (nosched && in_sched_functions(addr)) - return; + return 0; if (trace->skip > 0) { trace->skip--; - return; + return 0; } - if (trace->nr_entries < trace->max_entries) + if (trace->nr_entries < trace->max_entries) { trace->entries[trace->nr_entries++] = addr; + return 0; + } else { + return -1; /* no more room, stop walking the stack */ + } } -static void save_stack_address(void *data, unsigned long addr, int reliable) +static int save_stack_address(void *data, unsigned long addr, int reliable) { return __save_stack_address(data, addr, reliable, false); } -static void +static int save_stack_address_nosched(void *data, unsigned long addr, int reliable) { return __save_stack_address(data, addr, reliable, true); diff --git a/arch/x86/oprofile/backtrace.c b/arch/x86/oprofile/backtrace.c index 4e664bdb535a..cb31a4440e58 100644 --- a/arch/x86/oprofile/backtrace.c +++ b/arch/x86/oprofile/backtrace.c @@ -23,12 +23,13 @@ static int backtrace_stack(void *data, char *name) return 0; } -static void backtrace_address(void *data, unsigned long addr, int reliable) +static int backtrace_address(void *data, unsigned long addr, int reliable) { unsigned int *depth = data; if ((*depth)--) oprofile_add_trace(addr); + return 0; } static struct stacktrace_ops backtrace_ops = { diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index b35a61a481fa..7da3c25999df 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -964,11 +964,20 @@ DECLARE_PER_CPU(struct perf_callchain_entry, perf_callchain_entry); extern void perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs); extern void perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs); +extern struct perf_callchain_entry * +get_perf_callchain(struct pt_regs *regs, u32 init_nr, bool kernel, bool user, + bool crosstask, bool add_mark); +extern int get_callchain_buffers(void); +extern void put_callchain_buffers(void); -static inline void perf_callchain_store(struct perf_callchain_entry *entry, u64 ip) +static inline int perf_callchain_store(struct perf_callchain_entry *entry, u64 ip) { - if (entry->nr < PERF_MAX_STACK_DEPTH) + if (entry->nr < PERF_MAX_STACK_DEPTH) { entry->ip[entry->nr++] = ip; + return 0; + } else { + return -1; /* no more room, stop walking the stack */ + } } extern int sysctl_perf_event_paranoid; diff --git a/kernel/events/callchain.c b/kernel/events/callchain.c index 9c418002b8c1..343c22f5e867 100644 --- a/kernel/events/callchain.c +++ b/kernel/events/callchain.c @@ -159,15 +159,24 @@ put_callchain_entry(int rctx) struct perf_callchain_entry * perf_callchain(struct perf_event *event, struct pt_regs *regs) { - int rctx; - struct perf_callchain_entry *entry; - - int kernel = 
!event->attr.exclude_callchain_kernel; - int user = !event->attr.exclude_callchain_user; + bool kernel = !event->attr.exclude_callchain_kernel; + bool user = !event->attr.exclude_callchain_user; + /* Disallow cross-task user callchains. */ + bool crosstask = event->ctx->task && event->ctx->task != current; if (!kernel && !user) return NULL; + return get_perf_callchain(regs, 0, kernel, user, crosstask, true); +} + +struct perf_callchain_entry * +get_perf_callchain(struct pt_regs *regs, u32 init_nr, bool kernel, bool user, + bool crosstask, bool add_mark) +{ + struct perf_callchain_entry *entry; + int rctx; + entry = get_callchain_entry(&rctx); if (rctx == -1) return NULL; @@ -175,10 +184,11 @@ perf_callchain(struct perf_event *event, struct pt_regs *regs) if (!entry) goto exit_put; - entry->nr = 0; + entry->nr = init_nr; if (kernel && !user_mode(regs)) { - perf_callchain_store(entry, PERF_CONTEXT_KERNEL); + if (add_mark) + perf_callchain_store(entry, PERF_CONTEXT_KERNEL); perf_callchain_kernel(entry, regs); } @@ -191,13 +201,11 @@ perf_callchain(struct perf_event *event, struct pt_regs *regs) if (regs) { - /* - * Disallow cross-task user callchains. - */ - if (event->ctx->task && event->ctx->task != current) + if (crosstask) goto exit_put; - perf_callchain_store(entry, PERF_CONTEXT_USER); + if (add_mark) + perf_callchain_store(entry, PERF_CONTEXT_USER); perf_callchain_user(entry, regs); } } diff --git a/kernel/events/internal.h b/kernel/events/internal.h index 2bbad9c1274c..4199b6d193f5 100644 --- a/kernel/events/internal.h +++ b/kernel/events/internal.h @@ -182,8 +182,6 @@ DEFINE_OUTPUT_COPY(__output_copy_user, arch_perf_out_copy_user) /* Callchain handling */ extern struct perf_callchain_entry * perf_callchain(struct perf_event *event, struct pt_regs *regs); -extern int get_callchain_buffers(void); -extern void put_callchain_buffers(void); static inline int get_recursion_context(int *recursion) { -- cgit v1.2.3 From d5a3b1f691865be576c2bffa708549b8cdccda19 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Wed, 17 Feb 2016 19:58:58 -0800 Subject: bpf: introduce BPF_MAP_TYPE_STACK_TRACE add new map type to store stack traces and corresponding helper bpf_get_stackid(ctx, map, flags) - walk user or kernel stack and return id @ctx: struct pt_regs* @map: pointer to stack_trace map @flags: bits 0-7 - number of stack frames to skip bit 8 - collect user stack instead of kernel bit 9 - compare stacks by hash only bit 10 - if two different stacks hash into the same stackid discard old other bits - reserved Return: >= 0 stackid on success or negative error stackid is a 32-bit integer handle that can be further combined with other data (including other stackid) and used as a key into maps. Userspace will access stackmap using standard lookup/delete syscall commands to retrieve full stack trace for given stackid. Signed-off-by: Alexei Starovoitov Signed-off-by: David S. 
Miller --- include/linux/bpf.h | 1 + include/uapi/linux/bpf.h | 21 +++++ kernel/bpf/Makefile | 3 + kernel/bpf/stackmap.c | 237 +++++++++++++++++++++++++++++++++++++++++++++++ kernel/bpf/verifier.c | 6 +- kernel/trace/bpf_trace.c | 2 + 6 files changed, 269 insertions(+), 1 deletion(-) create mode 100644 kernel/bpf/stackmap.c (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 90ee6ab24bc5..0cadbb7456c0 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -237,6 +237,7 @@ extern const struct bpf_func_proto bpf_get_current_uid_gid_proto; extern const struct bpf_func_proto bpf_get_current_comm_proto; extern const struct bpf_func_proto bpf_skb_vlan_push_proto; extern const struct bpf_func_proto bpf_skb_vlan_pop_proto; +extern const struct bpf_func_proto bpf_get_stackid_proto; /* Shared helpers among cBPF and eBPF. */ void bpf_user_rnd_init_once(void); diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 2ee0fde1bf96..d3e77da8e9e8 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -83,6 +83,7 @@ enum bpf_map_type { BPF_MAP_TYPE_PERF_EVENT_ARRAY, BPF_MAP_TYPE_PERCPU_HASH, BPF_MAP_TYPE_PERCPU_ARRAY, + BPF_MAP_TYPE_STACK_TRACE, }; enum bpf_prog_type { @@ -272,6 +273,20 @@ enum bpf_func_id { */ BPF_FUNC_perf_event_output, BPF_FUNC_skb_load_bytes, + + /** + * bpf_get_stackid(ctx, map, flags) - walk user or kernel stack and return id + * @ctx: struct pt_regs* + * @map: pointer to stack_trace map + * @flags: bits 0-7 - number of stack frames to skip + * bit 8 - collect user stack instead of kernel + * bit 9 - compare stacks by hash only + * bit 10 - if two different stacks hash into the same stackid + * discard old + * other bits - reserved + * Return: >= 0 stackid on success or negative error + */ + BPF_FUNC_get_stackid, __BPF_FUNC_MAX_ID, }; @@ -294,6 +309,12 @@ enum bpf_func_id { /* BPF_FUNC_skb_set_tunnel_key and BPF_FUNC_skb_get_tunnel_key flags. */ #define BPF_F_TUNINFO_IPV6 (1ULL << 0) +/* BPF_FUNC_get_stackid flags. */ +#define BPF_F_SKIP_FIELD_MASK 0xffULL +#define BPF_F_USER_STACK (1ULL << 8) +#define BPF_F_FAST_STACK_CMP (1ULL << 9) +#define BPF_F_REUSE_STACKID (1ULL << 10) + /* user accessible mirror of in-kernel sk_buff. * new fields can only be added to the end of this structure */ diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile index 13272582eee0..8a932d079c24 100644 --- a/kernel/bpf/Makefile +++ b/kernel/bpf/Makefile @@ -2,3 +2,6 @@ obj-y := core.o obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o +ifeq ($(CONFIG_PERF_EVENTS),y) +obj-$(CONFIG_BPF_SYSCALL) += stackmap.o +endif diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c new file mode 100644 index 000000000000..8a60ee14a977 --- /dev/null +++ b/kernel/bpf/stackmap.c @@ -0,0 +1,237 @@ +/* Copyright (c) 2016 Facebook + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. 
+ */ +#include +#include +#include +#include +#include +#include + +struct stack_map_bucket { + struct rcu_head rcu; + u32 hash; + u32 nr; + u64 ip[]; +}; + +struct bpf_stack_map { + struct bpf_map map; + u32 n_buckets; + struct stack_map_bucket __rcu *buckets[]; +}; + +/* Called from syscall */ +static struct bpf_map *stack_map_alloc(union bpf_attr *attr) +{ + u32 value_size = attr->value_size; + struct bpf_stack_map *smap; + u64 cost, n_buckets; + int err; + + if (!capable(CAP_SYS_ADMIN)) + return ERR_PTR(-EPERM); + + /* check sanity of attributes */ + if (attr->max_entries == 0 || attr->key_size != 4 || + value_size < 8 || value_size % 8 || + value_size / 8 > PERF_MAX_STACK_DEPTH) + return ERR_PTR(-EINVAL); + + /* hash table size must be power of 2 */ + n_buckets = roundup_pow_of_two(attr->max_entries); + + cost = n_buckets * sizeof(struct stack_map_bucket *) + sizeof(*smap); + if (cost >= U32_MAX - PAGE_SIZE) + return ERR_PTR(-E2BIG); + + smap = kzalloc(cost, GFP_USER | __GFP_NOWARN); + if (!smap) { + smap = vzalloc(cost); + if (!smap) + return ERR_PTR(-ENOMEM); + } + + err = -E2BIG; + cost += n_buckets * (value_size + sizeof(struct stack_map_bucket)); + if (cost >= U32_MAX - PAGE_SIZE) + goto free_smap; + + smap->map.map_type = attr->map_type; + smap->map.key_size = attr->key_size; + smap->map.value_size = value_size; + smap->map.max_entries = attr->max_entries; + smap->n_buckets = n_buckets; + smap->map.pages = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT; + + err = get_callchain_buffers(); + if (err) + goto free_smap; + + return &smap->map; + +free_smap: + kvfree(smap); + return ERR_PTR(err); +} + +static u64 bpf_get_stackid(u64 r1, u64 r2, u64 flags, u64 r4, u64 r5) +{ + struct pt_regs *regs = (struct pt_regs *) (long) r1; + struct bpf_map *map = (struct bpf_map *) (long) r2; + struct bpf_stack_map *smap = container_of(map, struct bpf_stack_map, map); + struct perf_callchain_entry *trace; + struct stack_map_bucket *bucket, *new_bucket, *old_bucket; + u32 max_depth = map->value_size / 8; + /* stack_map_alloc() checks that max_depth <= PERF_MAX_STACK_DEPTH */ + u32 init_nr = PERF_MAX_STACK_DEPTH - max_depth; + u32 skip = flags & BPF_F_SKIP_FIELD_MASK; + u32 hash, id, trace_nr, trace_len; + bool user = flags & BPF_F_USER_STACK; + bool kernel = !user; + u64 *ips; + + if (unlikely(flags & ~(BPF_F_SKIP_FIELD_MASK | BPF_F_USER_STACK | + BPF_F_FAST_STACK_CMP | BPF_F_REUSE_STACKID))) + return -EINVAL; + + trace = get_perf_callchain(regs, init_nr, kernel, user, false, false); + + if (unlikely(!trace)) + /* couldn't fetch the stack trace */ + return -EFAULT; + + /* get_perf_callchain() guarantees that trace->nr >= init_nr + * and trace-nr <= PERF_MAX_STACK_DEPTH, so trace_nr <= max_depth + */ + trace_nr = trace->nr - init_nr; + + if (trace_nr <= skip) + /* skipping more than usable stack trace */ + return -EFAULT; + + trace_nr -= skip; + trace_len = trace_nr * sizeof(u64); + ips = trace->ip + skip + init_nr; + hash = jhash2((u32 *)ips, trace_len / sizeof(u32), 0); + id = hash & (smap->n_buckets - 1); + bucket = rcu_dereference(smap->buckets[id]); + + if (bucket && bucket->hash == hash) { + if (flags & BPF_F_FAST_STACK_CMP) + return id; + if (bucket->nr == trace_nr && + memcmp(bucket->ip, ips, trace_len) == 0) + return id; + } + + /* this call stack is not in the map, try to add it */ + if (bucket && !(flags & BPF_F_REUSE_STACKID)) + return -EEXIST; + + new_bucket = kmalloc(sizeof(struct stack_map_bucket) + map->value_size, + GFP_ATOMIC | __GFP_NOWARN); + if (unlikely(!new_bucket)) + return 
-ENOMEM; + + memcpy(new_bucket->ip, ips, trace_len); + memset(new_bucket->ip + trace_len / 8, 0, map->value_size - trace_len); + new_bucket->hash = hash; + new_bucket->nr = trace_nr; + + old_bucket = xchg(&smap->buckets[id], new_bucket); + if (old_bucket) + kfree_rcu(old_bucket, rcu); + return id; +} + +const struct bpf_func_proto bpf_get_stackid_proto = { + .func = bpf_get_stackid, + .gpl_only = true, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_CONST_MAP_PTR, + .arg3_type = ARG_ANYTHING, +}; + +/* Called from syscall or from eBPF program */ +static void *stack_map_lookup_elem(struct bpf_map *map, void *key) +{ + struct bpf_stack_map *smap = container_of(map, struct bpf_stack_map, map); + struct stack_map_bucket *bucket; + u32 id = *(u32 *)key; + + if (unlikely(id >= smap->n_buckets)) + return NULL; + bucket = rcu_dereference(smap->buckets[id]); + return bucket ? bucket->ip : NULL; +} + +static int stack_map_get_next_key(struct bpf_map *map, void *key, void *next_key) +{ + return -EINVAL; +} + +static int stack_map_update_elem(struct bpf_map *map, void *key, void *value, + u64 map_flags) +{ + return -EINVAL; +} + +/* Called from syscall or from eBPF program */ +static int stack_map_delete_elem(struct bpf_map *map, void *key) +{ + struct bpf_stack_map *smap = container_of(map, struct bpf_stack_map, map); + struct stack_map_bucket *old_bucket; + u32 id = *(u32 *)key; + + if (unlikely(id >= smap->n_buckets)) + return -E2BIG; + + old_bucket = xchg(&smap->buckets[id], NULL); + if (old_bucket) { + kfree_rcu(old_bucket, rcu); + return 0; + } else { + return -ENOENT; + } +} + +/* Called when map->refcnt goes to zero, either from workqueue or from syscall */ +static void stack_map_free(struct bpf_map *map) +{ + struct bpf_stack_map *smap = container_of(map, struct bpf_stack_map, map); + int i; + + synchronize_rcu(); + + for (i = 0; i < smap->n_buckets; i++) + if (smap->buckets[i]) + kfree_rcu(smap->buckets[i], rcu); + kvfree(smap); + put_callchain_buffers(); +} + +static const struct bpf_map_ops stack_map_ops = { + .map_alloc = stack_map_alloc, + .map_free = stack_map_free, + .map_get_next_key = stack_map_get_next_key, + .map_lookup_elem = stack_map_lookup_elem, + .map_update_elem = stack_map_update_elem, + .map_delete_elem = stack_map_delete_elem, +}; + +static struct bpf_map_type_list stack_map_type __read_mostly = { + .ops = &stack_map_ops, + .type = BPF_MAP_TYPE_STACK_TRACE, +}; + +static int __init register_stack_map(void) +{ + bpf_register_map_type(&stack_map_type); + return 0; +} +late_initcall(register_stack_map); diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index d1d3e8f57de9..42ba4ccc020b 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -246,6 +246,7 @@ static const struct { {BPF_MAP_TYPE_PROG_ARRAY, BPF_FUNC_tail_call}, {BPF_MAP_TYPE_PERF_EVENT_ARRAY, BPF_FUNC_perf_event_read}, {BPF_MAP_TYPE_PERF_EVENT_ARRAY, BPF_FUNC_perf_event_output}, + {BPF_MAP_TYPE_STACK_TRACE, BPF_FUNC_get_stackid}, }; static void print_verifier_state(struct verifier_env *env) @@ -911,8 +912,11 @@ static int check_map_func_compatibility(struct bpf_map *map, int func_id) * don't allow any other map type to be passed into * the special func; */ - if (bool_func && bool_map != bool_func) + if (bool_func && bool_map != bool_func) { + verbose("cannot pass map_type %d into func %d\n", + map->map_type, func_id); return -EINVAL; + } } return 0; diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 326a75e884db..4b8caa392b86 100644 --- 
a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -299,6 +299,8 @@ static const struct bpf_func_proto *kprobe_prog_func_proto(enum bpf_func_id func return &bpf_perf_event_read_proto; case BPF_FUNC_perf_event_output: return &bpf_perf_event_output_proto; + case BPF_FUNC_get_stackid: + return &bpf_get_stackid_proto; default: return NULL; } -- cgit v1.2.3 From 2307f3aab887bc43e0f262d298977fd9c6b9eee3 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Sat, 20 Feb 2016 12:53:31 +0100 Subject: Revert "gpio: remove broken irq_to_gpio() interface" This reverts commit ee2204a37957daed80418ea8ffc4f5c3146fb8e7. --- include/linux/gpio.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/gpio.h b/include/linux/gpio.h index 6fc1c9e74854..d12b5d566e4b 100644 --- a/include/linux/gpio.h +++ b/include/linux/gpio.h @@ -70,6 +70,11 @@ static inline int gpio_to_irq(unsigned int gpio) return __gpio_to_irq(gpio); } +static inline int irq_to_gpio(unsigned int irq) +{ + return -EINVAL; +} + #endif /* ! CONFIG_ARCH_HAVE_CUSTOM_GPIO_H */ /* CONFIG_GPIOLIB: bindings for managed devices that want to request gpios */ @@ -217,6 +222,13 @@ static inline void gpiochip_unlock_as_irq(struct gpio_chip *chip, WARN_ON(1); } +static inline int irq_to_gpio(unsigned irq) +{ + /* irq can never have been returned from gpio_to_irq() */ + WARN_ON(1); + return -EINVAL; +} + static inline int gpiochip_add_pin_range(struct gpio_chip *chip, const char *pinctl_name, unsigned int gpio_offset, unsigned int pin_offset, -- cgit v1.2.3 From f8560a9bc76b2cd5c06fa412cb7b5481d70fcf34 Mon Sep 17 00:00:00 2001 From: Alexander Shishkin Date: Mon, 15 Feb 2016 19:12:01 +0200 Subject: stm class: Use driver's packet callback return value STM drivers provide a callback to generate/send individual STP packets; it also tells the stm core how many bytes of payload it has consumed. However, we would also need to use the negative space of this return value to communicate errors that occur during the packet generation, in which case the stm core will have to take appropriate action. For now, we need to account for the possibility that the stm driver may not support certain combinations of packet type/flags, in which case it is expected to signal an error. 
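As a rough illustration of that contract (not part of this patch; my_hw_write_data() and my_hw_send_flag() are purely hypothetical hardware helpers), a driver's packet() callback might look like this:

static ssize_t my_stm_packet(struct stm_data *stm_data, unsigned int master,
			     unsigned int channel, unsigned int packet,
			     unsigned int flags, unsigned int size,
			     const unsigned char *payload)
{
	switch (packet) {
	case STP_PACKET_DATA:
		/* Report how many payload bytes were actually consumed. */
		return my_hw_write_data(master, channel, payload, size,
					flags & STP_PACKET_TIMESTAMPED);
	case STP_PACKET_FLAG:
		/* FLAG carries no payload, so zero bytes were consumed. */
		my_hw_send_flag(master, channel);
		return 0;
	default:
		/* Unsupported packet type/flag combination. */
		return -ENOTSUPP;
	}
}
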
Signed-off-by: Alexander Shishkin Signed-off-by: Greg Kroah-Hartman --- drivers/hwtracing/stm/core.c | 19 ++++++++++++------- include/linux/stm.h | 7 +++++++ 2 files changed, 19 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/drivers/hwtracing/stm/core.c b/drivers/hwtracing/stm/core.c index 79cca94bfb58..0db303b50e51 100644 --- a/drivers/hwtracing/stm/core.c +++ b/drivers/hwtracing/stm/core.c @@ -380,8 +380,8 @@ static int stm_file_assign(struct stm_file *stmf, char *id, unsigned int width) return ret; } -static void stm_write(struct stm_data *data, unsigned int master, - unsigned int channel, const char *buf, size_t count) +static ssize_t stm_write(struct stm_data *data, unsigned int master, + unsigned int channel, const char *buf, size_t count) { unsigned int flags = STP_PACKET_TIMESTAMPED; const unsigned char *p = buf, nil = 0; @@ -393,9 +393,14 @@ static void stm_write(struct stm_data *data, unsigned int master, sz = data->packet(data, master, channel, STP_PACKET_DATA, flags, sz, p); flags = 0; + + if (sz < 0) + break; } data->packet(data, master, channel, STP_PACKET_FLAG, 0, 0, &nil); + + return pos; } static ssize_t stm_char_write(struct file *file, const char __user *buf, @@ -433,8 +438,8 @@ static ssize_t stm_char_write(struct file *file, const char __user *buf, return -EFAULT; } - stm_write(stm->data, stmf->output.master, stmf->output.channel, kbuf, - count); + count = stm_write(stm->data, stmf->output.master, stmf->output.channel, + kbuf, count); kfree(kbuf); @@ -996,9 +1001,9 @@ int stm_source_write(struct stm_source_data *data, unsigned int chan, stm = srcu_dereference(src->link, &stm_source_srcu); if (stm) - stm_write(stm->data, src->output.master, - src->output.channel + chan, - buf, count); + count = stm_write(stm->data, src->output.master, + src->output.channel + chan, + buf, count); else count = -ENODEV; diff --git a/include/linux/stm.h b/include/linux/stm.h index 9d0083d364e6..ab8ceca4f570 100644 --- a/include/linux/stm.h +++ b/include/linux/stm.h @@ -67,6 +67,13 @@ struct stm_device; * description. That is, the lowest master that can be allocated to software * writers is @sw_start and data from this writer will appear is @sw_start * master in the STP stream. + * + * The @packet callback should adhere to the following rules: + * 1) it must return the number of bytes it consumed from the payload; + * 2) therefore, if it sent a packet that does not have payload (like FLAG), + * it must return zero; + * 3) if it does not support the requested packet type/flag combination, + * it must return -ENOTSUPP. */ struct stm_data { const char *name; -- cgit v1.2.3 From cc8424074e51355e0c6ba717d8edc50d408f2802 Mon Sep 17 00:00:00 2001 From: Alexander Shishkin Date: Mon, 15 Feb 2016 19:12:09 +0200 Subject: stm class: Plug stm device's unlink callback STM device's unlink callback is never actually called from anywhere in the stm class code. This patch adds calls to stm driver's unlink method after the unlinking has succeeded. 
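A minimal sketch of such an unlink callback (my_hw_quiesce_channel() is a hypothetical helper; only the signature is dictated by the stm class):

static void my_stm_unlink(struct stm_data *stm_data, unsigned int master,
			  unsigned int channel)
{
	/* No writers are left on this master/channel pair: quiesce it. */
	my_hw_quiesce_channel(master, channel);
}
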
Signed-off-by: Alexander Shishkin Signed-off-by: Greg Kroah-Hartman --- drivers/hwtracing/stm/core.c | 23 +++++++++++++++++++---- include/linux/stm.h | 3 +++ 2 files changed, 22 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/drivers/hwtracing/stm/core.c b/drivers/hwtracing/stm/core.c index 30181821d909..de80d45d8df9 100644 --- a/drivers/hwtracing/stm/core.c +++ b/drivers/hwtracing/stm/core.c @@ -376,14 +376,19 @@ err_free: static int stm_char_release(struct inode *inode, struct file *file) { struct stm_file *stmf = file->private_data; + struct stm_device *stm = stmf->stm; + + if (stm->data->unlink) + stm->data->unlink(stm->data, stmf->output.master, + stmf->output.channel); - stm_output_free(stmf->stm, &stmf->output); + stm_output_free(stm, &stmf->output); /* * matches the stm_char_open()'s * class_find_device() + try_module_get() */ - stm_put_device(stmf->stm); + stm_put_device(stm); kfree(stmf); return 0; @@ -865,8 +870,18 @@ unlock: spin_unlock(&src->link_lock); spin_unlock(&stm->link_lock); - if (!ret && src->data->unlink) - src->data->unlink(src->data); + /* + * Call the unlink callbacks for both source and stm, when we know + * that we have actually performed the unlinking. + */ + if (!ret) { + if (src->data->unlink) + src->data->unlink(src->data); + + if (stm->data->unlink) + stm->data->unlink(stm->data, src->output.master, + src->output.channel); + } return ret; } diff --git a/include/linux/stm.h b/include/linux/stm.h index ab8ceca4f570..1a79ed8e43da 100644 --- a/include/linux/stm.h +++ b/include/linux/stm.h @@ -74,6 +74,9 @@ struct stm_device; * it must return zero; * 3) if it does not support the requested packet type/flag combination, * it must return -ENOTSUPP. + * + * The @unlink callback is called when there are no more active writers so + * that the master/channel can be quiesced. */ struct stm_data { const char *name; -- cgit v1.2.3 From b3e94405941e6916d5e365454d74560c2bea47ca Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Wed, 17 Feb 2016 17:51:45 -0700 Subject: coresight: associating path with session rather than tracer When using the Coresight framework from the sysFS interface a tracer is always handling a single session and as such, a path can be associated with a tracer. But when supporting multiple sessions per tracer there is no guarantee that sessions will always have the same path from source to sink. This patch removes the automatic association between path and tracers. The building of a path and the enablement of the components in the path are decoupled, allowing for the association of a path with a session rather than a tracer. To keep the current sysFS access methods working, a per-cpu placeholder is used to keep a handle on the path built when tracers are enabled. Lastly, APIs to build paths and enable tracers are made public so that other subsystems can interact with the Coresight framework. 
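A sketch of how a consumer inside the framework could drive a session with the now-public path API (error handling abbreviated; csdev is assumed to be a source whose downstream sink has already been activated):

static int my_trace_session(struct coresight_device *csdev)
{
	struct list_head *path;
	int ret;

	path = coresight_build_path(csdev);	/* source to activated sink */
	if (!path)
		return -ENODEV;

	ret = coresight_enable_path(path);	/* enable sink and links */
	if (!ret) {
		/* ... run the session tied to this path ... */
		coresight_disable_path(path);
	}

	coresight_release_path(path);		/* free the nodes and the list */
	return ret;
}
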
Signed-off-by: Mathieu Poirier Signed-off-by: Greg Kroah-Hartman --- drivers/hwtracing/coresight/coresight-priv.h | 5 + drivers/hwtracing/coresight/coresight.c | 296 ++++++++++++++++++--------- include/linux/coresight.h | 2 - 3 files changed, 206 insertions(+), 97 deletions(-) (limited to 'include/linux') diff --git a/drivers/hwtracing/coresight/coresight-priv.h b/drivers/hwtracing/coresight/coresight-priv.h index 62fcd98cc7cf..7b193a34d709 100644 --- a/drivers/hwtracing/coresight/coresight-priv.h +++ b/drivers/hwtracing/coresight/coresight-priv.h @@ -52,6 +52,11 @@ static inline void CS_UNLOCK(void __iomem *addr) } while (0); } +void coresight_disable_path(struct list_head *path); +int coresight_enable_path(struct list_head *path); +struct list_head *coresight_build_path(struct coresight_device *csdev); +void coresight_release_path(struct list_head *path); + #ifdef CONFIG_CORESIGHT_SOURCE_ETM3X extern int etm_readl_cp14(u32 off, unsigned int *val); extern int etm_writel_cp14(u32 off, u32 val); diff --git a/drivers/hwtracing/coresight/coresight.c b/drivers/hwtracing/coresight/coresight.c index 7e6e9ff27dd1..f26589effb70 100644 --- a/drivers/hwtracing/coresight/coresight.c +++ b/drivers/hwtracing/coresight/coresight.c @@ -29,6 +29,22 @@ static DEFINE_MUTEX(coresight_mutex); +/** + * struct coresight_node - elements of a path, from source to sink + * @csdev: Address of an element. + * @link: hook to the list. + */ +struct coresight_node { + struct coresight_device *csdev; + struct list_head link; +}; + +/* + * When operating Coresight drivers from the sysFS interface, only a single + * path can exist from a tracer (associated to a CPU) to a sink. + */ +static DEFINE_PER_CPU(struct list_head *, sysfs_path); + static int coresight_id_match(struct device *dev, void *data) { int trace_id, i_trace_id; @@ -68,15 +84,12 @@ static int coresight_source_is_unique(struct coresight_device *csdev) csdev, coresight_id_match); } -static int coresight_find_link_inport(struct coresight_device *csdev) +static int coresight_find_link_inport(struct coresight_device *csdev, + struct coresight_device *parent) { int i; - struct coresight_device *parent; struct coresight_connection *conn; - parent = container_of(csdev->path_link.next, - struct coresight_device, path_link); - for (i = 0; i < parent->nr_outport; i++) { conn = &parent->conns[i]; if (conn->child_dev == csdev) @@ -89,15 +102,12 @@ static int coresight_find_link_inport(struct coresight_device *csdev) return 0; } -static int coresight_find_link_outport(struct coresight_device *csdev) +static int coresight_find_link_outport(struct coresight_device *csdev, + struct coresight_device *child) { int i; - struct coresight_device *child; struct coresight_connection *conn; - child = container_of(csdev->path_link.prev, - struct coresight_device, path_link); - for (i = 0; i < csdev->nr_outport; i++) { conn = &csdev->conns[i]; if (conn->child_dev == child) @@ -138,14 +148,19 @@ static void coresight_disable_sink(struct coresight_device *csdev) } } -static int coresight_enable_link(struct coresight_device *csdev) +static int coresight_enable_link(struct coresight_device *csdev, + struct coresight_device *parent, + struct coresight_device *child) { int ret; int link_subtype; int refport, inport, outport; - inport = coresight_find_link_inport(csdev); - outport = coresight_find_link_outport(csdev); + if (!parent || !child) + return -EINVAL; + + inport = coresight_find_link_inport(csdev, parent); + outport = coresight_find_link_outport(csdev, child); link_subtype = 
csdev->subtype.link_subtype; if (link_subtype == CORESIGHT_DEV_SUBTYPE_LINK_MERG) @@ -168,14 +183,19 @@ static int coresight_enable_link(struct coresight_device *csdev) return 0; } -static void coresight_disable_link(struct coresight_device *csdev) +static void coresight_disable_link(struct coresight_device *csdev, + struct coresight_device *parent, + struct coresight_device *child) { int i, nr_conns; int link_subtype; int refport, inport, outport; - inport = coresight_find_link_inport(csdev); - outport = coresight_find_link_outport(csdev); + if (!parent || !child) + return; + + inport = coresight_find_link_inport(csdev, parent); + outport = coresight_find_link_outport(csdev, child); link_subtype = csdev->subtype.link_subtype; if (link_subtype == CORESIGHT_DEV_SUBTYPE_LINK_MERG) { @@ -235,109 +255,167 @@ static void coresight_disable_source(struct coresight_device *csdev) } } -static int coresight_enable_path(struct list_head *path) +void coresight_disable_path(struct list_head *path) { - int ret = 0; - struct coresight_device *cd; + struct coresight_node *nd; + struct coresight_device *csdev, *parent, *child; - /* - * At this point we have a full @path, from source to sink. The - * sink is the first entry and the source the last one. Go through - * all the components and enable them one by one. - */ - list_for_each_entry(cd, path, path_link) { - if (cd == list_first_entry(path, struct coresight_device, - path_link)) { - ret = coresight_enable_sink(cd); - } else if (list_is_last(&cd->path_link, path)) { - /* - * Don't enable the source just yet - this needs to - * happen at the very end when all links and sink - * along the path have been configured properly. - */ - ; - } else { - ret = coresight_enable_link(cd); + list_for_each_entry(nd, path, link) { + csdev = nd->csdev; + + switch (csdev->type) { + case CORESIGHT_DEV_TYPE_SINK: + case CORESIGHT_DEV_TYPE_LINKSINK: + coresight_disable_sink(csdev); + break; + case CORESIGHT_DEV_TYPE_SOURCE: + /* sources are disabled from either sysFS or Perf */ + break; + case CORESIGHT_DEV_TYPE_LINK: + parent = list_prev_entry(nd, link)->csdev; + child = list_next_entry(nd, link)->csdev; + coresight_disable_link(csdev, parent, child); + break; + default: + break; } - if (ret) - goto err; } +} - return 0; -err: - list_for_each_entry_continue_reverse(cd, path, path_link) { - if (cd == list_first_entry(path, struct coresight_device, - path_link)) { - coresight_disable_sink(cd); - } else if (list_is_last(&cd->path_link, path)) { - ; - } else { - coresight_disable_link(cd); +int coresight_enable_path(struct list_head *path) +{ + + int ret = 0; + struct coresight_node *nd; + struct coresight_device *csdev, *parent, *child; + + list_for_each_entry_reverse(nd, path, link) { + csdev = nd->csdev; + + switch (csdev->type) { + case CORESIGHT_DEV_TYPE_SINK: + case CORESIGHT_DEV_TYPE_LINKSINK: + ret = coresight_enable_sink(csdev); + if (ret) + goto err; + break; + case CORESIGHT_DEV_TYPE_SOURCE: + /* sources are enabled from either sysFS or Perf */ + break; + case CORESIGHT_DEV_TYPE_LINK: + parent = list_prev_entry(nd, link)->csdev; + child = list_next_entry(nd, link)->csdev; + ret = coresight_enable_link(csdev, parent, child); + if (ret) + goto err; + break; + default: + goto err; } } +out: return ret; +err: + coresight_disable_path(path); + goto out; } -static int coresight_disable_path(struct list_head *path) +/** + * _coresight_build_path - recursively build a path from a @csdev to a sink. + * @csdev: The device to start from. + * @path: The list to add devices to. 
+ * + * The tree of Coresight device is traversed until an activated sink is + * found. From there the sink is added to the list along with all the + * devices that led to that point - the end result is a list from source + * to sink. In that list the source is the first device and the sink the + * last one. + */ +static int _coresight_build_path(struct coresight_device *csdev, + struct list_head *path) { - struct coresight_device *cd; + int i; + bool found = false; + struct coresight_node *node; + struct coresight_connection *conn; - list_for_each_entry_reverse(cd, path, path_link) { - if (cd == list_first_entry(path, struct coresight_device, - path_link)) { - coresight_disable_sink(cd); - } else if (list_is_last(&cd->path_link, path)) { - /* - * The source has already been stopped, no need - * to do it again here. - */ - ; - } else { - coresight_disable_link(cd); + /* An activated sink has been found. Enqueue the element */ + if ((csdev->type == CORESIGHT_DEV_TYPE_SINK || + csdev->type == CORESIGHT_DEV_TYPE_LINKSINK) && csdev->activated) + goto out; + + /* Not a sink - recursively explore each port found on this element */ + for (i = 0; i < csdev->nr_outport; i++) { + conn = &csdev->conns[i]; + if (_coresight_build_path(conn->child_dev, path) == 0) { + found = true; + break; } } + if (!found) + return -ENODEV; + +out: + /* + * A path from this element to a sink has been found. The elements + * leading to the sink are already enqueued, all that is left to do + * is add a node for this element. + */ + node = kzalloc(sizeof(struct coresight_node), GFP_KERNEL); + if (!node) + return -ENOMEM; + + node->csdev = csdev; + list_add(&node->link, path); + return 0; } -static int coresight_build_paths(struct coresight_device *csdev, - struct list_head *path, - bool enable) +struct list_head *coresight_build_path(struct coresight_device *csdev) { - int i, ret = -EINVAL; - struct coresight_connection *conn; + struct list_head *path; - list_add(&csdev->path_link, path); + path = kzalloc(sizeof(struct list_head), GFP_KERNEL); + if (!path) + return NULL; - if ((csdev->type == CORESIGHT_DEV_TYPE_SINK || - csdev->type == CORESIGHT_DEV_TYPE_LINKSINK) && - csdev->activated) { - if (enable) - ret = coresight_enable_path(path); - else - ret = coresight_disable_path(path); - } else { - for (i = 0; i < csdev->nr_outport; i++) { - conn = &csdev->conns[i]; - if (coresight_build_paths(conn->child_dev, - path, enable) == 0) - ret = 0; - } + INIT_LIST_HEAD(path); + + if (_coresight_build_path(csdev, path)) { + kfree(path); + path = NULL; } - if (list_first_entry(path, struct coresight_device, path_link) != csdev) - dev_err(&csdev->dev, "wrong device in %s\n", __func__); + return path; +} - list_del(&csdev->path_link); +/** + * coresight_release_path - release a previously built path. + * @path: the path to release. + * + * Go through all the elements of a path and 1) removed it from the list and + * 2) free the memory allocated for each node. 
+ */ +void coresight_release_path(struct list_head *path) +{ + struct coresight_node *nd, *next; - return ret; + list_for_each_entry_safe(nd, next, path, link) { + list_del(&nd->link); + kfree(nd); + } + + kfree(path); + path = NULL; } int coresight_enable(struct coresight_device *csdev) { int ret = 0; - LIST_HEAD(path); + int cpu; + struct list_head *path; mutex_lock(&coresight_mutex); if (csdev->type != CORESIGHT_DEV_TYPE_SOURCE) { @@ -348,22 +426,47 @@ int coresight_enable(struct coresight_device *csdev) if (csdev->enable) goto out; - if (coresight_build_paths(csdev, &path, true)) { - dev_err(&csdev->dev, "building path(s) failed\n"); + path = coresight_build_path(csdev); + if (!path) { + pr_err("building path(s) failed\n"); goto out; } - if (coresight_enable_source(csdev)) - dev_err(&csdev->dev, "source enable failed\n"); + ret = coresight_enable_path(path); + if (ret) + goto err_path; + + ret = coresight_enable_source(csdev); + if (ret) + goto err_source; + + /* + * When working from sysFS it is important to keep track + * of the paths that were created so that they can be + * undone in 'coresight_disable()'. Since there can only + * be a single session per tracer (when working from sysFS) + * a per-cpu variable will do just fine. + */ + cpu = source_ops(csdev)->cpu_id(csdev); + per_cpu(sysfs_path, cpu) = path; + out: mutex_unlock(&coresight_mutex); return ret; + +err_source: + coresight_disable_path(path); + +err_path: + coresight_release_path(path); + goto out; } EXPORT_SYMBOL_GPL(coresight_enable); void coresight_disable(struct coresight_device *csdev) { - LIST_HEAD(path); + int cpu; + struct list_head *path; mutex_lock(&coresight_mutex); if (csdev->type != CORESIGHT_DEV_TYPE_SOURCE) { @@ -373,9 +476,12 @@ void coresight_disable(struct coresight_device *csdev) if (!csdev->enable) goto out; + cpu = source_ops(csdev)->cpu_id(csdev); + path = per_cpu(sysfs_path, cpu); coresight_disable_source(csdev); - if (coresight_build_paths(csdev, &path, false)) - dev_err(&csdev->dev, "releasing path(s) failed\n"); + coresight_disable_path(path); + coresight_release_path(path); + per_cpu(sysfs_path, cpu) = NULL; out: mutex_unlock(&coresight_mutex); diff --git a/include/linux/coresight.h b/include/linux/coresight.h index bf62b265bf52..851ecb22397e 100644 --- a/include/linux/coresight.h +++ b/include/linux/coresight.h @@ -152,7 +152,6 @@ struct coresight_connection { by @coresight_ops. * @dev: The device entity associated to this component. * @refcnt: keep track of what is in use. - * @path_link: link of current component into the path being enabled. * @orphan: true if the component has connections that haven't been linked. * @enable: 'true' if component is currently part of an active path. * @activated: 'true' only if a _sink_ has been activated. A sink can be @@ -168,7 +167,6 @@ struct coresight_device { const struct coresight_ops *ops; struct device dev; atomic_t *refcnt; - struct list_head path_link; bool orphan; bool enable; /* true only if configured as part of a path */ bool activated; /* true only if a sink is part of a path */ -- cgit v1.2.3 From 22fd532eaa0c24d86e23d8e9e3b7feac4a8cac80 Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Wed, 17 Feb 2016 17:51:52 -0700 Subject: coresight: etm3x: adding operation mode for etm_enable() Adding a new mode to source API enable() in order to distinguish where the request comes from. That way it is possible to perform different operations based on where the request was issued from. 
The ETM4x driver is also modified to keep in sync with the new interface. Signed-off-by: Mathieu Poirier Signed-off-by: Greg Kroah-Hartman --- drivers/hwtracing/coresight/coresight-etm.h | 5 +- .../hwtracing/coresight/coresight-etm3x-sysfs.c | 4 +- drivers/hwtracing/coresight/coresight-etm3x.c | 68 +++++++++++++++++++--- drivers/hwtracing/coresight/coresight-etm4x.c | 2 +- drivers/hwtracing/coresight/coresight-priv.h | 6 ++ drivers/hwtracing/coresight/coresight.c | 6 +- include/linux/coresight.h | 2 +- 7 files changed, 76 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/drivers/hwtracing/coresight/coresight-etm.h b/drivers/hwtracing/coresight/coresight-etm.h index 371fb7d2e829..5b29d5540fe5 100644 --- a/drivers/hwtracing/coresight/coresight-etm.h +++ b/drivers/hwtracing/coresight/coresight-etm.h @@ -13,6 +13,7 @@ #ifndef _CORESIGHT_CORESIGHT_ETM_H #define _CORESIGHT_CORESIGHT_ETM_H +#include #include #include "coresight-priv.h" @@ -214,7 +215,7 @@ struct etm_config { * @port_size: port size as reported by ETMCR bit 4-6 and 21. * @arch: ETM/PTM version number. * @use_cpu14: true if management registers need to be accessed via CP14. - * @enable: is this ETM/PTM currently tracing. + * @mode: this tracer's mode, i.e sysFS, Perf or disabled. * @sticky_enable: true if ETM base configuration has been done. * @boot_enable:true if we should start tracing at boot time. * @os_unlock: true if access to management registers is allowed. @@ -238,7 +239,7 @@ struct etm_drvdata { int port_size; u8 arch; bool use_cp14; - bool enable; + local_t mode; bool sticky_enable; bool boot_enable; bool os_unlock; diff --git a/drivers/hwtracing/coresight/coresight-etm3x-sysfs.c b/drivers/hwtracing/coresight/coresight-etm3x-sysfs.c index 456df2378a6f..387c79fd9d5e 100644 --- a/drivers/hwtracing/coresight/coresight-etm3x-sysfs.c +++ b/drivers/hwtracing/coresight/coresight-etm3x-sysfs.c @@ -716,7 +716,7 @@ static ssize_t cntr_val_show(struct device *dev, struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent); struct etm_config *config = &drvdata->config; - if (!drvdata->enable) { + if (!local_read(&drvdata->mode)) { spin_lock(&drvdata->spinlock); for (i = 0; i < drvdata->nr_cntr; i++) ret += sprintf(buf, "counter %d: %x\n", @@ -935,7 +935,7 @@ static ssize_t seq_curr_state_show(struct device *dev, struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent); struct etm_config *config = &drvdata->config; - if (!drvdata->enable) { + if (!local_read(&drvdata->mode)) { val = config->seq_curr_state; goto out; } diff --git a/drivers/hwtracing/coresight/coresight-etm3x.c b/drivers/hwtracing/coresight/coresight-etm3x.c index 1952dc31fee4..e16501b41ed8 100644 --- a/drivers/hwtracing/coresight/coresight-etm3x.c +++ b/drivers/hwtracing/coresight/coresight-etm3x.c @@ -306,7 +306,7 @@ int etm_get_trace_id(struct etm_drvdata *drvdata) if (!drvdata) goto out; - if (!drvdata->enable) + if (!local_read(&drvdata->mode)) return drvdata->traceid; pm_runtime_get_sync(drvdata->dev); @@ -332,7 +332,7 @@ static int etm_trace_id(struct coresight_device *csdev) return etm_get_trace_id(drvdata); } -static int etm_enable(struct coresight_device *csdev) +static int etm_enable_sysfs(struct coresight_device *csdev) { struct etm_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); int ret; @@ -351,18 +351,44 @@ static int etm_enable(struct coresight_device *csdev) goto err; } - drvdata->enable = true; drvdata->sticky_enable = true; - spin_unlock(&drvdata->spinlock); dev_info(drvdata->dev, "ETM tracing enabled\n"); return 0; + 
err: spin_unlock(&drvdata->spinlock); return ret; } +static int etm_enable(struct coresight_device *csdev, u32 mode) +{ + int ret; + u32 val; + struct etm_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); + + val = local_cmpxchg(&drvdata->mode, CS_MODE_DISABLED, mode); + + /* Someone is already using the tracer */ + if (val) + return -EBUSY; + + switch (mode) { + case CS_MODE_SYSFS: + ret = etm_enable_sysfs(csdev); + break; + default: + ret = -EINVAL; + } + + /* The tracer didn't start */ + if (ret) + local_set(&drvdata->mode, CS_MODE_DISABLED); + + return ret; +} + static void etm_disable_hw(void *info) { int i; @@ -387,7 +413,7 @@ static void etm_disable_hw(void *info) dev_dbg(drvdata->dev, "cpu: %d disable smp call done\n", drvdata->cpu); } -static void etm_disable(struct coresight_device *csdev) +static void etm_disable_sysfs(struct coresight_device *csdev) { struct etm_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); @@ -405,7 +431,6 @@ static void etm_disable(struct coresight_device *csdev) * ensures that register writes occur when cpu is powered. */ smp_call_function_single(drvdata->cpu, etm_disable_hw, drvdata, 1); - drvdata->enable = false; spin_unlock(&drvdata->spinlock); put_online_cpus(); @@ -413,6 +438,33 @@ static void etm_disable(struct coresight_device *csdev) dev_info(drvdata->dev, "ETM tracing disabled\n"); } +static void etm_disable(struct coresight_device *csdev) +{ + u32 mode; + struct etm_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); + + /* + * For as long as the tracer isn't disabled another entity can't + * change its status. As such we can read the status here without + * fearing it will change under us. + */ + mode = local_read(&drvdata->mode); + + switch (mode) { + case CS_MODE_DISABLED: + break; + case CS_MODE_SYSFS: + etm_disable_sysfs(csdev); + break; + default: + WARN_ON_ONCE(mode); + return; + } + + if (mode) + local_set(&drvdata->mode, CS_MODE_DISABLED); +} + static const struct coresight_ops_source etm_source_ops = { .cpu_id = etm_cpu_id, .trace_id = etm_trace_id, @@ -440,7 +492,7 @@ static int etm_cpu_callback(struct notifier_block *nfb, unsigned long action, etmdrvdata[cpu]->os_unlock = true; } - if (etmdrvdata[cpu]->enable) + if (local_read(&etmdrvdata[cpu]->mode)) etm_enable_hw(etmdrvdata[cpu]); spin_unlock(&etmdrvdata[cpu]->spinlock); break; @@ -453,7 +505,7 @@ static int etm_cpu_callback(struct notifier_block *nfb, unsigned long action, case CPU_DYING: spin_lock(&etmdrvdata[cpu]->spinlock); - if (etmdrvdata[cpu]->enable) + if (local_read(&etmdrvdata[cpu]->mode)) etm_disable_hw(etmdrvdata[cpu]); spin_unlock(&etmdrvdata[cpu]->spinlock); break; diff --git a/drivers/hwtracing/coresight/coresight-etm4x.c b/drivers/hwtracing/coresight/coresight-etm4x.c index c2f518fbc9a8..0026092fec7f 100644 --- a/drivers/hwtracing/coresight/coresight-etm4x.c +++ b/drivers/hwtracing/coresight/coresight-etm4x.c @@ -187,7 +187,7 @@ static void etm4_enable_hw(void *info) dev_dbg(drvdata->dev, "cpu: %d enable smp call done\n", drvdata->cpu); } -static int etm4_enable(struct coresight_device *csdev) +static int etm4_enable(struct coresight_device *csdev, u32 mode) { struct etmv4_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); int ret; diff --git a/drivers/hwtracing/coresight/coresight-priv.h b/drivers/hwtracing/coresight/coresight-priv.h index 14f245a2018d..ed116b303e87 100644 --- a/drivers/hwtracing/coresight/coresight-priv.h +++ b/drivers/hwtracing/coresight/coresight-priv.h @@ -34,6 +34,12 @@ #define TIMEOUT_US 100 #define BMVAL(val, lsb, msb) 
((val & GENMASK(msb, lsb)) >> lsb) +enum cs_mode { + CS_MODE_DISABLED, + CS_MODE_SYSFS, + CS_MODE_PERF, +}; + static inline void CS_LOCK(void __iomem *addr) { do { diff --git a/drivers/hwtracing/coresight/coresight.c b/drivers/hwtracing/coresight/coresight.c index 6b44928c1076..b20afb709141 100644 --- a/drivers/hwtracing/coresight/coresight.c +++ b/drivers/hwtracing/coresight/coresight.c @@ -222,7 +222,7 @@ static void coresight_disable_link(struct coresight_device *csdev, csdev->enable = false; } -static int coresight_enable_source(struct coresight_device *csdev) +static int coresight_enable_source(struct coresight_device *csdev, u32 mode) { int ret; @@ -234,7 +234,7 @@ static int coresight_enable_source(struct coresight_device *csdev) if (!csdev->enable) { if (source_ops(csdev)->enable) { - ret = source_ops(csdev)->enable(csdev); + ret = source_ops(csdev)->enable(csdev, mode); if (ret) return ret; } @@ -458,7 +458,7 @@ int coresight_enable(struct coresight_device *csdev) if (ret) goto err_path; - ret = coresight_enable_source(csdev); + ret = coresight_enable_source(csdev, CS_MODE_SYSFS); if (ret) goto err_source; diff --git a/include/linux/coresight.h b/include/linux/coresight.h index 851ecb22397e..61dfb8d511ea 100644 --- a/include/linux/coresight.h +++ b/include/linux/coresight.h @@ -213,7 +213,7 @@ struct coresight_ops_link { struct coresight_ops_source { int (*cpu_id)(struct coresight_device *csdev); int (*trace_id)(struct coresight_device *csdev); - int (*enable)(struct coresight_device *csdev); + int (*enable)(struct coresight_device *csdev, u32 mode); void (*disable)(struct coresight_device *csdev); }; -- cgit v1.2.3 From 882d5e112491c875ab7c8c336b8beaeec54d0509 Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Wed, 17 Feb 2016 17:51:57 -0700 Subject: coresight: etm3x: implementing perf_enable/disable() API That way traces can be enabled and disabled automatically from the Perf subystem using the PMU abstraction. Signed-off-by: Mathieu Poirier Signed-off-by: Greg Kroah-Hartman --- drivers/hwtracing/coresight/Kconfig | 1 + drivers/hwtracing/coresight/coresight-etm3x.c | 95 +++++++++++++++++++++++++-- drivers/hwtracing/coresight/coresight-etm4x.c | 4 +- drivers/hwtracing/coresight/coresight.c | 2 +- include/linux/coresight.h | 6 +- 5 files changed, 99 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/drivers/hwtracing/coresight/Kconfig b/drivers/hwtracing/coresight/Kconfig index c85935f3525a..db0541031c72 100644 --- a/drivers/hwtracing/coresight/Kconfig +++ b/drivers/hwtracing/coresight/Kconfig @@ -4,6 +4,7 @@ menuconfig CORESIGHT bool "CoreSight Tracing Support" select ARM_AMBA + select PERF_EVENTS help This framework provides a kernel interface for the CoreSight debug and trace drivers to register themselves with. 
It's intended to build diff --git a/drivers/hwtracing/coresight/coresight-etm3x.c b/drivers/hwtracing/coresight/coresight-etm3x.c index c82d545e68ef..a9b820ec16aa 100644 --- a/drivers/hwtracing/coresight/coresight-etm3x.c +++ b/drivers/hwtracing/coresight/coresight-etm3x.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include "coresight-etm.h" @@ -297,6 +298,47 @@ void etm_config_trace_mode(struct etm_config *config) config->addr_type[1] = ETM_ADDR_TYPE_RANGE; } +#define ETM3X_SUPPORTED_OPTIONS (ETMCR_CYC_ACC | ETMCR_TIMESTAMP_EN) + +static int etm_parse_event_config(struct etm_drvdata *drvdata, + struct perf_event_attr *attr) +{ + struct etm_config *config = &drvdata->config; + + if (!attr) + return -EINVAL; + + /* Clear configuration from previous run */ + memset(config, 0, sizeof(struct etm_config)); + + if (attr->exclude_kernel) + config->mode = ETM_MODE_EXCL_KERN; + + if (attr->exclude_user) + config->mode = ETM_MODE_EXCL_USER; + + /* Always start from the default config */ + etm_set_default(config); + + /* + * By default the tracers are configured to trace the whole address + * range. Narrow the field only if requested by user space. + */ + if (config->mode) + etm_config_trace_mode(config); + + /* + * At this time only cycle accurate and timestamp options are + * available. + */ + if (attr->config & ~ETM3X_SUPPORTED_OPTIONS) + return -EINVAL; + + config->ctrl = attr->config; + + return 0; +} + static void etm_enable_hw(void *info) { int i; @@ -316,8 +358,10 @@ static void etm_enable_hw(void *info) etm_set_prog(drvdata); etmcr = etm_readl(drvdata, ETMCR); - etmcr &= (ETMCR_PWD_DWN | ETMCR_ETM_PRG); + /* Clear setting from a previous run if need be */ + etmcr &= ~ETM3X_SUPPORTED_OPTIONS; etmcr |= drvdata->port_size; + etmcr |= ETMCR_ETM_EN; etm_writel(drvdata, config->ctrl | etmcr, ETMCR); etm_writel(drvdata, config->trigger_event, ETMTRIGGER); etm_writel(drvdata, config->startstop_ctrl, ETMTSSCR); @@ -357,9 +401,6 @@ static void etm_enable_hw(void *info) /* No VMID comparator value selected */ etm_writel(drvdata, 0x0, ETMVMIDCVR); - /* Ensures trace output is enabled from this ETM */ - etm_writel(drvdata, config->ctrl | ETMCR_ETM_EN | etmcr, ETMCR); - etm_clr_prog(drvdata); CS_LOCK(drvdata->base); @@ -407,6 +448,22 @@ static int etm_trace_id(struct coresight_device *csdev) return etm_get_trace_id(drvdata); } +static int etm_enable_perf(struct coresight_device *csdev, + struct perf_event_attr *attr) +{ + struct etm_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); + + if (WARN_ON_ONCE(drvdata->cpu != smp_processor_id())) + return -EINVAL; + + /* Configure the tracer based on the session's specifics */ + etm_parse_event_config(drvdata, attr); + /* And enable it */ + etm_enable_hw(drvdata); + + return 0; +} + static int etm_enable_sysfs(struct coresight_device *csdev) { struct etm_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); @@ -437,7 +494,8 @@ err: return ret; } -static int etm_enable(struct coresight_device *csdev, u32 mode) +static int etm_enable(struct coresight_device *csdev, + struct perf_event_attr *attr, u32 mode) { int ret; u32 val; @@ -453,6 +511,9 @@ static int etm_enable(struct coresight_device *csdev, u32 mode) case CS_MODE_SYSFS: ret = etm_enable_sysfs(csdev); break; + case CS_MODE_PERF: + ret = etm_enable_perf(csdev, attr); + break; default: ret = -EINVAL; } @@ -485,6 +546,27 @@ static void etm_disable_hw(void *info) dev_dbg(drvdata->dev, "cpu: %d disable smp call done\n", drvdata->cpu); } +static void etm_disable_perf(struct 
coresight_device *csdev) +{ + struct etm_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); + + if (WARN_ON_ONCE(drvdata->cpu != smp_processor_id())) + return; + + CS_UNLOCK(drvdata->base); + + /* Setting the prog bit disables tracing immediately */ + etm_set_prog(drvdata); + + /* + * There is no way to know when the tracer will be used again so + * power down the tracer. + */ + etm_set_pwrdwn(drvdata); + + CS_LOCK(drvdata->base); +} + static void etm_disable_sysfs(struct coresight_device *csdev) { struct etm_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); @@ -528,6 +610,9 @@ static void etm_disable(struct coresight_device *csdev) case CS_MODE_SYSFS: etm_disable_sysfs(csdev); break; + case CS_MODE_PERF: + etm_disable_perf(csdev); + break; default: WARN_ON_ONCE(mode); return; diff --git a/drivers/hwtracing/coresight/coresight-etm4x.c b/drivers/hwtracing/coresight/coresight-etm4x.c index 0026092fec7f..d0169ba7fbf2 100644 --- a/drivers/hwtracing/coresight/coresight-etm4x.c +++ b/drivers/hwtracing/coresight/coresight-etm4x.c @@ -32,6 +32,7 @@ #include #include #include +#include #include #include "coresight-etm4x.h" @@ -187,7 +188,8 @@ static void etm4_enable_hw(void *info) dev_dbg(drvdata->dev, "cpu: %d enable smp call done\n", drvdata->cpu); } -static int etm4_enable(struct coresight_device *csdev, u32 mode) +static int etm4_enable(struct coresight_device *csdev, + struct perf_event_attr *attr, u32 mode) { struct etmv4_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); int ret; diff --git a/drivers/hwtracing/coresight/coresight.c b/drivers/hwtracing/coresight/coresight.c index b20afb709141..95cccb179763 100644 --- a/drivers/hwtracing/coresight/coresight.c +++ b/drivers/hwtracing/coresight/coresight.c @@ -234,7 +234,7 @@ static int coresight_enable_source(struct coresight_device *csdev, u32 mode) if (!csdev->enable) { if (source_ops(csdev)->enable) { - ret = source_ops(csdev)->enable(csdev, mode); + ret = source_ops(csdev)->enable(csdev, NULL, mode); if (ret) return ret; } diff --git a/include/linux/coresight.h b/include/linux/coresight.h index 61dfb8d511ea..6801dd64ee5d 100644 --- a/include/linux/coresight.h +++ b/include/linux/coresight.h @@ -14,6 +14,7 @@ #define _LINUX_CORESIGHT_H #include +#include #include /* Peripheral id registers (0xFD0-0xFEC) */ @@ -206,14 +207,15 @@ struct coresight_ops_link { * @cpu_id: returns the value of the CPU number this component * is associated to. * @trace_id: returns the value of the component's trace ID as known - to the HW. + * to the HW. * @enable: enables tracing for a source. * @disable: disables tracing for a source. */ struct coresight_ops_source { int (*cpu_id)(struct coresight_device *csdev); int (*trace_id)(struct coresight_device *csdev); - int (*enable)(struct coresight_device *csdev, u32 mode); + int (*enable)(struct coresight_device *csdev, + struct perf_event_attr *attr, u32 mode); void (*disable)(struct coresight_device *csdev); }; -- cgit v1.2.3 From e827d4550aa3225b8965ce4c266208cfe0297509 Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Wed, 17 Feb 2016 17:51:59 -0700 Subject: coresight: etb10: adding operation mode for sink->enable() Adding an operation mode to the sink->enable() API in order to prevent simultaneous access from different callers. TPIU and TMC won't be supplemented with the AUX area API immediately and as such ignore the new mode. 
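The claim/release discipline this series converges on can be sketched in isolation. The snippet below is illustrative only (the demo_* names are made up, not taken from the patches); it shows the sink-side policy where a perf session owns the hardware exclusively while several sysFS users may share it, mirroring the local_cmpxchg() guard used in etm_enable() above:

#include <asm/local.h>
#include <linux/errno.h>

enum cs_mode { CS_MODE_DISABLED, CS_MODE_SYSFS, CS_MODE_PERF };

struct demo_drvdata {
	local_t mode;	/* holds one of enum cs_mode */
};

static int demo_sink_enable(struct demo_drvdata *drvdata, u32 mode)
{
	/* Atomically claim the device iff nobody owns it yet */
	u32 val = local_cmpxchg(&drvdata->mode, CS_MODE_DISABLED, mode);

	if (val == CS_MODE_PERF)
		return -EBUSY;	/* perf requires exclusive HW access */

	if (val == CS_MODE_SYSFS)
		return 0;	/* sysFS users may share an enabled sink */

	/* val was CS_MODE_DISABLED: we won the race, program the HW here */
	return 0;
}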
Signed-off-by: Mathieu Poirier Signed-off-by: Greg Kroah-Hartman --- drivers/hwtracing/coresight/coresight-etb10.c | 30 ++++++++++++++++++++------- drivers/hwtracing/coresight/coresight-priv.h | 2 +- drivers/hwtracing/coresight/coresight-tmc.c | 2 +- drivers/hwtracing/coresight/coresight-tpiu.c | 2 +- drivers/hwtracing/coresight/coresight.c | 10 ++++----- include/linux/coresight.h | 2 +- 6 files changed, 32 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/drivers/hwtracing/coresight/coresight-etb10.c b/drivers/hwtracing/coresight/coresight-etb10.c index 162c9ccc8c33..79099f95ba3f 100644 --- a/drivers/hwtracing/coresight/coresight-etb10.c +++ b/drivers/hwtracing/coresight/coresight-etb10.c @@ -73,9 +73,9 @@ * @miscdev: specifics to handle "/dev/xyz.etb" entry. * @spinlock: only one at a time pls. * @reading: synchronise user space access to etb buffer. + * @mode: this ETB is being used. * @buf: area of memory where ETB buffer content gets sent. * @buffer_depth: size of @buf. - * @enable: this ETB is being used. * @trigger_cntr: amount of words to store after a trigger. */ struct etb_drvdata { @@ -86,9 +86,9 @@ struct etb_drvdata { struct miscdevice miscdev; spinlock_t spinlock; local_t reading; + local_t mode; u8 *buf; u32 buffer_depth; - bool enable; u32 trigger_cntr; }; @@ -133,16 +133,31 @@ static void etb_enable_hw(struct etb_drvdata *drvdata) CS_LOCK(drvdata->base); } -static int etb_enable(struct coresight_device *csdev) +static int etb_enable(struct coresight_device *csdev, u32 mode) { - struct etb_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); + u32 val; unsigned long flags; + struct etb_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); + + val = local_cmpxchg(&drvdata->mode, + CS_MODE_DISABLED, mode); + /* + * When accessing from Perf, a HW buffer can be handled + * by a single trace entity. In sysFS mode many tracers + * can be logging to the same HW buffer. + */ + if (val == CS_MODE_PERF) + return -EBUSY; + + /* Nothing to do, the tracer is already enabled. 
*/ + if (val == CS_MODE_SYSFS) + goto out; spin_lock_irqsave(&drvdata->spinlock, flags); etb_enable_hw(drvdata); - drvdata->enable = true; spin_unlock_irqrestore(&drvdata->spinlock, flags); +out: dev_info(drvdata->dev, "ETB enabled\n"); return 0; } @@ -243,9 +258,10 @@ static void etb_disable(struct coresight_device *csdev) spin_lock_irqsave(&drvdata->spinlock, flags); etb_disable_hw(drvdata); etb_dump_hw(drvdata); - drvdata->enable = false; spin_unlock_irqrestore(&drvdata->spinlock, flags); + local_set(&drvdata->mode, CS_MODE_DISABLED); + dev_info(drvdata->dev, "ETB disabled\n"); } @@ -263,7 +279,7 @@ static void etb_dump(struct etb_drvdata *drvdata) unsigned long flags; spin_lock_irqsave(&drvdata->spinlock, flags); - if (drvdata->enable) { + if (local_read(&drvdata->mode) == CS_MODE_SYSFS) { etb_disable_hw(drvdata); etb_dump_hw(drvdata); etb_enable_hw(drvdata); diff --git a/drivers/hwtracing/coresight/coresight-priv.h b/drivers/hwtracing/coresight/coresight-priv.h index 932f34a84d96..333eddaed339 100644 --- a/drivers/hwtracing/coresight/coresight-priv.h +++ b/drivers/hwtracing/coresight/coresight-priv.h @@ -62,7 +62,7 @@ static inline void CS_UNLOCK(void __iomem *addr) } void coresight_disable_path(struct list_head *path); -int coresight_enable_path(struct list_head *path); +int coresight_enable_path(struct list_head *path, u32 mode); struct coresight_device *coresight_get_sink(struct list_head *path); struct list_head *coresight_build_path(struct coresight_device *csdev); void coresight_release_path(struct list_head *path); diff --git a/drivers/hwtracing/coresight/coresight-tmc.c b/drivers/hwtracing/coresight/coresight-tmc.c index b526396d80b6..ac91b0b40ec2 100644 --- a/drivers/hwtracing/coresight/coresight-tmc.c +++ b/drivers/hwtracing/coresight/coresight-tmc.c @@ -265,7 +265,7 @@ static int tmc_enable(struct tmc_drvdata *drvdata, enum tmc_mode mode) return 0; } -static int tmc_enable_sink(struct coresight_device *csdev) +static int tmc_enable_sink(struct coresight_device *csdev, u32 mode) { struct tmc_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); diff --git a/drivers/hwtracing/coresight/coresight-tpiu.c b/drivers/hwtracing/coresight/coresight-tpiu.c index 0d8fce651ff7..71582f6dfd4c 100644 --- a/drivers/hwtracing/coresight/coresight-tpiu.c +++ b/drivers/hwtracing/coresight/coresight-tpiu.c @@ -70,7 +70,7 @@ static void tpiu_enable_hw(struct tpiu_drvdata *drvdata) CS_LOCK(drvdata->base); } -static int tpiu_enable(struct coresight_device *csdev) +static int tpiu_enable(struct coresight_device *csdev, u32 mode) { struct tpiu_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); diff --git a/drivers/hwtracing/coresight/coresight.c b/drivers/hwtracing/coresight/coresight.c index 95cccb179763..6ec2b66af9ee 100644 --- a/drivers/hwtracing/coresight/coresight.c +++ b/drivers/hwtracing/coresight/coresight.c @@ -121,13 +121,13 @@ static int coresight_find_link_outport(struct coresight_device *csdev, return 0; } -static int coresight_enable_sink(struct coresight_device *csdev) +static int coresight_enable_sink(struct coresight_device *csdev, u32 mode) { int ret; if (!csdev->enable) { if (sink_ops(csdev)->enable) { - ret = sink_ops(csdev)->enable(csdev); + ret = sink_ops(csdev)->enable(csdev, mode); if (ret) return ret; } @@ -283,7 +283,7 @@ void coresight_disable_path(struct list_head *path) } } -int coresight_enable_path(struct list_head *path) +int coresight_enable_path(struct list_head *path, u32 mode) { int ret = 0; @@ -296,7 +296,7 @@ int coresight_enable_path(struct list_head 
*path) switch (csdev->type) { case CORESIGHT_DEV_TYPE_SINK: case CORESIGHT_DEV_TYPE_LINKSINK: - ret = coresight_enable_sink(csdev); + ret = coresight_enable_sink(csdev, mode); if (ret) goto err; break; @@ -454,7 +454,7 @@ int coresight_enable(struct coresight_device *csdev) goto out; } - ret = coresight_enable_path(path); + ret = coresight_enable_path(path, CS_MODE_SYSFS); if (ret) goto err_path; diff --git a/include/linux/coresight.h b/include/linux/coresight.h index 6801dd64ee5d..9fa92dcdd2ea 100644 --- a/include/linux/coresight.h +++ b/include/linux/coresight.h @@ -186,7 +186,7 @@ struct coresight_device { * @disable: disables the sink. */ struct coresight_ops_sink { - int (*enable)(struct coresight_device *csdev); + int (*enable)(struct coresight_device *csdev, u32 mode); void (*disable)(struct coresight_device *csdev); }; -- cgit v1.2.3 From 2997aa4063d97fdb39450c6078bd81a7b0504f22 Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Wed, 17 Feb 2016 17:52:00 -0700 Subject: coresight: etb10: implementing AUX API Add ETB10-specific AUX area operations to be used by the perf framework when events are initialised. Part of this operation involves modeling the mmap'ed area based on the specific ways a sink buffer gathers information. Signed-off-by: Mathieu Poirier Signed-off-by: Greg Kroah-Hartman --- drivers/hwtracing/coresight/coresight-etb10.c | 234 ++++++++++++++++++++++++ include/linux/coresight.h | 21 ++- 2 files changed, 253 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/hwtracing/coresight/coresight-etb10.c b/drivers/hwtracing/coresight/coresight-etb10.c index 79099f95ba3f..a2eb6bdeaafa 100644 --- a/drivers/hwtracing/coresight/coresight-etb10.c +++ b/drivers/hwtracing/coresight/coresight-etb10.c @@ -28,6 +28,11 @@ #include #include #include +#include +#include +#include + +#include #include "coresight-priv.h" @@ -64,6 +69,26 @@ #define ETB_FFSR_BIT 1 #define ETB_FRAME_SIZE_WORDS 4 +/** + * struct cs_buffers - keep track of a recording session's specifics + * @cur: index of the current buffer + * @nr_pages: max number of pages granted to us + * @offset: offset within the current buffer + * @data_size: how much we collected in this run + * @lost: non-zero if the HW buffer wrapped around + * @snapshot: is this run in snapshot mode + * @data_pages: a handle to the ring buffer pages + */ +struct cs_buffers { + unsigned int cur; + unsigned int nr_pages; + unsigned long offset; + local_t data_size; + local_t lost; + bool snapshot; + void **data_pages; +}; + +/** * struct etb_drvdata - specifics associated to an ETB component * @base: memory mapped base address for this component.
@@ -265,9 +290,218 @@ static void etb_disable(struct coresight_device *csdev) dev_info(drvdata->dev, "ETB disabled\n"); } +static void *etb_alloc_buffer(struct coresight_device *csdev, int cpu, + void **pages, int nr_pages, bool overwrite) +{ + int node; + struct cs_buffers *buf; + + if (cpu == -1) + cpu = smp_processor_id(); + node = cpu_to_node(cpu); + + buf = kzalloc_node(sizeof(struct cs_buffers), GFP_KERNEL, node); + if (!buf) + return NULL; + + buf->snapshot = overwrite; + buf->nr_pages = nr_pages; + buf->data_pages = pages; + + return buf; +} + +static void etb_free_buffer(void *config) +{ + struct cs_buffers *buf = config; + + kfree(buf); +} + +static int etb_set_buffer(struct coresight_device *csdev, + struct perf_output_handle *handle, + void *sink_config) +{ + int ret = 0; + unsigned long head; + struct cs_buffers *buf = sink_config; + + /* wrap head around to the amount of space we have */ + head = handle->head & ((buf->nr_pages << PAGE_SHIFT) - 1); + + /* find the page to write to */ + buf->cur = head / PAGE_SIZE; + + /* and offset within that page */ + buf->offset = head % PAGE_SIZE; + + local_set(&buf->data_size, 0); + + return ret; +} + +static unsigned long etb_reset_buffer(struct coresight_device *csdev, + struct perf_output_handle *handle, + void *sink_config, bool *lost) +{ + unsigned long size = 0; + struct cs_buffers *buf = sink_config; + + if (buf) { + /* + * In snapshot mode ->data_size holds the new address of the + * ring buffer's head. The size itself is the whole address + * range since we want the latest information. + */ + if (buf->snapshot) + handle->head = local_xchg(&buf->data_size, + buf->nr_pages << PAGE_SHIFT); + + /* + * Tell the tracer PMU how much we got in this run and if + * something went wrong along the way. Nobody else can use + * this cs_buffers instance until we are done. As such + * resetting parameters here and squaring off with the ring + * buffer API in the tracer PMU is fine. + */ + *lost = !!local_xchg(&buf->lost, 0); + size = local_xchg(&buf->data_size, 0); + } + + return size; +} + +static void etb_update_buffer(struct coresight_device *csdev, + struct perf_output_handle *handle, + void *sink_config) +{ + int i, cur; + u8 *buf_ptr; + u32 read_ptr, write_ptr, capacity; + u32 status, read_data, to_read; + unsigned long offset; + struct cs_buffers *buf = sink_config; + struct etb_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); + + if (!buf) + return; + + capacity = drvdata->buffer_depth * ETB_FRAME_SIZE_WORDS; + + CS_UNLOCK(drvdata->base); + etb_disable_hw(drvdata); + + /* unit is in words, not bytes */ + read_ptr = readl_relaxed(drvdata->base + ETB_RAM_READ_POINTER); + write_ptr = readl_relaxed(drvdata->base + ETB_RAM_WRITE_POINTER); + + /* + * Entries should be aligned to the frame size. If they are not + * go back to the last alignement point to give decoding tools a + * chance to fix things. + */ + if (write_ptr % ETB_FRAME_SIZE_WORDS) { + dev_err(drvdata->dev, + "write_ptr: %lu not aligned to formatter frame size\n", + (unsigned long)write_ptr); + + write_ptr &= ~(ETB_FRAME_SIZE_WORDS - 1); + local_inc(&buf->lost); + } + + /* + * Get a hold of the status register and see if a wrap around + * has occurred. If so adjust things accordingly. Otherwise + * start at the beginning and go until the write pointer has + * been reached. 
+ */ + status = readl_relaxed(drvdata->base + ETB_STATUS_REG); + if (status & ETB_STATUS_RAM_FULL) { + local_inc(&buf->lost); + to_read = capacity; + read_ptr = write_ptr; + } else { + to_read = CIRC_CNT(write_ptr, read_ptr, drvdata->buffer_depth); + to_read *= ETB_FRAME_SIZE_WORDS; + } + + /* + * Make sure we don't overwrite data that hasn't been consumed yet. + * It is entirely possible that the HW buffer has more data than the + * ring buffer can currently handle. If so adjust the start address + * to take only the last traces. + * + * In snapshot mode we are looking to get the latest traces only and as + * such, we don't care about not overwriting data that hasn't been + * processed by user space. + */ + if (!buf->snapshot && to_read > handle->size) { + u32 mask = ~(ETB_FRAME_SIZE_WORDS - 1); + + /* The new read pointer must be frame size aligned */ + to_read -= handle->size & mask; + /* + * Move the RAM read pointer up, keeping in mind that + * everything is in frame size units. + */ + read_ptr = (write_ptr + drvdata->buffer_depth) - + to_read / ETB_FRAME_SIZE_WORDS; + /* Wrap around if need be*/ + read_ptr &= ~(drvdata->buffer_depth - 1); + /* let the decoder know we've skipped ahead */ + local_inc(&buf->lost); + } + + /* finally tell HW where we want to start reading from */ + writel_relaxed(read_ptr, drvdata->base + ETB_RAM_READ_POINTER); + + cur = buf->cur; + offset = buf->offset; + for (i = 0; i < to_read; i += 4) { + buf_ptr = buf->data_pages[cur] + offset; + read_data = readl_relaxed(drvdata->base + + ETB_RAM_READ_DATA_REG); + *buf_ptr++ = read_data >> 0; + *buf_ptr++ = read_data >> 8; + *buf_ptr++ = read_data >> 16; + *buf_ptr++ = read_data >> 24; + + offset += 4; + if (offset >= PAGE_SIZE) { + offset = 0; + cur++; + /* wrap around at the end of the buffer */ + cur &= buf->nr_pages - 1; + } + } + + /* reset ETB buffer for next run */ + writel_relaxed(0x0, drvdata->base + ETB_RAM_READ_POINTER); + writel_relaxed(0x0, drvdata->base + ETB_RAM_WRITE_POINTER); + + /* + * In snapshot mode all we have to do is communicate to + * perf_aux_output_end() the address of the current head. In full + * trace mode the same function expects a size to move rb->aux_head + * forward. + */ + if (buf->snapshot) + local_set(&buf->data_size, (cur * PAGE_SIZE) + offset); + else + local_add(to_read, &buf->data_size); + + etb_enable_hw(drvdata); + CS_LOCK(drvdata->base); +} + static const struct coresight_ops_sink etb_sink_ops = { .enable = etb_enable, .disable = etb_disable, + .alloc_buffer = etb_alloc_buffer, + .free_buffer = etb_free_buffer, + .set_buffer = etb_set_buffer, + .reset_buffer = etb_reset_buffer, + .update_buffer = etb_update_buffer, }; static const struct coresight_ops etb_cs_ops = { diff --git a/include/linux/coresight.h b/include/linux/coresight.h index 9fa92dcdd2ea..385d62e64abb 100644 --- a/include/linux/coresight.h +++ b/include/linux/coresight.h @@ -182,12 +182,29 @@ struct coresight_device { /** * struct coresight_ops_sink - basic operations for a sink * Operations available for sinks - * @enable: enables the sink. - * @disable: disables the sink. + * @enable: enables the sink. + * @disable: disables the sink. + * @alloc_buffer: initialises perf's ring buffer for trace collection. + * @free_buffer: release memory allocated in @get_config. + * @set_buffer: initialises buffer mechanic before a trace session. + * @reset_buffer: finalises buffer mechanic after a trace session. + * @update_buffer: update buffer pointers after a trace session. 
*/ struct coresight_ops_sink { int (*enable)(struct coresight_device *csdev, u32 mode); void (*disable)(struct coresight_device *csdev); + void *(*alloc_buffer)(struct coresight_device *csdev, int cpu, + void **pages, int nr_pages, bool overwrite); + void (*free_buffer)(void *config); + int (*set_buffer)(struct coresight_device *csdev, + struct perf_output_handle *handle, + void *sink_config); + unsigned long (*reset_buffer)(struct coresight_device *csdev, + struct perf_output_handle *handle, + void *sink_config, bool *lost); + void (*update_buffer)(struct coresight_device *csdev, + struct perf_output_handle *handle, + void *sink_config); }; /** -- cgit v1.2.3 From 0bcbf2e30ff2271b54f54c8697a185f7d86ec6e4 Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Wed, 17 Feb 2016 17:52:01 -0700 Subject: coresight: etm-perf: new PMU driver for ETM tracers Perf is a well-known and widely used tool for performance monitoring and much more. As such, it is an ideal candidate for integration with coresight based HW tracing. This patch introduces a PMU that represents a coresight tracer to the Perf core. Cc: Alexander Shishkin Signed-off-by: Mathieu Poirier Signed-off-by: Greg Kroah-Hartman --- drivers/hwtracing/coresight/Makefile | 3 +- drivers/hwtracing/coresight/coresight-etm-perf.c | 393 +++++++++++++++++++++++ drivers/hwtracing/coresight/coresight-etm-perf.h | 32 ++ drivers/hwtracing/coresight/coresight-etm3x.c | 7 + include/linux/coresight-pmu.h | 27 ++ 5 files changed, 461 insertions(+), 1 deletion(-) create mode 100644 drivers/hwtracing/coresight/coresight-etm-perf.c create mode 100644 drivers/hwtracing/coresight/coresight-etm-perf.h create mode 100644 include/linux/coresight-pmu.h (limited to 'include/linux') diff --git a/drivers/hwtracing/coresight/Makefile b/drivers/hwtracing/coresight/Makefile index 233d66cf22d3..cf8c6d689747 100644 --- a/drivers/hwtracing/coresight/Makefile +++ b/drivers/hwtracing/coresight/Makefile @@ -9,6 +9,7 @@ obj-$(CONFIG_CORESIGHT_SINK_ETBV10) += coresight-etb10.o obj-$(CONFIG_CORESIGHT_LINKS_AND_SINKS) += coresight-funnel.o \ coresight-replicator.o obj-$(CONFIG_CORESIGHT_SOURCE_ETM3X) += coresight-etm3x.o coresight-etm-cp14.o \ - coresight-etm3x-sysfs.o + coresight-etm3x-sysfs.o \ + coresight-etm-perf.o obj-$(CONFIG_CORESIGHT_SOURCE_ETM4X) += coresight-etm4x.o obj-$(CONFIG_CORESIGHT_QCOM_REPLICATOR) += coresight-replicator-qcom.o diff --git a/drivers/hwtracing/coresight/coresight-etm-perf.c b/drivers/hwtracing/coresight/coresight-etm-perf.c new file mode 100644 index 000000000000..36153a77e982 --- /dev/null +++ b/drivers/hwtracing/coresight/coresight-etm-perf.c @@ -0,0 +1,393 @@ +/* + * Copyright(C) 2015 Linaro Limited. All rights reserved. + * Author: Mathieu Poirier + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see .
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "coresight-priv.h" + +static struct pmu etm_pmu; +static bool etm_perf_up; + +/** + * struct etm_event_data - Coresight specifics associated to an event + * @work: Handle to free allocated memory outside IRQ context. + * @mask: Hold the CPU(s) this event was set for. + * @snk_config: The sink configuration. + * @path: An array of path, each slot for one CPU. + */ +struct etm_event_data { + struct work_struct work; + cpumask_t mask; + void *snk_config; + struct list_head **path; +}; + +static DEFINE_PER_CPU(struct perf_output_handle, ctx_handle); +static DEFINE_PER_CPU(struct coresight_device *, csdev_src); + +/* ETMv3.5/PTM's ETMCR is 'config' */ +PMU_FORMAT_ATTR(cycacc, "config:" __stringify(ETM_OPT_CYCACC)); +PMU_FORMAT_ATTR(timestamp, "config:" __stringify(ETM_OPT_TS)); + +static struct attribute *etm_config_formats_attr[] = { + &format_attr_cycacc.attr, + &format_attr_timestamp.attr, + NULL, +}; + +static struct attribute_group etm_pmu_format_group = { + .name = "format", + .attrs = etm_config_formats_attr, +}; + +static const struct attribute_group *etm_pmu_attr_groups[] = { + &etm_pmu_format_group, + NULL, +}; + +static void etm_event_read(struct perf_event *event) {} + +static int etm_event_init(struct perf_event *event) +{ + if (event->attr.type != etm_pmu.type) + return -ENOENT; + + return 0; +} + +static void free_event_data(struct work_struct *work) +{ + int cpu; + cpumask_t *mask; + struct etm_event_data *event_data; + struct coresight_device *sink; + + event_data = container_of(work, struct etm_event_data, work); + mask = &event_data->mask; + /* + * First deal with the sink configuration. See comment in + * etm_setup_aux() about why we take the first available path. + */ + if (event_data->snk_config) { + cpu = cpumask_first(mask); + sink = coresight_get_sink(event_data->path[cpu]); + if (sink_ops(sink)->free_buffer) + sink_ops(sink)->free_buffer(event_data->snk_config); + } + + for_each_cpu(cpu, mask) { + if (event_data->path[cpu]) + coresight_release_path(event_data->path[cpu]); + } + + kfree(event_data->path); + kfree(event_data); +} + +static void *alloc_event_data(int cpu) +{ + int size; + cpumask_t *mask; + struct etm_event_data *event_data; + + /* First get memory for the session's data */ + event_data = kzalloc(sizeof(struct etm_event_data), GFP_KERNEL); + if (!event_data) + return NULL; + + /* Make sure nothing disappears under us */ + get_online_cpus(); + size = num_online_cpus(); + + mask = &event_data->mask; + if (cpu != -1) + cpumask_set_cpu(cpu, mask); + else + cpumask_copy(mask, cpu_online_mask); + put_online_cpus(); + + /* + * Each CPU has a single path between source and destination. As such + * allocate an array using CPU numbers as indexes. That way a path + * for any CPU can easily be accessed at any given time. We proceed + * the same way for sessions involving a single CPU. The cost of + * unused memory when dealing with single CPU trace scenarios is small + * compared to the cost of searching through an optimized array. 
+ */ + event_data->path = kcalloc(size, + sizeof(struct list_head *), GFP_KERNEL); + if (!event_data->path) { + kfree(event_data); + return NULL; + } + + return event_data; +} + +static void etm_free_aux(void *data) +{ + struct etm_event_data *event_data = data; + + schedule_work(&event_data->work); +} + +static void *etm_setup_aux(int event_cpu, void **pages, + int nr_pages, bool overwrite) +{ + int cpu; + cpumask_t *mask; + struct coresight_device *sink; + struct etm_event_data *event_data = NULL; + + event_data = alloc_event_data(event_cpu); + if (!event_data) + return NULL; + + INIT_WORK(&event_data->work, free_event_data); + + mask = &event_data->mask; + + /* Setup the path for each CPU in a trace session */ + for_each_cpu(cpu, mask) { + struct coresight_device *csdev; + + csdev = per_cpu(csdev_src, cpu); + if (!csdev) + goto err; + + /* + * Building a path doesn't enable it, it simply builds a + * list of devices from source to sink that can be + * referenced later when the path is actually needed. + */ + event_data->path[cpu] = coresight_build_path(csdev); + if (!event_data->path[cpu]) + goto err; + } + + /* + * In theory nothing prevent tracers in a trace session from being + * associated with different sinks, nor having a sink per tracer. But + * until we have HW with this kind of topology and a way to convey + * sink assignement from the perf cmd line we need to assume tracers + * in a trace session are using the same sink. Therefore pick the sink + * found at the end of the first available path. + */ + cpu = cpumask_first(mask); + /* Grab the sink at the end of the path */ + sink = coresight_get_sink(event_data->path[cpu]); + if (!sink) + goto err; + + if (!sink_ops(sink)->alloc_buffer) + goto err; + + /* Get the AUX specific data from the sink buffer */ + event_data->snk_config = + sink_ops(sink)->alloc_buffer(sink, cpu, pages, + nr_pages, overwrite); + if (!event_data->snk_config) + goto err; + +out: + return event_data; + +err: + etm_free_aux(event_data); + event_data = NULL; + goto out; +} + +static void etm_event_start(struct perf_event *event, int flags) +{ + int cpu = smp_processor_id(); + struct etm_event_data *event_data; + struct perf_output_handle *handle = this_cpu_ptr(&ctx_handle); + struct coresight_device *sink, *csdev = per_cpu(csdev_src, cpu); + + if (!csdev) + goto fail; + + /* + * Deal with the ring buffer API and get a handle on the + * session's information. 
+ */ + event_data = perf_aux_output_begin(handle, event); + if (!event_data) + goto fail; + + /* We need a sink, no need to continue without one */ + sink = coresight_get_sink(event_data->path[cpu]); + if (WARN_ON_ONCE(!sink || !sink_ops(sink)->set_buffer)) + goto fail_end_stop; + + /* Configure the sink */ + if (sink_ops(sink)->set_buffer(sink, handle, + event_data->snk_config)) + goto fail_end_stop; + + /* Nothing will happen without a path */ + if (coresight_enable_path(event_data->path[cpu], CS_MODE_PERF)) + goto fail_end_stop; + + /* Tell the perf core the event is alive */ + event->hw.state = 0; + + /* Finally enable the tracer */ + if (source_ops(csdev)->enable(csdev, &event->attr, CS_MODE_PERF)) + goto fail_end_stop; + +out: + return; + +fail_end_stop: + perf_aux_output_end(handle, 0, true); +fail: + event->hw.state = PERF_HES_STOPPED; + goto out; +} + +static void etm_event_stop(struct perf_event *event, int mode) +{ + bool lost; + int cpu = smp_processor_id(); + unsigned long size; + struct coresight_device *sink, *csdev = per_cpu(csdev_src, cpu); + struct perf_output_handle *handle = this_cpu_ptr(&ctx_handle); + struct etm_event_data *event_data = perf_get_aux(handle); + + if (event->hw.state == PERF_HES_STOPPED) + return; + + if (!csdev) + return; + + sink = coresight_get_sink(event_data->path[cpu]); + if (!sink) + return; + + /* stop tracer */ + source_ops(csdev)->disable(csdev); + + /* tell the core */ + event->hw.state = PERF_HES_STOPPED; + + if (mode & PERF_EF_UPDATE) { + if (WARN_ON_ONCE(handle->event != event)) + return; + + /* update trace information */ + if (!sink_ops(sink)->update_buffer) + return; + + sink_ops(sink)->update_buffer(sink, handle, + event_data->snk_config); + + if (!sink_ops(sink)->reset_buffer) + return; + + size = sink_ops(sink)->reset_buffer(sink, handle, + event_data->snk_config, + &lost); + + perf_aux_output_end(handle, size, lost); + } + + /* Disabling the path make its elements available to other sessions */ + coresight_disable_path(event_data->path[cpu]); +} + +static int etm_event_add(struct perf_event *event, int mode) +{ + int ret = 0; + struct hw_perf_event *hwc = &event->hw; + + if (mode & PERF_EF_START) { + etm_event_start(event, 0); + if (hwc->state & PERF_HES_STOPPED) + ret = -EINVAL; + } else { + hwc->state = PERF_HES_STOPPED; + } + + return ret; +} + +static void etm_event_del(struct perf_event *event, int mode) +{ + etm_event_stop(event, PERF_EF_UPDATE); +} + +int etm_perf_symlink(struct coresight_device *csdev, bool link) +{ + char entry[sizeof("cpu9999999")]; + int ret = 0, cpu = source_ops(csdev)->cpu_id(csdev); + struct device *pmu_dev = etm_pmu.dev; + struct device *cs_dev = &csdev->dev; + + sprintf(entry, "cpu%d", cpu); + + if (!etm_perf_up) + return -EPROBE_DEFER; + + if (link) { + ret = sysfs_create_link(&pmu_dev->kobj, &cs_dev->kobj, entry); + if (ret) + return ret; + per_cpu(csdev_src, cpu) = csdev; + } else { + sysfs_remove_link(&pmu_dev->kobj, entry); + per_cpu(csdev_src, cpu) = NULL; + } + + return 0; +} + +static int __init etm_perf_init(void) +{ + int ret; + + etm_pmu.capabilities = PERF_PMU_CAP_EXCLUSIVE; + + etm_pmu.attr_groups = etm_pmu_attr_groups; + etm_pmu.task_ctx_nr = perf_sw_context; + etm_pmu.read = etm_event_read; + etm_pmu.event_init = etm_event_init; + etm_pmu.setup_aux = etm_setup_aux; + etm_pmu.free_aux = etm_free_aux; + etm_pmu.start = etm_event_start; + etm_pmu.stop = etm_event_stop; + etm_pmu.add = etm_event_add; + etm_pmu.del = etm_event_del; + + ret = perf_pmu_register(&etm_pmu, 
CORESIGHT_ETM_PMU_NAME, -1); + if (ret == 0) + etm_perf_up = true; + + return ret; +} +module_init(etm_perf_init); diff --git a/drivers/hwtracing/coresight/coresight-etm-perf.h b/drivers/hwtracing/coresight/coresight-etm-perf.h new file mode 100644 index 000000000000..87f5a134eb6f --- /dev/null +++ b/drivers/hwtracing/coresight/coresight-etm-perf.h @@ -0,0 +1,32 @@ +/* + * Copyright(C) 2015 Linaro Limited. All rights reserved. + * Author: Mathieu Poirier + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see . + */ + +#ifndef _CORESIGHT_ETM_PERF_H +#define _CORESIGHT_ETM_PERF_H + +struct coresight_device; + +#ifdef CONFIG_CORESIGHT +int etm_perf_symlink(struct coresight_device *csdev, bool link); + +#else +static inline int etm_perf_symlink(struct coresight_device *csdev, bool link) +{ return -EINVAL; } + +#endif /* CONFIG_CORESIGHT */ + +#endif diff --git a/drivers/hwtracing/coresight/coresight-etm3x.c b/drivers/hwtracing/coresight/coresight-etm3x.c index a9b820ec16aa..77b37413803f 100644 --- a/drivers/hwtracing/coresight/coresight-etm3x.c +++ b/drivers/hwtracing/coresight/coresight-etm3x.c @@ -35,6 +35,7 @@ #include #include "coresight-etm.h" +#include "coresight-etm-perf.h" static int boot_enable; module_param_named(boot_enable, boot_enable, int, S_IRUGO); @@ -827,6 +828,12 @@ static int etm_probe(struct amba_device *adev, const struct amba_id *id) goto err_arch_supported; } + ret = etm_perf_symlink(drvdata->csdev, true); + if (ret) { + coresight_unregister(drvdata->csdev); + goto err_arch_supported; + } + pm_runtime_put(&adev->dev); dev_info(dev, "%s initialized\n", (char *)id->data); diff --git a/include/linux/coresight-pmu.h b/include/linux/coresight-pmu.h new file mode 100644 index 000000000000..6c5386b23b10 --- /dev/null +++ b/include/linux/coresight-pmu.h @@ -0,0 +1,27 @@ +/* + * Copyright(C) 2015 Linaro Limited. All rights reserved. + * Author: Mathieu Poirier + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see . + */ + +#ifndef _LINUX_CORESIGHT_PMU_H +#define _LINUX_CORESIGHT_PMU_H + +#define CORESIGHT_ETM_PMU_NAME "cs_etm" + +/* ETMv3.5/PTM's ETMCR config bit */ +#define ETM_OPT_CYCACC 12 +#define ETM_OPT_TS 28 + +#endif -- cgit v1.2.3 From 17534ceb835a1a96eb921a2a80df168723d6570a Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Wed, 17 Feb 2016 17:52:02 -0700 Subject: coresight: introducing a global trace ID function TraceID values have to be unique for all tracers and consistent between drivers and user space. 
As such, introduce a central function to be used whenever a traceID value is required. The patch also accounts for data traceIDs, which are usually I(N) + 1. Signed-off-by: Mathieu Poirier Signed-off-by: Greg Kroah-Hartman --- drivers/hwtracing/coresight/coresight-etm3x.c | 7 ++----- include/linux/coresight-pmu.h | 12 ++++++++++++ 2 files changed, 14 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/drivers/hwtracing/coresight/coresight-etm3x.c b/drivers/hwtracing/coresight/coresight-etm3x.c index 77b37413803f..0ba1a3981373 100644 --- a/drivers/hwtracing/coresight/coresight-etm3x.c +++ b/drivers/hwtracing/coresight/coresight-etm3x.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include @@ -740,11 +741,7 @@ static void etm_init_arch_data(void *info) static void etm_init_trace_id(struct etm_drvdata *drvdata) { - /* - * A trace ID of value 0 is invalid, so let's start at some - * random value that fits in 7 bits and go from there. - */ - drvdata->traceid = 0x10 + drvdata->cpu; + drvdata->traceid = coresight_get_trace_id(drvdata->cpu); } static int etm_probe(struct amba_device *adev, const struct amba_id *id) diff --git a/include/linux/coresight-pmu.h b/include/linux/coresight-pmu.h index 6c5386b23b10..7d410260661b 100644 --- a/include/linux/coresight-pmu.h +++ b/include/linux/coresight-pmu.h @@ -19,9 +19,21 @@ #define _LINUX_CORESIGHT_PMU_H #define CORESIGHT_ETM_PMU_NAME "cs_etm" +#define CORESIGHT_ETM_PMU_SEED 0x10 /* ETMv3.5/PTM's ETMCR config bit */ #define ETM_OPT_CYCACC 12 #define ETM_OPT_TS 28 +static inline int coresight_get_trace_id(int cpu) +{ + /* + * A trace ID of value 0 is invalid, so let's start at some + * random value that fits in 7 bits and go from there. Since + * the common convention is to have data trace IDs be I(N) + 1, + * set instruction trace IDs as a function of the CPU number. + */ + return (CORESIGHT_ETM_PMU_SEED + (cpu * 2)); +} + #endif -- cgit v1.2.3 From 941943cf519f7cacbbcecee5c4ef4b77b466bd5c Mon Sep 17 00:00:00 2001 From: Paul Gortmaker Date: Wed, 17 Feb 2016 17:52:03 -0700 Subject: drivers/hwtracing: make coresight-* explicitly non-modular None of the Kconfig options currently controlling compilation of any of the files here are tristate, meaning that none of this code is currently being built as a module by anyone. We need not be concerned about .remove functions and blocking the unbind sysfs operations, since that was already done in a recent commit. Let's remove any remaining modular references, so that when reading the drivers there is no doubt they are builtin-only. All drivers get mostly the same changes, so they are handled in batch. Changes are (1) convert to builtin_amba_driver, (2) delete module.h include where unused, and (3) relocate the description into the comments so we don't need MODULE_DESCRIPTION and associated tags. The etm3x and etm4x drivers use module_param_named, and have been adjusted to just include moduleparam.h for that purpose. In commit f309d4443130bf814e991f836e919dca22df37ae ("platform_device: better support builtin boilerplate avoidance") we introduced the builtin_driver macro. Here we use that support and extend it to amba driver registration, so where a driver is clearly non-modular and builtin-only, we can update with the simple mapping of module_amba_driver(...) ---> builtin_amba_driver(...) Since module_amba_driver() uses the same init level priority as builtin_amba_driver() the init ordering remains unchanged with this commit.
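Concretely, for a hypothetical builtin-only AMBA driver the conversion follows this shape (a schematic sketch; foo, foo_probe and foo_ids are placeholders, not one of the drivers touched by this patch):

/* Before: modular boilerplate at the bottom of the file */
static struct amba_driver foo_driver = {
	.drv		= { .name = "foo" },
	.probe		= foo_probe,	/* placeholder probe function */
	.id_table	= foo_ids,	/* placeholder AMBA id table */
};
module_amba_driver(foo_driver);
MODULE_LICENSE("GPL v2");
MODULE_DESCRIPTION("Foo driver");

/* After: builtin-only registration; the description moves into the
 * comment block at the top of the file and the MODULE_* tags go away.
 */
builtin_amba_driver(foo_driver);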
Cc: Mathieu Poirier Cc: linux-arm-kernel@lists.infradead.org Signed-off-by: Paul Gortmaker Signed-off-by: Mathieu Poirier Acked-by: Russell King Signed-off-by: Greg Kroah-Hartman --- drivers/hwtracing/coresight/coresight-etb10.c | 9 +++------ drivers/hwtracing/coresight/coresight-etm3x.c | 14 ++++++++------ drivers/hwtracing/coresight/coresight-etm4x.c | 4 +--- drivers/hwtracing/coresight/coresight-funnel.c | 9 +++------ drivers/hwtracing/coresight/coresight-replicator-qcom.c | 4 +--- drivers/hwtracing/coresight/coresight-replicator.c | 7 ++----- drivers/hwtracing/coresight/coresight-tmc.c | 9 +++------ drivers/hwtracing/coresight/coresight-tpiu.c | 9 +++------ drivers/hwtracing/coresight/coresight.c | 3 --- drivers/hwtracing/coresight/of_coresight.c | 1 - include/linux/amba/bus.h | 9 +++++++++ 11 files changed, 33 insertions(+), 45 deletions(-) (limited to 'include/linux') diff --git a/drivers/hwtracing/coresight/coresight-etb10.c b/drivers/hwtracing/coresight/coresight-etb10.c index a2eb6bdeaafa..acbce79934d6 100644 --- a/drivers/hwtracing/coresight/coresight-etb10.c +++ b/drivers/hwtracing/coresight/coresight-etb10.c @@ -1,4 +1,6 @@ /* Copyright (c) 2011-2012, The Linux Foundation. All rights reserved. + * + * Description: CoreSight Embedded Trace Buffer driver * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 and @@ -12,7 +14,6 @@ #include #include -#include #include #include #include @@ -781,8 +782,4 @@ static struct amba_driver etb_driver = { .probe = etb_probe, .id_table = etb_ids, }; - -module_amba_driver(etb_driver); - -MODULE_LICENSE("GPL v2"); -MODULE_DESCRIPTION("CoreSight Embedded Trace Buffer driver"); +builtin_amba_driver(etb_driver); diff --git a/drivers/hwtracing/coresight/coresight-etm3x.c b/drivers/hwtracing/coresight/coresight-etm3x.c index 0ba1a3981373..d83ab82672e4 100644 --- a/drivers/hwtracing/coresight/coresight-etm3x.c +++ b/drivers/hwtracing/coresight/coresight-etm3x.c @@ -1,4 +1,6 @@ /* Copyright (c) 2011-2012, The Linux Foundation. All rights reserved. + * + * Description: CoreSight Program Flow Trace driver * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 and @@ -11,7 +13,7 @@ */ #include -#include +#include #include #include #include @@ -38,6 +40,10 @@ #include "coresight-etm.h" #include "coresight-etm-perf.h" +/* + * Not really modular but using module_param is the easiest way to + * remain consistent with existing use cases for now. 
+ */ static int boot_enable; module_param_named(boot_enable, boot_enable, int, S_IRUGO); @@ -912,8 +918,4 @@ static struct amba_driver etm_driver = { .probe = etm_probe, .id_table = etm_ids, }; - -module_amba_driver(etm_driver); - -MODULE_LICENSE("GPL v2"); -MODULE_DESCRIPTION("CoreSight Program Flow Trace driver"); +builtin_amba_driver(etm_driver); diff --git a/drivers/hwtracing/coresight/coresight-etm4x.c b/drivers/hwtracing/coresight/coresight-etm4x.c index d0169ba7fbf2..1c59bd36834c 100644 --- a/drivers/hwtracing/coresight/coresight-etm4x.c +++ b/drivers/hwtracing/coresight/coresight-etm4x.c @@ -15,7 +15,6 @@ #include #include #include -#include #include #include #include @@ -2710,5 +2709,4 @@ static struct amba_driver etm4x_driver = { .probe = etm4_probe, .id_table = etm4_ids, }; - -module_amba_driver(etm4x_driver); +builtin_amba_driver(etm4x_driver); diff --git a/drivers/hwtracing/coresight/coresight-funnel.c b/drivers/hwtracing/coresight/coresight-funnel.c index fb2c679fbc44..0600ca30649d 100644 --- a/drivers/hwtracing/coresight/coresight-funnel.c +++ b/drivers/hwtracing/coresight/coresight-funnel.c @@ -1,4 +1,6 @@ /* Copyright (c) 2011-2012, The Linux Foundation. All rights reserved. + * + * Description: CoreSight Funnel driver * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 and @@ -11,7 +13,6 @@ */ #include -#include #include #include #include @@ -268,8 +269,4 @@ static struct amba_driver funnel_driver = { .probe = funnel_probe, .id_table = funnel_ids, }; - -module_amba_driver(funnel_driver); - -MODULE_LICENSE("GPL v2"); -MODULE_DESCRIPTION("CoreSight Funnel driver"); +builtin_amba_driver(funnel_driver); diff --git a/drivers/hwtracing/coresight/coresight-replicator-qcom.c b/drivers/hwtracing/coresight/coresight-replicator-qcom.c index 286f90b50989..700f710e4bfa 100644 --- a/drivers/hwtracing/coresight/coresight-replicator-qcom.c +++ b/drivers/hwtracing/coresight/coresight-replicator-qcom.c @@ -15,7 +15,6 @@ #include #include #include -#include #include #include #include @@ -198,5 +197,4 @@ static struct amba_driver replicator_driver = { .probe = replicator_probe, .id_table = replicator_ids, }; - -module_amba_driver(replicator_driver); +builtin_amba_driver(replicator_driver); diff --git a/drivers/hwtracing/coresight/coresight-replicator.c b/drivers/hwtracing/coresight/coresight-replicator.c index 0ce98903992c..4299c0569340 100644 --- a/drivers/hwtracing/coresight/coresight-replicator.c +++ b/drivers/hwtracing/coresight/coresight-replicator.c @@ -1,4 +1,6 @@ /* Copyright (c) 2011-2012, The Linux Foundation. All rights reserved. + * + * Description: CoreSight Replicator driver * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 and @@ -11,7 +13,6 @@ */ #include -#include #include #include #include @@ -166,8 +167,4 @@ static struct platform_driver replicator_driver = { .suppress_bind_attrs = true, }, }; - builtin_platform_driver(replicator_driver); - -MODULE_LICENSE("GPL v2"); -MODULE_DESCRIPTION("CoreSight Replicator driver"); diff --git a/drivers/hwtracing/coresight/coresight-tmc.c b/drivers/hwtracing/coresight/coresight-tmc.c index ac91b0b40ec2..1be191f5d39c 100644 --- a/drivers/hwtracing/coresight/coresight-tmc.c +++ b/drivers/hwtracing/coresight/coresight-tmc.c @@ -1,4 +1,6 @@ /* Copyright (c) 2012, The Linux Foundation. All rights reserved. 
+ * + * Description: CoreSight Trace Memory Controller driver * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 and @@ -11,7 +13,6 @@ */ #include -#include #include #include #include @@ -782,8 +783,4 @@ static struct amba_driver tmc_driver = { .probe = tmc_probe, .id_table = tmc_ids, }; - -module_amba_driver(tmc_driver); - -MODULE_LICENSE("GPL v2"); -MODULE_DESCRIPTION("CoreSight Trace Memory Controller driver"); +builtin_amba_driver(tmc_driver); diff --git a/drivers/hwtracing/coresight/coresight-tpiu.c b/drivers/hwtracing/coresight/coresight-tpiu.c index 71582f6dfd4c..8fb09d9237ab 100644 --- a/drivers/hwtracing/coresight/coresight-tpiu.c +++ b/drivers/hwtracing/coresight/coresight-tpiu.c @@ -1,4 +1,6 @@ /* Copyright (c) 2011-2012, The Linux Foundation. All rights reserved. + * + * Description: CoreSight Trace Port Interface Unit driver * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 and @@ -11,7 +13,6 @@ */ #include -#include #include #include #include @@ -218,8 +219,4 @@ static struct amba_driver tpiu_driver = { .probe = tpiu_probe, .id_table = tpiu_ids, }; - -module_amba_driver(tpiu_driver); - -MODULE_LICENSE("GPL v2"); -MODULE_DESCRIPTION("CoreSight Trace Port Interface Unit driver"); +builtin_amba_driver(tpiu_driver); diff --git a/drivers/hwtracing/coresight/coresight.c b/drivers/hwtracing/coresight/coresight.c index 6ec2b66af9ee..2ea5961092c1 100644 --- a/drivers/hwtracing/coresight/coresight.c +++ b/drivers/hwtracing/coresight/coresight.c @@ -11,7 +11,6 @@ */ #include -#include #include #include #include @@ -894,5 +893,3 @@ void coresight_unregister(struct coresight_device *csdev) device_unregister(&csdev->dev); } EXPORT_SYMBOL_GPL(coresight_unregister); - -MODULE_LICENSE("GPL v2"); diff --git a/drivers/hwtracing/coresight/of_coresight.c b/drivers/hwtracing/coresight/of_coresight.c index 3cc57c1e3b5d..b68da1888fd5 100644 --- a/drivers/hwtracing/coresight/of_coresight.c +++ b/drivers/hwtracing/coresight/of_coresight.c @@ -10,7 +10,6 @@ * GNU General Public License for more details. */ -#include #include #include #include diff --git a/include/linux/amba/bus.h b/include/linux/amba/bus.h index 9006c4e75cf7..3d8dcdd1aeae 100644 --- a/include/linux/amba/bus.h +++ b/include/linux/amba/bus.h @@ -163,4 +163,13 @@ struct amba_device name##_device = { \ #define module_amba_driver(__amba_drv) \ module_driver(__amba_drv, amba_driver_register, amba_driver_unregister) +/* + * builtin_amba_driver() - Helper macro for drivers that don't do anything + * special in driver initcall. This eliminates a lot of boilerplate. Each + * driver may only use this macro once, and calling it replaces the instance + * device_initcall(). + */ +#define builtin_amba_driver(__amba_drv) \ + builtin_driver(__amba_drv, amba_driver_register) + #endif -- cgit v1.2.3 From cf2222178645e545e96717b2825601321ce4745c Mon Sep 17 00:00:00 2001 From: Mimi Zohar Date: Thu, 14 Jan 2016 17:57:47 -0500 Subject: ima: define a new hook to measure and appraise a file already in memory This patch defines a new IMA hook ima_post_read_file() for measuring and appraising files read by the kernel. The caller loads the file into memory before calling this function, which calculates the hash followed by the normal IMA policy based processing. 
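The expected calling pattern can be sketched as follows. This is illustrative rather than upstream code: it assumes the generic kernel_read_file() helper this series builds on, which reads the whole file into memory and then runs the post-read security hook, and hence ima_post_read_file(), on that same buffer; demo_load_blob() and the buffer-release detail are placeholders:

#include <linux/fs.h>
#include <linux/vmalloc.h>

static int demo_load_blob(struct file *file, loff_t max_size)
{
	void *buf = NULL;
	loff_t size = 0;
	int ret;

	/*
	 * Read the file once; the in-memory contents are handed to
	 * security_kernel_post_read_file() -> ima_post_read_file()
	 * for measurement/appraisal before this call returns.
	 */
	ret = kernel_read_file(file, &buf, &size, max_size,
			       READING_FIRMWARE);
	if (ret < 0)
		return ret;	/* e.g. -EACCES when appraisal fails */

	/* ... consume buf[0..size) ... */
	vfree(buf);	/* assumes a vmalloc'ed buffer, as in this era */
	return 0;
}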
Changelog v5: - fail ima_post_read_file() if either file or buf is NULL v3: - rename ima_hash_and_process_file() to ima_post_read_file() v1: - split patch Signed-off-by: Mimi Zohar Acked-by: Dmitry Kasatkin --- include/linux/ima.h | 8 +++++++ include/linux/security.h | 1 + security/integrity/ima/ima.h | 4 +++- security/integrity/ima/ima_api.c | 6 +++-- security/integrity/ima/ima_appraise.c | 2 +- security/integrity/ima/ima_main.c | 45 ++++++++++++++++++++++++++++------- security/integrity/ima/ima_policy.c | 1 + security/integrity/integrity.h | 7 ++++-- security/security.c | 7 +++++- 9 files changed, 66 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ima.h b/include/linux/ima.h index 120ccc53fcb7..d29a6a23fc19 100644 --- a/include/linux/ima.h +++ b/include/linux/ima.h @@ -20,6 +20,8 @@ extern void ima_file_free(struct file *file); extern int ima_file_mmap(struct file *file, unsigned long prot); extern int ima_module_check(struct file *file); extern int ima_fw_from_file(struct file *file, char *buf, size_t size); +extern int ima_post_read_file(struct file *file, void *buf, loff_t size, + enum kernel_read_file_id id); #else static inline int ima_bprm_check(struct linux_binprm *bprm) @@ -52,6 +54,12 @@ static inline int ima_fw_from_file(struct file *file, char *buf, size_t size) return 0; } +static inline int ima_post_read_file(struct file *file, void *buf, loff_t size, + enum kernel_read_file_id id) +{ + return 0; +} + #endif /* CONFIG_IMA */ #ifdef CONFIG_IMA_APPRAISE diff --git a/include/linux/security.h b/include/linux/security.h index b68ce94e4e00..d920718dc845 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -24,6 +24,7 @@ #include #include +#include #include #include #include diff --git a/security/integrity/ima/ima.h b/security/integrity/ima/ima.h index 2c5262f2823f..0b7134c04165 100644 --- a/security/integrity/ima/ima.h +++ b/security/integrity/ima/ima.h @@ -19,6 +19,7 @@ #include #include +#include #include #include #include @@ -152,7 +153,8 @@ enum ima_hooks { int ima_get_action(struct inode *inode, int mask, enum ima_hooks func); int ima_must_measure(struct inode *inode, int mask, enum ima_hooks func); int ima_collect_measurement(struct integrity_iint_cache *iint, - struct file *file, enum hash_algo algo); + struct file *file, void *buf, loff_t size, + enum hash_algo algo); void ima_store_measurement(struct integrity_iint_cache *iint, struct file *file, const unsigned char *filename, struct evm_ima_xattr_data *xattr_value, diff --git a/security/integrity/ima/ima_api.c b/security/integrity/ima/ima_api.c index 8750254506a9..370e42dfc5c5 100644 --- a/security/integrity/ima/ima_api.c +++ b/security/integrity/ima/ima_api.c @@ -188,7 +188,8 @@ int ima_get_action(struct inode *inode, int mask, enum ima_hooks func) * Return 0 on success, error code otherwise */ int ima_collect_measurement(struct integrity_iint_cache *iint, - struct file *file, enum hash_algo algo) + struct file *file, void *buf, loff_t size, + enum hash_algo algo) { const char *audit_cause = "failed"; struct inode *inode = file_inode(file); @@ -210,7 +211,8 @@ int ima_collect_measurement(struct integrity_iint_cache *iint, hash.hdr.algo = algo; - result = ima_calc_file_hash(file, &hash.hdr); + result = (!buf) ? 
ima_calc_file_hash(file, &hash.hdr) : + ima_calc_buffer_hash(buf, size, &hash.hdr); if (!result) { int length = sizeof(hash.hdr) + hash.hdr.length; void *tmpbuf = krealloc(iint->ima_hash, length, diff --git a/security/integrity/ima/ima_appraise.c b/security/integrity/ima/ima_appraise.c index 288844908788..cb0d0ff1137b 100644 --- a/security/integrity/ima/ima_appraise.c +++ b/security/integrity/ima/ima_appraise.c @@ -300,7 +300,7 @@ void ima_update_xattr(struct integrity_iint_cache *iint, struct file *file) if (iint->flags & IMA_DIGSIG) return; - rc = ima_collect_measurement(iint, file, ima_hash_algo); + rc = ima_collect_measurement(iint, file, NULL, 0, ima_hash_algo); if (rc < 0) return; diff --git a/security/integrity/ima/ima_main.c b/security/integrity/ima/ima_main.c index 1be99a27a7f3..757765354158 100644 --- a/security/integrity/ima/ima_main.c +++ b/security/integrity/ima/ima_main.c @@ -153,8 +153,8 @@ void ima_file_free(struct file *file) ima_check_last_writer(iint, inode, file); } -static int process_measurement(struct file *file, int mask, - enum ima_hooks func, int opened) +static int process_measurement(struct file *file, char *buf, loff_t size, + int mask, enum ima_hooks func, int opened) { struct inode *inode = file_inode(file); struct integrity_iint_cache *iint = NULL; @@ -226,7 +226,7 @@ static int process_measurement(struct file *file, int mask, hash_algo = ima_get_hash_algo(xattr_value, xattr_len); - rc = ima_collect_measurement(iint, file, hash_algo); + rc = ima_collect_measurement(iint, file, buf, size, hash_algo); if (rc != 0) { if (file->f_flags & O_DIRECT) rc = (iint->flags & IMA_PERMIT_DIRECTIO) ? 0 : -EACCES; @@ -273,7 +273,8 @@ out: int ima_file_mmap(struct file *file, unsigned long prot) { if (file && (prot & PROT_EXEC)) - return process_measurement(file, MAY_EXEC, MMAP_CHECK, 0); + return process_measurement(file, NULL, 0, MAY_EXEC, + MMAP_CHECK, 0); return 0; } @@ -292,7 +293,8 @@ int ima_file_mmap(struct file *file, unsigned long prot) */ int ima_bprm_check(struct linux_binprm *bprm) { - return process_measurement(bprm->file, MAY_EXEC, BPRM_CHECK, 0); + return process_measurement(bprm->file, NULL, 0, MAY_EXEC, + BPRM_CHECK, 0); } /** @@ -307,7 +309,7 @@ int ima_bprm_check(struct linux_binprm *bprm) */ int ima_file_check(struct file *file, int mask, int opened) { - return process_measurement(file, + return process_measurement(file, NULL, 0, mask & (MAY_READ | MAY_WRITE | MAY_EXEC), FILE_CHECK, opened); } @@ -332,7 +334,7 @@ int ima_module_check(struct file *file) #endif return 0; /* We rely on module signature checking */ } - return process_measurement(file, MAY_EXEC, MODULE_CHECK, 0); + return process_measurement(file, NULL, 0, MAY_EXEC, MODULE_CHECK, 0); } int ima_fw_from_file(struct file *file, char *buf, size_t size) @@ -343,7 +345,34 @@ int ima_fw_from_file(struct file *file, char *buf, size_t size) return -EACCES; /* INTEGRITY_UNKNOWN */ return 0; } - return process_measurement(file, MAY_EXEC, FIRMWARE_CHECK, 0); + return process_measurement(file, NULL, 0, MAY_EXEC, FIRMWARE_CHECK, 0); +} + +/** + * ima_post_read_file - in memory collect/appraise/audit measurement + * @file: pointer to the file to be measured/appraised/audit + * @buf: pointer to in memory file contents + * @size: size of in memory file contents + * @read_id: caller identifier + * + * Measure/appraise/audit in memory file based on policy. Policy rules + * are written in terms of a policy identifier. + * + * On success return 0. 
On integrity appraisal error, assuming the file + * is in policy and IMA-appraisal is in enforcing mode, return -EACCES. + */ +int ima_post_read_file(struct file *file, void *buf, loff_t size, + enum kernel_read_file_id read_id) +{ + enum ima_hooks func = FILE_CHECK; + + if (!file || !buf || size == 0) { /* should never happen */ + if (ima_appraise & IMA_APPRAISE_ENFORCE) + return -EACCES; + return 0; + } + + return process_measurement(file, buf, size, MAY_READ, func, 0); } static int __init init_ima(void) diff --git a/security/integrity/ima/ima_policy.c b/security/integrity/ima/ima_policy.c index b089ebef6648..cfbe86f476d0 100644 --- a/security/integrity/ima/ima_policy.c +++ b/security/integrity/ima/ima_policy.c @@ -12,6 +12,7 @@ */ #include #include +#include #include #include #include diff --git a/security/integrity/integrity.h b/security/integrity/integrity.h index 5efe2ecc538d..9a0ea4c4e3dd 100644 --- a/security/integrity/integrity.h +++ b/security/integrity/integrity.h @@ -49,12 +49,14 @@ #define IMA_MODULE_APPRAISED 0x00008000 #define IMA_FIRMWARE_APPRAISE 0x00010000 #define IMA_FIRMWARE_APPRAISED 0x00020000 +#define IMA_READ_APPRAISE 0x00040000 +#define IMA_READ_APPRAISED 0x00080000 #define IMA_APPRAISE_SUBMASK (IMA_FILE_APPRAISE | IMA_MMAP_APPRAISE | \ IMA_BPRM_APPRAISE | IMA_MODULE_APPRAISE | \ - IMA_FIRMWARE_APPRAISE) + IMA_FIRMWARE_APPRAISE | IMA_READ_APPRAISE) #define IMA_APPRAISED_SUBMASK (IMA_FILE_APPRAISED | IMA_MMAP_APPRAISED | \ IMA_BPRM_APPRAISED | IMA_MODULE_APPRAISED | \ - IMA_FIRMWARE_APPRAISED) + IMA_FIRMWARE_APPRAISED | IMA_READ_APPRAISED) enum evm_ima_xattr_type { IMA_XATTR_DIGEST = 0x01, @@ -111,6 +113,7 @@ struct integrity_iint_cache { enum integrity_status ima_bprm_status:4; enum integrity_status ima_module_status:4; enum integrity_status ima_firmware_status:4; + enum integrity_status ima_read_status:4; enum integrity_status evm_status:4; struct ima_digest_data *ima_hash; }; diff --git a/security/security.c b/security/security.c index 5b96eabaafd4..ef4c65a9fd17 100644 --- a/security/security.c +++ b/security/security.c @@ -913,7 +913,12 @@ int security_kernel_module_from_file(struct file *file) int security_kernel_post_read_file(struct file *file, char *buf, loff_t size, enum kernel_read_file_id id) { - return call_int_hook(kernel_post_read_file, 0, file, buf, size, id); + int ret; + + ret = call_int_hook(kernel_post_read_file, 0, file, buf, size, id); + if (ret) + return ret; + return ima_post_read_file(file, buf, size, id); } EXPORT_SYMBOL_GPL(security_kernel_post_read_file); -- cgit v1.2.3 From 09596b94f7d28595602482e69ed954deab707437 Mon Sep 17 00:00:00 2001 From: Mimi Zohar Date: Thu, 19 Nov 2015 12:39:22 -0500 Subject: vfs: define kernel_read_file_from_path This patch defines kernel_read_file_from_path(), a wrapper for the VFS common kernel_read_file(). Changelog: - revert error msg regression - reported by Sergey Senozhatsky - Separated from the IMA patch Signed-off-by: Mimi Zohar Acked-by: Kees Cook Acked-by: Luis R. 
Rodriguez Cc: Al Viro --- fs/exec.c | 19 +++++++++++++++++++ include/linux/fs.h | 2 ++ 2 files changed, 21 insertions(+) (limited to 'include/linux') diff --git a/fs/exec.c b/fs/exec.c index 1138dc502c77..64cb3bc788c1 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -884,6 +884,25 @@ out: } EXPORT_SYMBOL_GPL(kernel_read_file); +int kernel_read_file_from_path(char *path, void **buf, loff_t *size, + loff_t max_size, enum kernel_read_file_id id) +{ + struct file *file; + int ret; + + if (!path || !*path) + return -EINVAL; + + file = filp_open(path, O_RDONLY, 0); + if (IS_ERR(file)) + return PTR_ERR(file); + + ret = kernel_read_file(file, buf, size, max_size, id); + fput(file); + return ret; +} +EXPORT_SYMBOL_GPL(kernel_read_file_from_path); + ssize_t read_code(struct file *file, unsigned long addr, loff_t pos, size_t len) { ssize_t res = vfs_read(file, (void __user *)addr, len, &pos); diff --git a/include/linux/fs.h b/include/linux/fs.h index aa84bcb9c368..00fa5c45fd63 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2583,6 +2583,8 @@ enum kernel_read_file_id { extern int kernel_read(struct file *, loff_t, char *, unsigned long); extern int kernel_read_file(struct file *, void **, loff_t *, loff_t, enum kernel_read_file_id); +extern int kernel_read_file_from_path(char *, void **, loff_t *, loff_t, + enum kernel_read_file_id); extern ssize_t kernel_write(struct file *, const char *, size_t, loff_t); extern ssize_t __kernel_write(struct file *, const char *, size_t, loff_t *); extern struct file * open_exec(const char *); -- cgit v1.2.3 From e40ba6d56b41754b37b995dbc8035b2b3a6afd8a Mon Sep 17 00:00:00 2001 From: Mimi Zohar Date: Thu, 19 Nov 2015 12:39:22 -0500 Subject: firmware: replace call to fw_read_file_contents() with kernel version Replace fw_read_file_contents() with kernel_read_file_from_path(). Although none of the upstreamed LSMs define a kernel_fw_from_file hook, IMA is called by the security function to prevent unsigned firmware from being loaded and to measure/appraise signed firmware, based on policy. Instead of reading the firmware twice, once for measuring/appraising the firmware and again for reading the firmware contents into memory, the kernel_post_read_file() security hook calculates the file hash based on the in-memory file buffer. The firmware is read once. This patch removes the LSM kernel_fw_from_file() hook and security call. Changelog v4+: - revert dropped buf->size assignment - reported by Sergey Senozhatsky v3: - remove kernel_fw_from_file hook - use kernel_read_file_from_path() - requested by Luis v2: - reordered and squashed firmware patches - fix MAX firmware size (Kees Cook) Signed-off-by: Mimi Zohar Acked-by: Kees Cook Acked-by: Luis R.
Rodriguez --- drivers/base/firmware_class.c | 52 ++++++++------------------------------- include/linux/fs.h | 1 + include/linux/ima.h | 6 ----- include/linux/lsm_hooks.h | 11 --------- include/linux/security.h | 7 ------ security/integrity/ima/ima_main.c | 21 ++++++++-------- security/security.c | 13 ---------- 7 files changed, 21 insertions(+), 90 deletions(-) (limited to 'include/linux') diff --git a/drivers/base/firmware_class.c b/drivers/base/firmware_class.c index c743a2f18c33..a414008ea64c 100644 --- a/drivers/base/firmware_class.c +++ b/drivers/base/firmware_class.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include @@ -291,37 +292,6 @@ static const char * const fw_path[] = { module_param_string(path, fw_path_para, sizeof(fw_path_para), 0644); MODULE_PARM_DESC(path, "customized firmware image search path with a higher priority than default path"); -static int fw_read_file_contents(struct file *file, struct firmware_buf *fw_buf) -{ - int size; - char *buf; - int rc; - - if (!S_ISREG(file_inode(file)->i_mode)) - return -EINVAL; - size = i_size_read(file_inode(file)); - if (size <= 0) - return -EINVAL; - buf = vmalloc(size); - if (!buf) - return -ENOMEM; - rc = kernel_read(file, 0, buf, size); - if (rc != size) { - if (rc > 0) - rc = -EIO; - goto fail; - } - rc = security_kernel_fw_from_file(file, buf, size); - if (rc) - goto fail; - fw_buf->data = buf; - fw_buf->size = size; - return 0; -fail: - vfree(buf); - return rc; -} - static void fw_finish_direct_load(struct device *device, struct firmware_buf *buf) { @@ -334,6 +304,7 @@ static void fw_finish_direct_load(struct device *device, static int fw_get_filesystem_firmware(struct device *device, struct firmware_buf *buf) { + loff_t size; int i, len; int rc = -ENOENT; char *path; @@ -343,8 +314,6 @@ static int fw_get_filesystem_firmware(struct device *device, return -ENOMEM; for (i = 0; i < ARRAY_SIZE(fw_path); i++) { - struct file *file; - /* skip the unset customized path */ if (!fw_path[i][0]) continue; @@ -356,18 +325,16 @@ static int fw_get_filesystem_firmware(struct device *device, break; } - file = filp_open(path, O_RDONLY, 0); - if (IS_ERR(file)) - continue; - rc = fw_read_file_contents(file, buf); - fput(file); + buf->size = 0; + rc = kernel_read_file_from_path(path, &buf->data, &size, + INT_MAX, READING_FIRMWARE); if (rc) { dev_warn(device, "loading %s failed with error %d\n", path, rc); continue; } - dev_dbg(device, "direct-loading %s\n", - buf->fw_id); + dev_dbg(device, "direct-loading %s\n", buf->fw_id); + buf->size = size; fw_finish_direct_load(device, buf); break; } @@ -689,8 +656,9 @@ static ssize_t firmware_loading_store(struct device *dev, dev_err(dev, "%s: map pages failed\n", __func__); else - rc = security_kernel_fw_from_file(NULL, - fw_buf->data, fw_buf->size); + rc = security_kernel_post_read_file(NULL, + fw_buf->data, fw_buf->size, + READING_FIRMWARE); /* * Same logic as fw_load_abort, only the DONE bit diff --git a/include/linux/fs.h b/include/linux/fs.h index 00fa5c45fd63..c8bc4d8c843f 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2577,6 +2577,7 @@ static inline void i_readcount_inc(struct inode *inode) extern int do_pipe_flags(int *, int); enum kernel_read_file_id { + READING_FIRMWARE = 1, READING_MAX_ID }; diff --git a/include/linux/ima.h b/include/linux/ima.h index d29a6a23fc19..7aea4863c244 100644 --- a/include/linux/ima.h +++ b/include/linux/ima.h @@ -19,7 +19,6 @@ extern int ima_file_check(struct file *file, int mask, int opened); extern void 
ima_file_free(struct file *file); extern int ima_file_mmap(struct file *file, unsigned long prot); extern int ima_module_check(struct file *file); -extern int ima_fw_from_file(struct file *file, char *buf, size_t size); extern int ima_post_read_file(struct file *file, void *buf, loff_t size, enum kernel_read_file_id id); @@ -49,11 +48,6 @@ static inline int ima_module_check(struct file *file) return 0; } -static inline int ima_fw_from_file(struct file *file, char *buf, size_t size) -{ - return 0; -} - static inline int ima_post_read_file(struct file *file, void *buf, loff_t size, enum kernel_read_file_id id) { diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index 2337f33913c1..7d04a1220223 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -541,15 +541,6 @@ * @inode points to the inode to use as a reference. * The current task must be the one that nominated @inode. * Return 0 if successful. - * @kernel_fw_from_file: - * Load firmware from userspace (not called for built-in firmware). - * @file contains the file structure pointing to the file containing - * the firmware to load. This argument will be NULL if the firmware - * was loaded via the uevent-triggered blob-based interface exposed - * by CONFIG_FW_LOADER_USER_HELPER. - * @buf pointer to buffer containing firmware contents. - * @size length of the firmware contents. - * Return 0 if permission is granted. * @kernel_module_request: * Ability to trigger the kernel to automatically upcall to userspace for * userspace to load a kernel module with the given name. @@ -1462,7 +1453,6 @@ union security_list_options { void (*cred_transfer)(struct cred *new, const struct cred *old); int (*kernel_act_as)(struct cred *new, u32 secid); int (*kernel_create_files_as)(struct cred *new, struct inode *inode); - int (*kernel_fw_from_file)(struct file *file, char *buf, size_t size); int (*kernel_module_request)(char *kmod_name); int (*kernel_module_from_file)(struct file *file); int (*kernel_post_read_file)(struct file *file, char *buf, loff_t size, @@ -1725,7 +1715,6 @@ struct security_hook_heads { struct list_head cred_transfer; struct list_head kernel_act_as; struct list_head kernel_create_files_as; - struct list_head kernel_fw_from_file; struct list_head kernel_post_read_file; struct list_head kernel_module_request; struct list_head kernel_module_from_file; diff --git a/include/linux/security.h b/include/linux/security.h index d920718dc845..cee1349e1155 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -300,7 +300,6 @@ int security_prepare_creds(struct cred *new, const struct cred *old, gfp_t gfp); void security_transfer_creds(struct cred *new, const struct cred *old); int security_kernel_act_as(struct cred *new, u32 secid); int security_kernel_create_files_as(struct cred *new, struct inode *inode); -int security_kernel_fw_from_file(struct file *file, char *buf, size_t size); int security_kernel_module_request(char *kmod_name); int security_kernel_module_from_file(struct file *file); int security_kernel_post_read_file(struct file *file, char *buf, loff_t size, @@ -854,12 +853,6 @@ static inline int security_kernel_create_files_as(struct cred *cred, return 0; } -static inline int security_kernel_fw_from_file(struct file *file, - char *buf, size_t size) -{ - return 0; -} - static inline int security_kernel_module_request(char *kmod_name) { return 0; diff --git a/security/integrity/ima/ima_main.c b/security/integrity/ima/ima_main.c index 757765354158..e9651be17b72 100644 --- 
a/security/integrity/ima/ima_main.c +++ b/security/integrity/ima/ima_main.c @@ -337,17 +337,6 @@ int ima_module_check(struct file *file) return process_measurement(file, NULL, 0, MAY_EXEC, MODULE_CHECK, 0); } -int ima_fw_from_file(struct file *file, char *buf, size_t size) -{ - if (!file) { - if ((ima_appraise & IMA_APPRAISE_FIRMWARE) && - (ima_appraise & IMA_APPRAISE_ENFORCE)) - return -EACCES; /* INTEGRITY_UNKNOWN */ - return 0; - } - return process_measurement(file, NULL, 0, MAY_EXEC, FIRMWARE_CHECK, 0); -} - /** * ima_post_read_file - in memory collect/appraise/audit measurement * @file: pointer to the file to be measured/appraised/audit @@ -366,12 +355,22 @@ int ima_post_read_file(struct file *file, void *buf, loff_t size, { enum ima_hooks func = FILE_CHECK; + if (!file && read_id == READING_FIRMWARE) { + if ((ima_appraise & IMA_APPRAISE_FIRMWARE) && + (ima_appraise & IMA_APPRAISE_ENFORCE)) + return -EACCES; /* INTEGRITY_UNKNOWN */ + return 0; + } + if (!file || !buf || size == 0) { /* should never happen */ if (ima_appraise & IMA_APPRAISE_ENFORCE) return -EACCES; return 0; } + if (read_id == READING_FIRMWARE) + func = FIRMWARE_CHECK; + return process_measurement(file, buf, size, MAY_READ, func, 0); } diff --git a/security/security.c b/security/security.c index ef4c65a9fd17..cd85be61c416 100644 --- a/security/security.c +++ b/security/security.c @@ -884,17 +884,6 @@ int security_kernel_create_files_as(struct cred *new, struct inode *inode) return call_int_hook(kernel_create_files_as, 0, new, inode); } -int security_kernel_fw_from_file(struct file *file, char *buf, size_t size) -{ - int ret; - - ret = call_int_hook(kernel_fw_from_file, 0, file, buf, size); - if (ret) - return ret; - return ima_fw_from_file(file, buf, size); -} -EXPORT_SYMBOL_GPL(security_kernel_fw_from_file); - int security_kernel_module_request(char *kmod_name) { return call_int_hook(kernel_module_request, 0, kmod_name); @@ -1703,8 +1692,6 @@ struct security_hook_heads security_hook_heads = { LIST_HEAD_INIT(security_hook_heads.kernel_act_as), .kernel_create_files_as = LIST_HEAD_INIT(security_hook_heads.kernel_create_files_as), - .kernel_fw_from_file = - LIST_HEAD_INIT(security_hook_heads.kernel_fw_from_file), .kernel_module_request = LIST_HEAD_INIT(security_hook_heads.kernel_module_request), .kernel_module_from_file = -- cgit v1.2.3 From 39eeb4fb97f60dbdfc823c1a673a8844b9226b60 Mon Sep 17 00:00:00 2001 From: Mimi Zohar Date: Sat, 30 Jan 2016 22:23:26 -0500 Subject: security: define kernel_read_file hook The kernel_read_file security hook is called prior to reading the file into memory. Changelog v4+: - export security_kernel_read_file() Signed-off-by: Mimi Zohar Acked-by: Kees Cook Acked-by: Luis R. 
Rodriguez Acked-by: Casey Schaufler --- fs/exec.c | 4 ++++ include/linux/ima.h | 6 ++++++ include/linux/lsm_hooks.h | 8 ++++++++ include/linux/security.h | 7 +++++++ security/integrity/ima/ima_main.c | 16 ++++++++++++++++ security/security.c | 13 +++++++++++++ 6 files changed, 54 insertions(+) (limited to 'include/linux') diff --git a/fs/exec.c b/fs/exec.c index 64cb3bc788c1..8aaa38666119 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -842,6 +842,10 @@ int kernel_read_file(struct file *file, void **buf, loff_t *size, if (!S_ISREG(file_inode(file)->i_mode) || max_size < 0) return -EINVAL; + ret = security_kernel_read_file(file, id); + if (ret) + return ret; + i_size = i_size_read(file_inode(file)); if (max_size > 0 && i_size > max_size) return -EFBIG; diff --git a/include/linux/ima.h b/include/linux/ima.h index 7aea4863c244..6adcaea8101c 100644 --- a/include/linux/ima.h +++ b/include/linux/ima.h @@ -19,6 +19,7 @@ extern int ima_file_check(struct file *file, int mask, int opened); extern void ima_file_free(struct file *file); extern int ima_file_mmap(struct file *file, unsigned long prot); extern int ima_module_check(struct file *file); +extern int ima_read_file(struct file *file, enum kernel_read_file_id id); extern int ima_post_read_file(struct file *file, void *buf, loff_t size, enum kernel_read_file_id id); @@ -48,6 +49,11 @@ static inline int ima_module_check(struct file *file) return 0; } +static inline int ima_read_file(struct file *file, enum kernel_read_file_id id) +{ + return 0; +} + static inline int ima_post_read_file(struct file *file, void *buf, loff_t size, enum kernel_read_file_id id) { diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index 7d04a1220223..d32b7bd13635 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -552,6 +552,12 @@ * the kernel module to load. If the module is being loaded from a blob, * this argument will be NULL. * Return 0 if permission is granted. + * @kernel_read_file: + * Read a file specified by userspace. + * @file contains the file structure pointing to the file being read + * by the kernel. + * @id kernel read file identifier + * Return 0 if permission is granted. * @kernel_post_read_file: * Read a file specified by userspace. 
* @file contains the file structure pointing to the file being read @@ -1455,6 +1461,7 @@ union security_list_options { int (*kernel_create_files_as)(struct cred *new, struct inode *inode); int (*kernel_module_request)(char *kmod_name); int (*kernel_module_from_file)(struct file *file); + int (*kernel_read_file)(struct file *file, enum kernel_read_file_id id); int (*kernel_post_read_file)(struct file *file, char *buf, loff_t size, enum kernel_read_file_id id); int (*task_fix_setuid)(struct cred *new, const struct cred *old, @@ -1715,6 +1722,7 @@ struct security_hook_heads { struct list_head cred_transfer; struct list_head kernel_act_as; struct list_head kernel_create_files_as; + struct list_head kernel_read_file; struct list_head kernel_post_read_file; struct list_head kernel_module_request; struct list_head kernel_module_from_file; diff --git a/include/linux/security.h b/include/linux/security.h index cee1349e1155..071fb747fdbb 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -302,6 +302,7 @@ int security_kernel_act_as(struct cred *new, u32 secid); int security_kernel_create_files_as(struct cred *new, struct inode *inode); int security_kernel_module_request(char *kmod_name); int security_kernel_module_from_file(struct file *file); +int security_kernel_read_file(struct file *file, enum kernel_read_file_id id); int security_kernel_post_read_file(struct file *file, char *buf, loff_t size, enum kernel_read_file_id id); int security_task_fix_setuid(struct cred *new, const struct cred *old, @@ -863,6 +864,12 @@ static inline int security_kernel_module_from_file(struct file *file) return 0; } +static inline int security_kernel_read_file(struct file *file, + enum kernel_read_file_id id) +{ + return 0; +} + static inline int security_kernel_post_read_file(struct file *file, char *buf, loff_t size, enum kernel_read_file_id id) diff --git a/security/integrity/ima/ima_main.c b/security/integrity/ima/ima_main.c index e9651be17b72..bbb80df28fb1 100644 --- a/security/integrity/ima/ima_main.c +++ b/security/integrity/ima/ima_main.c @@ -337,6 +337,22 @@ int ima_module_check(struct file *file) return process_measurement(file, NULL, 0, MAY_EXEC, MODULE_CHECK, 0); } +/** + * ima_read_file - pre-measure/appraise hook decision based on policy + * @file: pointer to the file to be measured/appraised/audit + * @read_id: caller identifier + * + * Permit reading a file based on policy. The policy rules are written + * in terms of the policy identifier. Appraising the integrity of + * a file requires a file descriptor. + * + * For permission return 0, otherwise return -EACCES. 
+ */ +int ima_read_file(struct file *file, enum kernel_read_file_id read_id) +{ + return 0; +} + /** * ima_post_read_file - in memory collect/appraise/audit measurement * @file: pointer to the file to be measured/appraised/audit diff --git a/security/security.c b/security/security.c index cd85be61c416..8e699f98a600 100644 --- a/security/security.c +++ b/security/security.c @@ -899,6 +899,17 @@ int security_kernel_module_from_file(struct file *file) return ima_module_check(file); } +int security_kernel_read_file(struct file *file, enum kernel_read_file_id id) +{ + int ret; + + ret = call_int_hook(kernel_read_file, 0, file, id); + if (ret) + return ret; + return ima_read_file(file, id); +} +EXPORT_SYMBOL_GPL(security_kernel_read_file); + int security_kernel_post_read_file(struct file *file, char *buf, loff_t size, enum kernel_read_file_id id) { @@ -1696,6 +1707,8 @@ struct security_hook_heads security_hook_heads = { LIST_HEAD_INIT(security_hook_heads.kernel_module_request), .kernel_module_from_file = LIST_HEAD_INIT(security_hook_heads.kernel_module_from_file), + .kernel_read_file = + LIST_HEAD_INIT(security_hook_heads.kernel_read_file), .kernel_post_read_file = LIST_HEAD_INIT(security_hook_heads.kernel_post_read_file), .task_fix_setuid = -- cgit v1.2.3 From b844f0ecbc5626ec26cfc70cb144a4c9b85dc3f2 Mon Sep 17 00:00:00 2001 From: Mimi Zohar Date: Mon, 1 Feb 2016 08:36:21 -0500 Subject: vfs: define kernel_copy_file_from_fd() This patch defines kernel_read_file_from_fd(), a wrapper for the VFS common kernel_read_file(). Changelog: - Separated from the kernel modules patch Acked-by: Kees Cook Acked-by: Luis R. Rodriguez Cc: Al Viro Signed-off-by: Mimi Zohar --- fs/exec.c | 16 ++++++++++++++++ include/linux/fs.h | 2 ++ 2 files changed, 18 insertions(+) (limited to 'include/linux') diff --git a/fs/exec.c b/fs/exec.c index 8aaa38666119..9bdf0edf570d 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -907,6 +907,22 @@ int kernel_read_file_from_path(char *path, void **buf, loff_t *size, } EXPORT_SYMBOL_GPL(kernel_read_file_from_path); +int kernel_read_file_from_fd(int fd, void **buf, loff_t *size, loff_t max_size, + enum kernel_read_file_id id) +{ + struct fd f = fdget(fd); + int ret = -EBADF; + + if (!f.file) + goto out; + + ret = kernel_read_file(f.file, buf, size, max_size, id); +out: + fdput(f); + return ret; +} +EXPORT_SYMBOL_GPL(kernel_read_file_from_fd); + ssize_t read_code(struct file *file, unsigned long addr, loff_t pos, size_t len) { ssize_t res = vfs_read(file, (void __user *)addr, len, &pos); diff --git a/include/linux/fs.h b/include/linux/fs.h index c8bc4d8c843f..9c85deae1bf2 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2586,6 +2586,8 @@ extern int kernel_read_file(struct file *, void **, loff_t *, loff_t, enum kernel_read_file_id); extern int kernel_read_file_from_path(char *, void **, loff_t *, loff_t, enum kernel_read_file_id); +extern int kernel_read_file_from_fd(int, void **, loff_t *, loff_t, + enum kernel_read_file_id); extern ssize_t kernel_write(struct file *, const char *, size_t, loff_t); extern ssize_t __kernel_write(struct file *, const char *, size_t, loff_t *); extern struct file * open_exec(const char *); -- cgit v1.2.3 From a1db74209483a24c861c848b4bb79a4d945ef6fa Mon Sep 17 00:00:00 2001 From: Mimi Zohar Date: Wed, 30 Dec 2015 07:35:30 -0500 Subject: module: replace copy_module_from_fd with kernel version Replace copy_module_from_fd() with kernel_read_file_from_fd(). 
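As a rough sketch of the resulting call pattern (the wrapper function below is hypothetical; kernel_read_file() vmallocs the buffer and returns its size, so the caller owns the result and must vfree() it when done):

#include <linux/fs.h>
#include <linux/kernel.h>
#include <linux/vmalloc.h>

/* Hypothetical caller, modeled on the finit_module() conversion. */
static int example_read_blob_from_fd(int fd, void **hdr, loff_t *len)
{
        void *buf = NULL;
        loff_t size = 0;
        int ret;

        ret = kernel_read_file_from_fd(fd, &buf, &size, INT_MAX,
                                       READING_MODULE);
        if (ret)
                return ret;

        *hdr = buf;     /* vmalloc'ed by kernel_read_file() */
        *len = size;
        return 0;       /* caller vfree()s *hdr when finished */
}

(READING_MODULE is the kernel_read_file_id identifier this conversion introduces.)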
Although none of the upstreamed LSMs define a kernel_module_from_file hook, IMA is called, based on policy, to prevent unsigned kernel modules from being loaded by the original kernel module syscall and to measure/appraise signed kernel modules. The security function security_kernel_module_from_file() was called prior to reading a kernel module. Preventing unsigned kernel modules from being loaded by the original kernel module syscall remains on the pre-read kernel_read_file() security hook. Instead of reading the kernel module twice, once for measuring/appraising and again for loading the kernel module, the signature validation is moved to the kernel_post_read_file() security hook. This patch removes the security_kernel_module_from_file() hook and security call. Signed-off-by: Mimi Zohar Acked-by: Kees Cook Acked-by: Luis R. Rodriguez Cc: Rusty Russell --- include/linux/fs.h | 1 + include/linux/ima.h | 6 ---- include/linux/lsm_hooks.h | 7 ---- include/linux/security.h | 5 --- kernel/module.c | 68 +++++---------------------------------- security/integrity/ima/ima_main.c | 35 ++++++++------------ security/security.c | 12 ------- 7 files changed, 22 insertions(+), 112 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 9c85deae1bf2..fb08b668c37a 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2578,6 +2578,7 @@ extern int do_pipe_flags(int *, int); enum kernel_read_file_id { READING_FIRMWARE = 1, + READING_MODULE, READING_MAX_ID }; diff --git a/include/linux/ima.h b/include/linux/ima.h index 6adcaea8101c..e6516cbbe9bf 100644 --- a/include/linux/ima.h +++ b/include/linux/ima.h @@ -18,7 +18,6 @@ extern int ima_bprm_check(struct linux_binprm *bprm); extern int ima_file_check(struct file *file, int mask, int opened); extern void ima_file_free(struct file *file); extern int ima_file_mmap(struct file *file, unsigned long prot); -extern int ima_module_check(struct file *file); extern int ima_read_file(struct file *file, enum kernel_read_file_id id); extern int ima_post_read_file(struct file *file, void *buf, loff_t size, enum kernel_read_file_id id); @@ -44,11 +43,6 @@ static inline int ima_file_mmap(struct file *file, unsigned long prot) return 0; } -static inline int ima_module_check(struct file *file) -{ - return 0; -} - static inline int ima_read_file(struct file *file, enum kernel_read_file_id id) { return 0; diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index d32b7bd13635..cdee11cbcdf1 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -546,12 +546,6 @@ * userspace to load a kernel module with the given name. * @kmod_name name of the module requested by the kernel * Return 0 if successful. - * @kernel_module_from_file: - * Load a kernel module from userspace. - * @file contains the file structure pointing to the file containing - * the kernel module to load. If the module is being loaded from a blob, - * this argument will be NULL. - * Return 0 if permission is granted. * @kernel_read_file: * Read a file specified by userspace. 
* @file contains the file structure pointing to the file being read @@ -1725,7 +1719,6 @@ struct security_hook_heads { struct list_head kernel_read_file; struct list_head kernel_post_read_file; struct list_head kernel_module_request; - struct list_head kernel_module_from_file; struct list_head task_fix_setuid; struct list_head task_setpgid; struct list_head task_getpgid; diff --git a/include/linux/security.h b/include/linux/security.h index 071fb747fdbb..157f0cb1e4d2 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -859,11 +859,6 @@ static inline int security_kernel_module_request(char *kmod_name) return 0; } -static inline int security_kernel_module_from_file(struct file *file) -{ - return 0; -} - static inline int security_kernel_read_file(struct file *file, enum kernel_read_file_id id) { diff --git a/kernel/module.c b/kernel/module.c index 8358f4697c0c..955410928696 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -2654,7 +2654,7 @@ static int copy_module_from_user(const void __user *umod, unsigned long len, if (info->len < sizeof(*(info->hdr))) return -ENOEXEC; - err = security_kernel_module_from_file(NULL); + err = security_kernel_read_file(NULL, READING_MODULE); if (err) return err; @@ -2672,63 +2672,6 @@ static int copy_module_from_user(const void __user *umod, unsigned long len, return 0; } -/* Sets info->hdr and info->len. */ -static int copy_module_from_fd(int fd, struct load_info *info) -{ - struct fd f = fdget(fd); - int err; - struct kstat stat; - loff_t pos; - ssize_t bytes = 0; - - if (!f.file) - return -ENOEXEC; - - err = security_kernel_module_from_file(f.file); - if (err) - goto out; - - err = vfs_getattr(&f.file->f_path, &stat); - if (err) - goto out; - - if (stat.size > INT_MAX) { - err = -EFBIG; - goto out; - } - - /* Don't hand 0 to vmalloc, it whines. */ - if (stat.size == 0) { - err = -EINVAL; - goto out; - } - - info->hdr = vmalloc(stat.size); - if (!info->hdr) { - err = -ENOMEM; - goto out; - } - - pos = 0; - while (pos < stat.size) { - bytes = kernel_read(f.file, pos, (char *)(info->hdr) + pos, - stat.size - pos); - if (bytes < 0) { - vfree(info->hdr); - err = bytes; - goto out; - } - if (bytes == 0) - break; - pos += bytes; - } - info->len = pos; - -out: - fdput(f); - return err; -} - static void free_copy(struct load_info *info) { vfree(info->hdr); @@ -3589,8 +3532,10 @@ SYSCALL_DEFINE3(init_module, void __user *, umod, SYSCALL_DEFINE3(finit_module, int, fd, const char __user *, uargs, int, flags) { - int err; struct load_info info = { }; + loff_t size; + void *hdr; + int err; err = may_init_module(); if (err) @@ -3602,9 +3547,12 @@ SYSCALL_DEFINE3(finit_module, int, fd, const char __user *, uargs, int, flags) |MODULE_INIT_IGNORE_VERMAGIC)) return -EINVAL; - err = copy_module_from_fd(fd, &info); + err = kernel_read_file_from_fd(fd, &hdr, &size, INT_MAX, + READING_MODULE); if (err) return err; + info.hdr = hdr; + info.len = size; return load_module(&info, uargs, flags); } diff --git a/security/integrity/ima/ima_main.c b/security/integrity/ima/ima_main.c index bbb80df28fb1..5da0b9c00072 100644 --- a/security/integrity/ima/ima_main.c +++ b/security/integrity/ima/ima_main.c @@ -315,28 +315,6 @@ int ima_file_check(struct file *file, int mask, int opened) } EXPORT_SYMBOL_GPL(ima_file_check); -/** - * ima_module_check - based on policy, collect/store/appraise measurement. - * @file: pointer to the file to be measured/appraised - * - * Measure/appraise kernel modules based on policy. - * - * On success return 0. 
On integrity appraisal error, assuming the file - * is in policy and IMA-appraisal is in enforcing mode, return -EACCES. - */ -int ima_module_check(struct file *file) -{ - if (!file) { -#ifndef CONFIG_MODULE_SIG_FORCE - if ((ima_appraise & IMA_APPRAISE_MODULES) && - (ima_appraise & IMA_APPRAISE_ENFORCE)) - return -EACCES; /* INTEGRITY_UNKNOWN */ -#endif - return 0; /* We rely on module signature checking */ - } - return process_measurement(file, NULL, 0, MAY_EXEC, MODULE_CHECK, 0); -} - /** * ima_read_file - pre-measure/appraise hook decision based on policy * @file: pointer to the file to be measured/appraised/audit @@ -350,6 +328,14 @@ int ima_module_check(struct file *file) */ int ima_read_file(struct file *file, enum kernel_read_file_id read_id) { + if (!file && read_id == READING_MODULE) { +#ifndef CONFIG_MODULE_SIG_FORCE + if ((ima_appraise & IMA_APPRAISE_MODULES) && + (ima_appraise & IMA_APPRAISE_ENFORCE)) + return -EACCES; /* INTEGRITY_UNKNOWN */ +#endif + return 0; /* We rely on module signature checking */ + } return 0; } @@ -378,6 +364,9 @@ int ima_post_read_file(struct file *file, void *buf, loff_t size, return 0; } + if (!file && read_id == READING_MODULE) /* MODULE_SIG_FORCE enabled */ + return 0; + if (!file || !buf || size == 0) { /* should never happen */ if (ima_appraise & IMA_APPRAISE_ENFORCE) return -EACCES; @@ -386,6 +375,8 @@ int ima_post_read_file(struct file *file, void *buf, loff_t size, if (read_id == READING_FIRMWARE) func = FIRMWARE_CHECK; + else if (read_id == READING_MODULE) + func = MODULE_CHECK; return process_measurement(file, buf, size, MAY_READ, func, 0); } diff --git a/security/security.c b/security/security.c index 8e699f98a600..3644b0344d29 100644 --- a/security/security.c +++ b/security/security.c @@ -889,16 +889,6 @@ int security_kernel_module_request(char *kmod_name) return call_int_hook(kernel_module_request, 0, kmod_name); } -int security_kernel_module_from_file(struct file *file) -{ - int ret; - - ret = call_int_hook(kernel_module_from_file, 0, file); - if (ret) - return ret; - return ima_module_check(file); -} - int security_kernel_read_file(struct file *file, enum kernel_read_file_id id) { int ret; @@ -1705,8 +1695,6 @@ struct security_hook_heads security_hook_heads = { LIST_HEAD_INIT(security_hook_heads.kernel_create_files_as), .kernel_module_request = LIST_HEAD_INIT(security_hook_heads.kernel_module_request), - .kernel_module_from_file = - LIST_HEAD_INIT(security_hook_heads.kernel_module_from_file), .kernel_read_file = LIST_HEAD_INIT(security_hook_heads.kernel_read_file), .kernel_post_read_file = -- cgit v1.2.3 From b804defe4297157a9ff45863769efe9a01953398 Mon Sep 17 00:00:00 2001 From: Mimi Zohar Date: Thu, 14 Jan 2016 20:59:14 -0500 Subject: kexec: replace call to copy_file_from_fd() with kernel version Replace copy_file_from_fd() with kernel_read_file_from_fd(). Two new identifiers named READING_KEXEC_IMAGE and READING_KEXEC_INITRAMFS are defined for measuring, appraising or auditing the kexec image and initramfs. Changelog v3: - return -EBADF, not -ENOEXEC - identifier change - split patch, moving copy_file_from_fd() to a separate patch - split patch, moving IMA changes to a separate patch v0: - use kstat file size type loff_t, not size_t - Calculate the file hash from the in memory buffer - Dave Young Signed-off-by: Mimi Zohar Acked-by: Kees Cook Acked-by: Luis R. 
Rodriguez Cc: Eric Biederman Acked-by: Dave Young --- include/linux/fs.h | 2 ++ kernel/kexec_file.c | 73 +++++++---------------------------------------------- 2 files changed, 11 insertions(+), 64 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index fb08b668c37a..52567252288e 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2579,6 +2579,8 @@ extern int do_pipe_flags(int *, int); enum kernel_read_file_id { READING_FIRMWARE = 1, READING_MODULE, + READING_KEXEC_IMAGE, + READING_KEXEC_INITRAMFS, READING_MAX_ID }; diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c index 007b791f676d..b696c3f3708f 100644 --- a/kernel/kexec_file.c +++ b/kernel/kexec_file.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -33,65 +34,6 @@ size_t __weak kexec_purgatory_size = 0; static int kexec_calculate_store_digests(struct kimage *image); -static int copy_file_from_fd(int fd, void **buf, unsigned long *buf_len) -{ - struct fd f = fdget(fd); - int ret; - struct kstat stat; - loff_t pos; - ssize_t bytes = 0; - - if (!f.file) - return -EBADF; - - ret = vfs_getattr(&f.file->f_path, &stat); - if (ret) - goto out; - - if (stat.size > INT_MAX) { - ret = -EFBIG; - goto out; - } - - /* Don't hand 0 to vmalloc, it whines. */ - if (stat.size == 0) { - ret = -EINVAL; - goto out; - } - - *buf = vmalloc(stat.size); - if (!*buf) { - ret = -ENOMEM; - goto out; - } - - pos = 0; - while (pos < stat.size) { - bytes = kernel_read(f.file, pos, (char *)(*buf) + pos, - stat.size - pos); - if (bytes < 0) { - vfree(*buf); - ret = bytes; - goto out; - } - - if (bytes == 0) - break; - pos += bytes; - } - - if (pos != stat.size) { - ret = -EBADF; - vfree(*buf); - goto out; - } - - *buf_len = pos; -out: - fdput(f); - return ret; -} - /* Architectures can provide this probe function */ int __weak arch_kexec_kernel_image_probe(struct kimage *image, void *buf, unsigned long buf_len) @@ -182,16 +124,17 @@ kimage_file_prepare_segments(struct kimage *image, int kernel_fd, int initrd_fd, { int ret = 0; void *ldata; + loff_t size; - ret = copy_file_from_fd(kernel_fd, &image->kernel_buf, - &image->kernel_buf_len); + ret = kernel_read_file_from_fd(kernel_fd, &image->kernel_buf, + &size, INT_MAX, READING_KEXEC_IMAGE); if (ret) return ret; + image->kernel_buf_len = size; /* Call arch image probe handlers */ ret = arch_kexec_kernel_image_probe(image, image->kernel_buf, image->kernel_buf_len); - if (ret) goto out; @@ -206,10 +149,12 @@ kimage_file_prepare_segments(struct kimage *image, int kernel_fd, int initrd_fd, #endif /* It is possible that there no initramfs is being loaded */ if (!(flags & KEXEC_FILE_NO_INITRAMFS)) { - ret = copy_file_from_fd(initrd_fd, &image->initrd_buf, - &image->initrd_buf_len); + ret = kernel_read_file_from_fd(initrd_fd, &image->initrd_buf, + &size, INT_MAX, + READING_KEXEC_INITRAMFS); if (ret) goto out; + image->initrd_buf_len = size; } if (cmdline_len) { -- cgit v1.2.3 From 7429b092811fb20c6a5b261c2c116a6a90cb9a29 Mon Sep 17 00:00:00 2001 From: Dmitry Kasatkin Date: Fri, 11 Apr 2014 17:47:01 +0300 Subject: ima: load policy using path We currently cannot do appraisal or signature vetting of IMA policies since we currently can only load IMA policies by writing the contents of the policy directly in, as follows: cat policy-file > /ima/policy If we provide the kernel the path to the IMA policy so it can load the policy itself it'd be able to later appraise or vet the file signature if it has one. 
This patch adds support to load the IMA policy with a given path as follows: echo /etc/ima/ima_policy > /sys/kernel/security/ima/policy Changelog v4+: - moved kernel_read_file_from_path() error messages to callers v3: - moved kernel_read_file_from_path() to a separate patch v2: - after re-ordering the patches, replace calling integrity_kernel_read() to read the file with kernel_read_file_from_path() (Mimi) - Patch description re-written by Luis R. Rodriguez Signed-off-by: Dmitry Kasatkin Signed-off-by: Mimi Zohar --- include/linux/fs.h | 1 + security/integrity/ima/ima_fs.c | 45 +++++++++++++++++++++++++++++++++++++++-- 2 files changed, 44 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 52567252288e..e514f76db04f 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2581,6 +2581,7 @@ enum kernel_read_file_id { READING_MODULE, READING_KEXEC_IMAGE, READING_KEXEC_INITRAMFS, + READING_POLICY, READING_MAX_ID }; diff --git a/security/integrity/ima/ima_fs.c b/security/integrity/ima/ima_fs.c index f355231997b4..a6c61b351f36 100644 --- a/security/integrity/ima/ima_fs.c +++ b/security/integrity/ima/ima_fs.c @@ -22,6 +22,7 @@ #include #include #include +#include #include "ima.h" @@ -258,6 +259,43 @@ static const struct file_operations ima_ascii_measurements_ops = { .release = seq_release, }; +static ssize_t ima_read_policy(char *path) +{ + void *data; + char *datap; + loff_t size; + int rc, pathlen = strlen(path); + + char *p; + + /* remove \n */ + datap = path; + strsep(&datap, "\n"); + + rc = kernel_read_file_from_path(path, &data, &size, 0, READING_POLICY); + if (rc < 0) { + pr_err("Unable to open file: %s (%d)", path, rc); + return rc; + } + + datap = data; + while (size > 0 && (p = strsep(&datap, "\n"))) { + pr_debug("rule: %s\n", p); + rc = ima_parse_add_rule(p); + if (rc < 0) + break; + size -= rc; + } + + vfree(data); + if (rc < 0) + return rc; + else if (size) + return -EINVAL; + else + return pathlen; +} + static ssize_t ima_write_policy(struct file *file, const char __user *buf, size_t datalen, loff_t *ppos) { @@ -286,9 +324,12 @@ static ssize_t ima_write_policy(struct file *file, const char __user *buf, result = mutex_lock_interruptible(&ima_write_mutex); if (result < 0) goto out_free; - result = ima_parse_add_rule(data); - mutex_unlock(&ima_write_mutex); + if (data[0] == '/') + result = ima_read_policy(data); + else + result = ima_parse_add_rule(data); + mutex_unlock(&ima_write_mutex); out_free: kfree(data); out: -- cgit v1.2.3 From 8e2fe1d9f1a20924f98ea46931a1d7fb092aa876 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 19 Feb 2016 23:05:22 +0100 Subject: bpf: add new arg_type that allows for 0 sized stack buffer Currently, when we pass a buffer from the eBPF stack into a helper function, the function proto indicates the argument types as an ARG_PTR_TO_STACK and ARG_CONST_STACK_SIZE pair. If R<X> contains the former, then R<X+1> must be of the latter type. Then, the verifier checks whether the buffer points into the eBPF stack, is initialized, etc. The verifier also guarantees that the constant value passed in R<X+1> is greater than 0, so helper functions don't need to test for it and can always assume a non-NULL initialized buffer as well as a non-0 buffer size. This patch adds a new argument type, ARG_CONST_STACK_SIZE_OR_ZERO, that also allows NULL to be passed as R<X> and 0 as R<X+1> into the helper function. Such helper functions, of course, need to be able to handle these cases internally then, as in the sketch below.
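A minimal illustration (the helper itself is hypothetical; only the arg_type pairing and the NULL/0 handling matter):

/* Hypothetical helper: r1 = pointer to a stack buffer, r2 = its size. */
static u64 bpf_example_helper(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
{
        void *buf = (void *) (long) r1;
        u64 size = r2;

        if (!buf)       /* only loadable together with size == 0 */
                return 0;
        /* ... operate on buf[0..size-1] ... */
        return 0;
}

static const struct bpf_func_proto bpf_example_helper_proto = {
        .func           = bpf_example_helper,
        .gpl_only       = false,
        .ret_type       = RET_INTEGER,
        .arg1_type      = ARG_PTR_TO_STACK,
        .arg2_type      = ARG_CONST_STACK_SIZE_OR_ZERO,
};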
The verifier guarantees that either R<X> == NULL && R<X+1> == 0 or R<X> != NULL && R<X+1> != 0 (like the case of ARG_CONST_STACK_SIZE); any other combinations are not possible to load. I went through various options of extending the verifier, and introducing the type ARG_CONST_STACK_SIZE_OR_ZERO seems to require the fewest changes to the verifier. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/bpf.h | 1 + kernel/bpf/verifier.c | 42 ++++++++++++++++++++++++++++++++---------- 2 files changed, 33 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 0cadbb7456c0..51e498e5470e 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -65,6 +65,7 @@ enum bpf_arg_type { */ ARG_PTR_TO_STACK, /* any pointer to eBPF program stack */ ARG_CONST_STACK_SIZE, /* number of bytes accessed from stack */ + ARG_CONST_STACK_SIZE_OR_ZERO, /* number of bytes accessed from stack or 0 */ ARG_PTR_TO_CTX, /* pointer to context */ ARG_ANYTHING, /* any (initialized) argument is ok */ diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 42ba4ccc020b..36dc497deaa3 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -779,15 +779,24 @@ static int check_xadd(struct verifier_env *env, struct bpf_insn *insn) * bytes from that pointer, make sure that it's within stack boundary * and all elements of stack are initialized */ -static int check_stack_boundary(struct verifier_env *env, - int regno, int access_size) +static int check_stack_boundary(struct verifier_env *env, int regno, + int access_size, bool zero_size_allowed) { struct verifier_state *state = &env->cur_state; struct reg_state *regs = state->regs; int off, i; - if (regs[regno].type != PTR_TO_STACK) + if (regs[regno].type != PTR_TO_STACK) { + if (zero_size_allowed && access_size == 0 && + regs[regno].type == CONST_IMM && + regs[regno].imm == 0) + return 0; + + verbose("R%d type=%s expected=%s\n", regno, + reg_type_str[regs[regno].type], + reg_type_str[PTR_TO_STACK]); return -EACCES; + } off = regs[regno].imm; if (off >= 0 || off < -MAX_BPF_STACK || off + access_size > 0 || @@ -830,15 +839,24 @@ static int check_func_arg(struct verifier_env *env, u32 regno, return 0; } - if (arg_type == ARG_PTR_TO_STACK || arg_type == ARG_PTR_TO_MAP_KEY || + if (arg_type == ARG_PTR_TO_MAP_KEY || arg_type == ARG_PTR_TO_MAP_VALUE) { expected_type = PTR_TO_STACK; - } else if (arg_type == ARG_CONST_STACK_SIZE) { + } else if (arg_type == ARG_CONST_STACK_SIZE || + arg_type == ARG_CONST_STACK_SIZE_OR_ZERO) { expected_type = CONST_IMM; } else if (arg_type == ARG_CONST_MAP_PTR) { expected_type = CONST_PTR_TO_MAP; } else if (arg_type == ARG_PTR_TO_CTX) { expected_type = PTR_TO_CTX; + } else if (arg_type == ARG_PTR_TO_STACK) { + expected_type = PTR_TO_STACK; + /* One exception here. In case function allows for NULL to be + * passed in as argument, it's a CONST_IMM type. Final test + * happens during stack boundary checking.
+ */ + if (reg->type == CONST_IMM && reg->imm == 0) + expected_type = CONST_IMM; } else { verbose("unsupported arg_type %d\n", arg_type); return -EFAULT; } @@ -868,8 +886,8 @@ static int check_func_arg(struct verifier_env *env, u32 regno, verbose("invalid map_ptr to access map->key\n"); return -EACCES; } - err = check_stack_boundary(env, regno, (*mapp)->key_size); - + err = check_stack_boundary(env, regno, (*mapp)->key_size, + false); } else if (arg_type == ARG_PTR_TO_MAP_VALUE) { /* bpf_map_xxx(..., map_ptr, ..., value) call: * check [value, value + map->value_size) validity @@ -879,9 +897,12 @@ static int check_func_arg(struct verifier_env *env, u32 regno, verbose("invalid map_ptr to access map->value\n"); return -EACCES; } - err = check_stack_boundary(env, regno, (*mapp)->value_size); + err = check_stack_boundary(env, regno, (*mapp)->value_size, + false); + } else if (arg_type == ARG_CONST_STACK_SIZE || + arg_type == ARG_CONST_STACK_SIZE_OR_ZERO) { + bool zero_size_allowed = (arg_type == ARG_CONST_STACK_SIZE_OR_ZERO); - } else if (arg_type == ARG_CONST_STACK_SIZE) { /* bpf_xxx(..., buf, len) call will access 'len' bytes * from stack pointer 'buf'. Check it * note: regno == len, regno - 1 == buf */ if (regno == 0) { verbose("ARG_CONST_STACK_SIZE cannot be first argument\n"); return -EACCES; } - err = check_stack_boundary(env, regno - 1, reg->imm); + err = check_stack_boundary(env, regno - 1, reg->imm, + zero_size_allowed); } return err; -- cgit v1.2.3 From 3697649ff29e0f647565eed04b27a7779c646a22 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 19 Feb 2016 23:05:25 +0100 Subject: bpf: try harder on clones when writing into skb When we're dealing with clones and the area is not writable, try harder and get a copy via pskb_expand_head(). Also replace other occurrences in the tc actions with the new skb_try_make_writable(). Reported-by: Ashhad Sheikh Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S.
Miller --- include/linux/skbuff.h | 7 +++++++ net/core/filter.c | 19 ++++++++++--------- net/sched/act_csum.c | 8 ++------ net/sched/act_nat.c | 18 +++++------------- 4 files changed, 24 insertions(+), 28 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 89b536796e53..6a57757a86cf 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2630,6 +2630,13 @@ static inline int skb_clone_writable(const struct sk_buff *skb, unsigned int len skb_headroom(skb) + len <= skb->hdr_len; } +static inline int skb_try_make_writable(struct sk_buff *skb, + unsigned int write_len) +{ + return skb_cloned(skb) && !skb_clone_writable(skb, write_len) && + pskb_expand_head(skb, 0, 0, GFP_ATOMIC); +} + static inline int __skb_cow(struct sk_buff *skb, unsigned int headroom, int cloned) { diff --git a/net/core/filter.c b/net/core/filter.c index ea391e6be7fa..f031b82128f3 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -1364,9 +1364,7 @@ static u64 bpf_skb_store_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 flags) */ if (unlikely((u32) offset > 0xffff || len > sizeof(sp->buff))) return -EFAULT; - - if (unlikely(skb_cloned(skb) && - !skb_clone_writable(skb, offset + len))) + if (unlikely(skb_try_make_writable(skb, offset + len))) return -EFAULT; ptr = skb_header_pointer(skb, offset, len, sp->buff); @@ -1439,9 +1437,7 @@ static u64 bpf_l3_csum_replace(u64 r1, u64 r2, u64 from, u64 to, u64 flags) return -EINVAL; if (unlikely((u32) offset > 0xffff)) return -EFAULT; - - if (unlikely(skb_cloned(skb) && - !skb_clone_writable(skb, offset + sizeof(sum)))) + if (unlikely(skb_try_make_writable(skb, offset + sizeof(sum)))) return -EFAULT; ptr = skb_header_pointer(skb, offset, sizeof(sum), &sum); @@ -1488,9 +1484,7 @@ static u64 bpf_l4_csum_replace(u64 r1, u64 r2, u64 from, u64 to, u64 flags) return -EINVAL; if (unlikely((u32) offset > 0xffff)) return -EFAULT; - - if (unlikely(skb_cloned(skb) && - !skb_clone_writable(skb, offset + sizeof(sum)))) + if (unlikely(skb_try_make_writable(skb, offset + sizeof(sum)))) return -EFAULT; ptr = skb_header_pointer(skb, offset, sizeof(sum), &sum); @@ -1734,6 +1728,13 @@ bool bpf_helper_changes_skb_data(void *func) return true; if (func == bpf_skb_vlan_pop) return true; + if (func == bpf_skb_store_bytes) + return true; + if (func == bpf_l3_csum_replace) + return true; + if (func == bpf_l4_csum_replace) + return true; + return false; } diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c index b07c535ba8e7..eeb3eb3ea9eb 100644 --- a/net/sched/act_csum.c +++ b/net/sched/act_csum.c @@ -105,9 +105,7 @@ static void *tcf_csum_skb_nextlayer(struct sk_buff *skb, int hl = ihl + jhl; if (!pskb_may_pull(skb, ipl + ntkoff) || (ipl < hl) || - (skb_cloned(skb) && - !skb_clone_writable(skb, hl + ntkoff) && - pskb_expand_head(skb, 0, 0, GFP_ATOMIC))) + skb_try_make_writable(skb, hl + ntkoff)) return NULL; else return (void *)(skb_network_header(skb) + ihl); @@ -365,9 +363,7 @@ static int tcf_csum_ipv4(struct sk_buff *skb, u32 update_flags) } if (update_flags & TCA_CSUM_UPDATE_FLAG_IPV4HDR) { - if (skb_cloned(skb) && - !skb_clone_writable(skb, sizeof(*iph) + ntkoff) && - pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) + if (skb_try_make_writable(skb, sizeof(*iph) + ntkoff)) goto fail; ip_send_check(ip_hdr(skb)); diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c index b7c4ead8b5a8..27607b863aba 100644 --- a/net/sched/act_nat.c +++ b/net/sched/act_nat.c @@ -126,9 +126,7 @@ static int tcf_nat(struct sk_buff *skb, const struct tc_action 
*a, addr = iph->daddr; if (!((old_addr ^ addr) & mask)) { - if (skb_cloned(skb) && - !skb_clone_writable(skb, sizeof(*iph) + noff) && - pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) + if (skb_try_make_writable(skb, sizeof(*iph) + noff)) goto drop; new_addr &= mask; @@ -156,9 +154,7 @@ static int tcf_nat(struct sk_buff *skb, const struct tc_action *a, struct tcphdr *tcph; if (!pskb_may_pull(skb, ihl + sizeof(*tcph) + noff) || - (skb_cloned(skb) && - !skb_clone_writable(skb, ihl + sizeof(*tcph) + noff) && - pskb_expand_head(skb, 0, 0, GFP_ATOMIC))) + skb_try_make_writable(skb, ihl + sizeof(*tcph) + noff)) goto drop; tcph = (void *)(skb_network_header(skb) + ihl); @@ -171,9 +167,7 @@ static int tcf_nat(struct sk_buff *skb, const struct tc_action *a, struct udphdr *udph; if (!pskb_may_pull(skb, ihl + sizeof(*udph) + noff) || - (skb_cloned(skb) && - !skb_clone_writable(skb, ihl + sizeof(*udph) + noff) && - pskb_expand_head(skb, 0, 0, GFP_ATOMIC))) + skb_try_make_writable(skb, ihl + sizeof(*udph) + noff)) goto drop; udph = (void *)(skb_network_header(skb) + ihl); @@ -213,10 +207,8 @@ static int tcf_nat(struct sk_buff *skb, const struct tc_action *a, if ((old_addr ^ addr) & mask) break; - if (skb_cloned(skb) && - !skb_clone_writable(skb, ihl + sizeof(*icmph) + - sizeof(*iph) + noff) && - pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) + if (skb_try_make_writable(skb, ihl + sizeof(*icmph) + + sizeof(*iph) + noff)) goto drop; icmph = (void *)(skb_network_header(skb) + ihl); -- cgit v1.2.3 From 944945986f125bdbbeaa78dac0c0eadb963eb34a Mon Sep 17 00:00:00 2001 From: Yuval Mintz Date: Sun, 21 Feb 2016 11:40:10 +0200 Subject: qed: Introduce DMA_REGPAIR_LE FW hsi contains regpairs, mostly for 64-bit address representations. Since same paradigm is applied each time a regpair is filled, this introduces a new utility macro for setting such regpairs. Signed-off-by: Yuval Mintz Signed-off-by: David S. 
Miller --- drivers/net/ethernet/qlogic/qed/qed_l2.c | 9 +++------ drivers/net/ethernet/qlogic/qed/qed_sp_commands.c | 12 ++++-------- drivers/net/ethernet/qlogic/qed/qed_spq.c | 9 +++------ include/linux/qed/qed_chain.h | 4 ++++ 4 files changed, 14 insertions(+), 20 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/qlogic/qed/qed_l2.c b/drivers/net/ethernet/qlogic/qed/qed_l2.c index 8d1bc7e7e996..bba59c51f72c 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_l2.c +++ b/drivers/net/ethernet/qlogic/qed/qed_l2.c @@ -557,12 +557,10 @@ qed_sp_eth_rxq_start_ramrod(struct qed_hwfn *p_hwfn, p_ramrod->complete_event_flg = 1; p_ramrod->bd_max_bytes = cpu_to_le16(bd_max_bytes); - p_ramrod->bd_base.hi = DMA_HI_LE(bd_chain_phys_addr); - p_ramrod->bd_base.lo = DMA_LO_LE(bd_chain_phys_addr); + DMA_REGPAIR_LE(p_ramrod->bd_base, bd_chain_phys_addr); p_ramrod->num_of_pbl_pages = cpu_to_le16(cqe_pbl_size); - p_ramrod->cqe_pbl_addr.hi = DMA_HI_LE(cqe_pbl_addr); - p_ramrod->cqe_pbl_addr.lo = DMA_LO_LE(cqe_pbl_addr); + DMA_REGPAIR_LE(p_ramrod->cqe_pbl_addr, cqe_pbl_addr); rc = qed_spq_post(p_hwfn, p_ent, NULL); @@ -721,8 +719,7 @@ qed_sp_eth_txq_start_ramrod(struct qed_hwfn *p_hwfn, p_ramrod->stats_counter_id = stats_id; p_ramrod->pbl_size = cpu_to_le16(pbl_size); - p_ramrod->pbl_base_addr.hi = DMA_HI_LE(pbl_addr); - p_ramrod->pbl_base_addr.lo = DMA_LO_LE(pbl_addr); + DMA_REGPAIR_LE(p_ramrod->pbl_base_addr, pbl_addr); pq_id = qed_get_qm_pq(p_hwfn, PROTOCOLID_ETH, diff --git a/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c b/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c index e271ef95745c..1c06c37d4c3d 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c +++ b/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c @@ -136,16 +136,12 @@ int qed_sp_pf_start(struct qed_hwfn *p_hwfn, p_ramrod->outer_tag = p_hwfn->hw_info.ovlan; /* Place EQ address in RAMROD */ - p_ramrod->event_ring_pbl_addr.hi = - DMA_HI_LE(p_hwfn->p_eq->chain.pbl.p_phys_table); - p_ramrod->event_ring_pbl_addr.lo = - DMA_LO_LE(p_hwfn->p_eq->chain.pbl.p_phys_table); + DMA_REGPAIR_LE(p_ramrod->event_ring_pbl_addr, + p_hwfn->p_eq->chain.pbl.p_phys_table); p_ramrod->event_ring_num_pages = (u8)p_hwfn->p_eq->chain.page_cnt; - p_ramrod->consolid_q_pbl_addr.hi = - DMA_HI_LE(p_hwfn->p_consq->chain.pbl.p_phys_table); - p_ramrod->consolid_q_pbl_addr.lo = - DMA_LO_LE(p_hwfn->p_consq->chain.pbl.p_phys_table); + DMA_REGPAIR_LE(p_ramrod->consolid_q_pbl_addr, + p_hwfn->p_consq->chain.pbl.p_phys_table); p_hwfn->hw_info.personality = PERSONALITY_ETH; diff --git a/drivers/net/ethernet/qlogic/qed/qed_spq.c b/drivers/net/ethernet/qlogic/qed/qed_spq.c index f6c6c21601d7..89469d5aae25 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_spq.c +++ b/drivers/net/ethernet/qlogic/qed/qed_spq.c @@ -183,10 +183,8 @@ static void qed_spq_hw_initialize(struct qed_hwfn *p_hwfn, p_cxt->xstorm_st_context.spq_base_hi = DMA_HI_LE(p_spq->chain.p_phys_addr); - p_cxt->xstorm_st_context.consolid_base_addr.lo = - DMA_LO_LE(p_hwfn->p_consq->chain.p_phys_addr); - p_cxt->xstorm_st_context.consolid_base_addr.hi = - DMA_HI_LE(p_hwfn->p_consq->chain.p_phys_addr); + DMA_REGPAIR_LE(p_cxt->xstorm_st_context.consolid_base_addr, + p_hwfn->p_consq->chain.p_phys_addr); } static int qed_spq_hw_post(struct qed_hwfn *p_hwfn, @@ -423,8 +421,7 @@ void qed_spq_setup(struct qed_hwfn *p_hwfn) p_virt = p_spq->p_virt; for (i = 0; i < p_spq->chain.capacity; i++) { - p_virt->elem.data_ptr.hi = DMA_HI_LE(p_phys); - p_virt->elem.data_ptr.lo = DMA_LO_LE(p_phys); + 
DMA_REGPAIR_LE(p_virt->elem.data_ptr, p_phys); list_add_tail(&p_virt->list, &p_spq->free_pool); diff --git a/include/linux/qed/qed_chain.h b/include/linux/qed/qed_chain.h index 41b9049b57e2..5f8fcaaa6504 100644 --- a/include/linux/qed/qed_chain.h +++ b/include/linux/qed/qed_chain.h @@ -19,6 +19,10 @@ /* dma_addr_t manip */ #define DMA_LO_LE(x) cpu_to_le32(lower_32_bits(x)) #define DMA_HI_LE(x) cpu_to_le32(upper_32_bits(x)) +#define DMA_REGPAIR_LE(x, val) do { \ + (x).hi = DMA_HI_LE((val)); \ + (x).lo = DMA_LO_LE((val)); \ + } while (0) #define HILO_GEN(hi, lo, type) ((((type)(hi)) << 32) + (lo)) #define HILO_DMA(hi, lo) HILO_GEN(hi, lo, dma_addr_t) -- cgit v1.2.3 From 662b1d890c593673964758fe5b6f22067bffba7a Mon Sep 17 00:00:00 2001 From: Peter Jones Date: Wed, 17 Feb 2016 12:35:54 +0000 Subject: efi: Reformat GUID tables to follow the format in UEFI spec This makes it much easier to hunt for typos in the GUID definitions. It also makes checkpatch complain less about efi.h GUID additions, so that if you add another one with the same style, checkpatch won't complain about it. Signed-off-by: Peter Jones Signed-off-by: Matt Fleming Cc: Ard Biesheuvel Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/1455712566-16727-2-git-send-email-matt@codeblueprint.co.uk Signed-off-by: Ingo Molnar --- include/linux/efi.h | 63 +++++++++++++++++++++++++++++++++++------------------ 1 file changed, 42 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/include/linux/efi.h b/include/linux/efi.h index 1acd72364297..42be9c92fdf0 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -536,67 +536,88 @@ void efi_native_runtime_setup(void); * EFI Configuration Table and GUID definitions */ #define NULL_GUID \ - EFI_GUID( 0x00000000, 0x0000, 0x0000, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 ) + EFI_GUID(0x00000000, 0x0000, 0x0000, \ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00) #define MPS_TABLE_GUID \ - EFI_GUID( 0xeb9d2d2f, 0x2d88, 0x11d3, 0x9a, 0x16, 0x0, 0x90, 0x27, 0x3f, 0xc1, 0x4d ) + EFI_GUID(0xeb9d2d2f, 0x2d88, 0x11d3, \ + 0x9a, 0x16, 0x00, 0x90, 0x27, 0x3f, 0xc1, 0x4d) #define ACPI_TABLE_GUID \ - EFI_GUID( 0xeb9d2d30, 0x2d88, 0x11d3, 0x9a, 0x16, 0x0, 0x90, 0x27, 0x3f, 0xc1, 0x4d ) + EFI_GUID(0xeb9d2d30, 0x2d88, 0x11d3, \ + 0x9a, 0x16, 0x00, 0x90, 0x27, 0x3f, 0xc1, 0x4d) #define ACPI_20_TABLE_GUID \ - EFI_GUID( 0x8868e871, 0xe4f1, 0x11d3, 0xbc, 0x22, 0x0, 0x80, 0xc7, 0x3c, 0x88, 0x81 ) + EFI_GUID(0x8868e871, 0xe4f1, 0x11d3, \ + 0xbc, 0x22, 0x00, 0x80, 0xc7, 0x3c, 0x88, 0x81) #define SMBIOS_TABLE_GUID \ - EFI_GUID( 0xeb9d2d31, 0x2d88, 0x11d3, 0x9a, 0x16, 0x0, 0x90, 0x27, 0x3f, 0xc1, 0x4d ) + EFI_GUID(0xeb9d2d31, 0x2d88, 0x11d3, \ + 0x9a, 0x16, 0x00, 0x90, 0x27, 0x3f, 0xc1, 0x4d) #define SMBIOS3_TABLE_GUID \ - EFI_GUID( 0xf2fd1544, 0x9794, 0x4a2c, 0x99, 0x2e, 0xe5, 0xbb, 0xcf, 0x20, 0xe3, 0x94 ) + EFI_GUID(0xf2fd1544, 0x9794, 0x4a2c, \ + 0x99, 0x2e, 0xe5, 0xbb, 0xcf, 0x20, 0xe3, 0x94) #define SAL_SYSTEM_TABLE_GUID \ - EFI_GUID( 0xeb9d2d32, 0x2d88, 0x11d3, 0x9a, 0x16, 0x0, 0x90, 0x27, 0x3f, 0xc1, 0x4d ) + EFI_GUID(0xeb9d2d32, 0x2d88, 0x11d3, \ + 0x9a, 0x16, 0x00, 0x90, 0x27, 0x3f, 0xc1, 0x4d) #define HCDP_TABLE_GUID \ - EFI_GUID( 0xf951938d, 0x620b, 0x42ef, 0x82, 0x79, 0xa8, 0x4b, 0x79, 0x61, 0x78, 0x98 ) + EFI_GUID(0xf951938d, 0x620b, 0x42ef, \ + 0x82, 0x79, 0xa8, 0x4b, 0x79, 0x61, 0x78, 0x98) #define UGA_IO_PROTOCOL_GUID \ - EFI_GUID( 0x61a4d49e, 0x6f68, 0x4f1b, 0xb9, 0x22, 0xa8, 0x6e, 0xed, 0xb, 0x7, 0xa2 
) + EFI_GUID(0x61a4d49e, 0x6f68, 0x4f1b, \ + 0xb9, 0x22, 0xa8, 0x6e, 0xed, 0x0b, 0x07, 0xa2) #define EFI_GLOBAL_VARIABLE_GUID \ - EFI_GUID( 0x8be4df61, 0x93ca, 0x11d2, 0xaa, 0x0d, 0x00, 0xe0, 0x98, 0x03, 0x2b, 0x8c ) + EFI_GUID(0x8be4df61, 0x93ca, 0x11d2, \ + 0xaa, 0x0d, 0x00, 0xe0, 0x98, 0x03, 0x2b, 0x8c) #define UV_SYSTEM_TABLE_GUID \ - EFI_GUID( 0x3b13a7d4, 0x633e, 0x11dd, 0x93, 0xec, 0xda, 0x25, 0x56, 0xd8, 0x95, 0x93 ) + EFI_GUID(0x3b13a7d4, 0x633e, 0x11dd, \ + 0x93, 0xec, 0xda, 0x25, 0x56, 0xd8, 0x95, 0x93) #define LINUX_EFI_CRASH_GUID \ - EFI_GUID( 0xcfc8fc79, 0xbe2e, 0x4ddc, 0x97, 0xf0, 0x9f, 0x98, 0xbf, 0xe2, 0x98, 0xa0 ) + EFI_GUID(0xcfc8fc79, 0xbe2e, 0x4ddc, \ + 0x97, 0xf0, 0x9f, 0x98, 0xbf, 0xe2, 0x98, 0xa0) #define LOADED_IMAGE_PROTOCOL_GUID \ - EFI_GUID( 0x5b1b31a1, 0x9562, 0x11d2, 0x8e, 0x3f, 0x00, 0xa0, 0xc9, 0x69, 0x72, 0x3b ) + EFI_GUID(0x5b1b31a1, 0x9562, 0x11d2, \ + 0x8e, 0x3f, 0x00, 0xa0, 0xc9, 0x69, 0x72, 0x3b) #define EFI_GRAPHICS_OUTPUT_PROTOCOL_GUID \ - EFI_GUID( 0x9042a9de, 0x23dc, 0x4a38, 0x96, 0xfb, 0x7a, 0xde, 0xd0, 0x80, 0x51, 0x6a ) + EFI_GUID(0x9042a9de, 0x23dc, 0x4a38, \ + 0x96, 0xfb, 0x7a, 0xde, 0xd0, 0x80, 0x51, 0x6a) #define EFI_UGA_PROTOCOL_GUID \ - EFI_GUID( 0x982c298b, 0xf4fa, 0x41cb, 0xb8, 0x38, 0x77, 0xaa, 0x68, 0x8f, 0xb8, 0x39 ) + EFI_GUID(0x982c298b, 0xf4fa, 0x41cb, \ + 0xb8, 0x38, 0x77, 0xaa, 0x68, 0x8f, 0xb8, 0x39) #define EFI_PCI_IO_PROTOCOL_GUID \ - EFI_GUID( 0x4cf5b200, 0x68b8, 0x4ca5, 0x9e, 0xec, 0xb2, 0x3e, 0x3f, 0x50, 0x2, 0x9a ) + EFI_GUID(0x4cf5b200, 0x68b8, 0x4ca5, \ + 0x9e, 0xec, 0xb2, 0x3e, 0x3f, 0x50, 0x02, 0x9a) #define EFI_FILE_INFO_ID \ - EFI_GUID( 0x9576e92, 0x6d3f, 0x11d2, 0x8e, 0x39, 0x00, 0xa0, 0xc9, 0x69, 0x72, 0x3b ) + EFI_GUID(0x9576e92, 0x6d3f, 0x11d2, \ + 0x8e, 0x39, 0x00, 0xa0, 0xc9, 0x69, 0x72, 0x3b) #define EFI_SYSTEM_RESOURCE_TABLE_GUID \ - EFI_GUID( 0xb122a263, 0x3661, 0x4f68, 0x99, 0x29, 0x78, 0xf8, 0xb0, 0xd6, 0x21, 0x80 ) + EFI_GUID(0xb122a263, 0x3661, 0x4f68, \ + 0x99, 0x29, 0x78, 0xf8, 0xb0, 0xd6, 0x21, 0x80) #define EFI_FILE_SYSTEM_GUID \ - EFI_GUID( 0x964e5b22, 0x6459, 0x11d2, 0x8e, 0x39, 0x00, 0xa0, 0xc9, 0x69, 0x72, 0x3b ) + EFI_GUID(0x964e5b22, 0x6459, 0x11d2, \ + 0x8e, 0x39, 0x00, 0xa0, 0xc9, 0x69, 0x72, 0x3b) #define DEVICE_TREE_GUID \ - EFI_GUID( 0xb1b621d5, 0xf19c, 0x41a5, 0x83, 0x0b, 0xd9, 0x15, 0x2c, 0x69, 0xaa, 0xe0 ) + EFI_GUID(0xb1b621d5, 0xf19c, 0x41a5, \ + 0x83, 0x0b, 0xd9, 0x15, 0x2c, 0x69, 0xaa, 0xe0) #define EFI_PROPERTIES_TABLE_GUID \ - EFI_GUID( 0x880aaca3, 0x4adc, 0x4a04, 0x90, 0x79, 0xb7, 0x47, 0x34, 0x08, 0x25, 0xe5 ) + EFI_GUID(0x880aaca3, 0x4adc, 0x4a04, \ + 0x90, 0x79, 0xb7, 0x47, 0x34, 0x08, 0x25, 0xe5) typedef struct { efi_guid_t guid; -- cgit v1.2.3 From 9575632052bacc2fda38d845eb17b0fb808e13eb Mon Sep 17 00:00:00 2001 From: Vinod Koul Date: Mon, 15 Feb 2016 22:27:02 +0530 Subject: dmaengine: make slave address physical MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The slave dmaengine semantics required the client to map DMA addresses and pass the DMA address to the dmaengine driver. This was a convenient notion coming from the generic DMA offload cases, where dmaengines are interchangeable and the client is not aware of which engine it will map for. But in the slave case we know the dmaengine and always use a specific one. Further, IOMMU setups can break this notion entirely, so make the slave address a physical address; the dmaengine driver now does the required mapping.
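As a sketch of the new contract (not part of this patch; the driver name and the DRV_TX_FIFO_OFF register offset are hypothetical), a slave client now fills dma_slave_config with the physical address of the device FIFO and leaves any IOMMU mapping to the dmaengine driver:

    static int drv_setup_tx_dma(struct dma_chan *chan, struct resource *res)
    {
            struct dma_slave_config cfg = { };

            cfg.direction = DMA_MEM_TO_DEV;
            /* Physical FIFO address; the dmaengine driver maps it if needed */
            cfg.dst_addr = res->start + DRV_TX_FIFO_OFF;
            cfg.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
            cfg.dst_maxburst = 8;

            return dmaengine_slave_config(chan, &cfg);
    }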
Original-patch-by: Linus Walleij Original-patch-Acked-by: Lee Jones Reviewed-by: Laurent Pinchart Acked-by: Arnd Bergmann Acked-by: Geert Uytterhoeven Acked-by: Wolfram Sang Tested-by: Wolfram Sang Tested-by: Niklas Söderlund Signed-off-by: Vinod Koul --- include/linux/dmaengine.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index 16a1cad30c33..d85ecd20af50 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -357,8 +357,8 @@ enum dma_slave_buswidth { */ struct dma_slave_config { enum dma_transfer_direction direction; - dma_addr_t src_addr; - dma_addr_t dst_addr; + phys_addr_t src_addr; + phys_addr_t dst_addr; enum dma_slave_buswidth src_addr_width; enum dma_slave_buswidth dst_addr_width; u32 src_maxburst; -- cgit v1.2.3 From f083b09b7819c785db4f82a81f68da3bccfb04bf Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Sat, 6 Feb 2016 18:38:46 -0500 Subject: dm: set DM_TARGET_WILDCARD feature on "error" target The DM_TARGET_WILDCARD feature indicates that the "error" target may replace any target; even immutable targets. This feature will be useful to preserve the ability to replace the "multipath" target even once it is formally converted over to having the DM_TARGET_IMMUTABLE feature. Also, implicit in the DM_TARGET_WILDCARD feature flag being set is that .map, .map_rq, .clone_and_map_rq and .release_clone_rq are all defined in the target_type. Signed-off-by: Mike Snitzer --- drivers/md/dm-ioctl.c | 3 ++- drivers/md/dm-table.c | 14 ++++++++++++++ drivers/md/dm-target.c | 3 ++- drivers/md/dm.h | 1 + include/linux/device-mapper.h | 7 +++++++ 5 files changed, 26 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c index 80a439543259..4763c4ae30e4 100644 --- a/drivers/md/dm-ioctl.c +++ b/drivers/md/dm-ioctl.c @@ -1291,7 +1291,8 @@ static int table_load(struct dm_ioctl *param, size_t param_size) immutable_target_type = dm_get_immutable_target_type(md); if (immutable_target_type && - (immutable_target_type != dm_table_get_immutable_target_type(t))) { + (immutable_target_type != dm_table_get_immutable_target_type(t)) && + !dm_table_get_wildcard_target(t)) { DMWARN("can't replace immutable target type %s", immutable_target_type->name); r = -EINVAL; diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 061152a43730..a49e62b8611f 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -920,6 +920,20 @@ struct target_type *dm_table_get_immutable_target_type(struct dm_table *t) return t->immutable_target_type; } +struct dm_target *dm_table_get_wildcard_target(struct dm_table *t) +{ + struct dm_target *uninitialized_var(ti); + unsigned i = 0; + + while (i < dm_table_get_num_targets(t)) { + ti = dm_table_get_target(t, i++); + if (dm_target_is_wildcard(ti->type)) + return ti; + } + + return NULL; +} + bool dm_table_request_based(struct dm_table *t) { return __table_type_request_based(dm_table_get_type(t)); diff --git a/drivers/md/dm-target.c b/drivers/md/dm-target.c index 925ec1b15e75..a317dd884ba6 100644 --- a/drivers/md/dm-target.c +++ b/drivers/md/dm-target.c @@ -150,7 +150,8 @@ static void io_err_release_clone_rq(struct request *clone) static struct target_type error_target = { .name = "error", - .version = {1, 3, 0}, + .version = {1, 4, 0}, + .features = DM_TARGET_WILDCARD, .ctr = io_err_ctr, .dtr = io_err_dtr, .map = io_err_map, diff --git a/drivers/md/dm.h b/drivers/md/dm.h index 
7edcf97dfa5a..53df2585571b 100644 --- a/drivers/md/dm.h +++ b/drivers/md/dm.h @@ -73,6 +73,7 @@ int dm_table_resume_targets(struct dm_table *t); int dm_table_any_congested(struct dm_table *t, int bdi_bits); unsigned dm_table_get_type(struct dm_table *t); struct target_type *dm_table_get_immutable_target_type(struct dm_table *t); +struct dm_target *dm_table_get_wildcard_target(struct dm_table *t); bool dm_table_request_based(struct dm_table *t); bool dm_table_mq_request_based(struct dm_table *t); void dm_table_free_md_mempools(struct dm_table *t); diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index ec1c61c87d89..87d50ecbc9df 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -189,6 +189,13 @@ struct target_type { #define DM_TARGET_IMMUTABLE 0x00000004 #define dm_target_is_immutable(type) ((type)->features & DM_TARGET_IMMUTABLE) +/* + * Indicates that a target may replace any target; even immutable targets. + * .map, .map_rq, .clone_and_map_rq and .release_clone_rq are all defined. + */ +#define DM_TARGET_WILDCARD 0x00000008 +#define dm_target_is_wildcard(type) ((type)->features & DM_TARGET_WILDCARD) + /* * Some targets need to be sent the same WRITE bio severals times so * that they can send copies of it to different devices. This function -- cgit v1.2.3 From f9a61eb4e2471c56a63cd804c7474128138c38ac Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 22 Feb 2016 11:49:09 -0500 Subject: mbcache2: reimplement mbcache The original mbcache was designed to have more features than what the ext? filesystems ended up using. It supported an entry being in multiple hashes, it had home-grown rwlocking of each entry, and one cache could hold entries from multiple filesystems. This genericity also resulted in more complex locking, larger cache entries, and generally more code complexity. This is a reimplementation of the mbcache functionality to exactly fit the purpose the ext? filesystems use it for. Cache entries are now considerably smaller (7 instead of 13 longs), the code is considerably smaller as well (414 vs 913 lines of code), and IMO also simpler. The new code is also much more lightweight. I have measured the speed using an artificial xattr-bench benchmark, which spawns P processes; each process sets an xattr for each of F different files, and the value of the xattr is randomly chosen from a pool of V values. Averages of runtimes for 5 runs for various combinations of parameters are below. The first value in each cell is the old mbcache, the second value is the new mbcache.
V=10
F\P      1            2            4            8            16           32            64
10       0.158,0.157  0.208,0.196  0.500,0.277  0.798,0.400  3.258,0.584  13.807,1.047  61.339,2.803
100      0.172,0.167  0.279,0.222  0.520,0.275  0.825,0.341  2.981,0.505  12.022,1.202  44.641,2.943
1000     0.185,0.174  0.297,0.239  0.445,0.283  0.767,0.340  2.329,0.480  6.342,1.198   16.440,3.888

V=100
F\P      1            2            4            8            16           32           64
10       0.162,0.153  0.200,0.186  0.362,0.257  0.671,0.496  1.433,0.943  3.801,1.345  7.938,2.501
100      0.153,0.160  0.221,0.199  0.404,0.264  0.945,0.379  1.556,0.485  3.761,1.156  7.901,2.484
1000     0.215,0.191  0.303,0.246  0.471,0.288  0.960,0.347  1.647,0.479  3.916,1.176  8.058,3.160

V=1000
F\P      1            2            4            8            16           32           64
10       0.151,0.129  0.210,0.163  0.326,0.245  0.685,0.521  1.284,0.859  3.087,2.251  6.451,4.801
100      0.154,0.153  0.211,0.191  0.276,0.282  0.687,0.506  1.202,0.877  3.259,1.954  8.738,2.887
1000     0.145,0.179  0.202,0.222  0.449,0.319  0.899,0.333  1.577,0.524  4.221,1.240  9.782,3.579

V=10000
F\P      1            2            4            8            16           32           64
10       0.161,0.154  0.198,0.190  0.296,0.256  0.662,0.480  1.192,0.818  2.989,2.200  6.362,4.746
100      0.176,0.174  0.236,0.203  0.326,0.255  0.696,0.511  1.183,0.855  4.205,3.444  19.510,17.760
1000     0.199,0.183  0.240,0.227  1.159,1.014  2.286,2.154  6.023,6.039  ---,10.933   ---,36.620

V=100000
F\P      1            2            4            8            16           32           64
10       0.171,0.162  0.204,0.198  0.285,0.230  0.692,0.500  1.225,0.881  2.990,2.243  6.379,4.771
100      0.151,0.171  0.220,0.210  0.295,0.255  0.720,0.518  1.226,0.844  3.423,2.831  19.234,17.544
1000     0.192,0.189  0.249,0.225  1.162,1.043  2.257,2.093  5.853,4.997  ---,10.399   ---,32.198

We see that the new code is faster in pretty much all cases, and starting from 4 processes there are significant gains with the new code, resulting in up to 20-times shorter runtimes. Also, for large numbers of cached entries, the values marked '---' for the old code could not be measured, as the kernel started hitting softlockups and died before the test completed. Signed-off-by: Jan Kara Signed-off-by: Theodore Ts'o --- fs/Makefile | 2 +- fs/mbcache2.c | 359 +++++++++++++++++++++++++++++++++++++++ include/linux/mbcache2.h | 50 +++++++ 3 files changed, 410 insertions(+), 1 deletion(-) create mode 100644 fs/mbcache2.c create mode 100644 include/linux/mbcache2.h (limited to 'include/linux') diff --git a/fs/Makefile b/fs/Makefile index 79f522575cba..15b3d6c4e46a 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -41,7 +41,7 @@ obj-$(CONFIG_COMPAT_BINFMT_ELF) += compat_binfmt_elf.o obj-$(CONFIG_BINFMT_ELF_FDPIC) += binfmt_elf_fdpic.o obj-$(CONFIG_BINFMT_FLAT) += binfmt_flat.o -obj-$(CONFIG_FS_MBCACHE) += mbcache.o +obj-$(CONFIG_FS_MBCACHE) += mbcache.o mbcache2.o obj-$(CONFIG_FS_POSIX_ACL) += posix_acl.o obj-$(CONFIG_NFS_COMMON) += nfs_common/ obj-$(CONFIG_COREDUMP) += coredump.o diff --git a/fs/mbcache2.c b/fs/mbcache2.c new file mode 100644 index 000000000000..5c3e1a8c38f6 --- /dev/null +++ b/fs/mbcache2.c @@ -0,0 +1,359 @@ +#include <linux/spinlock.h> +#include <linux/slab.h> +#include <linux/list.h> +#include <linux/list_bl.h> +#include <linux/module.h> +#include <linux/sched.h> +#include <linux/mbcache2.h> + +/* + * Mbcache is a simple key-value store. Keys need not be unique, however + * key-value pairs are expected to be unique (we use this fact in + * mb2_cache_entry_delete_block()). + * + * Ext2 and ext4 use this cache for deduplication of extended attribute blocks. + * They use hash of a block contents as a key and block number as a value. + * That's why keys need not be unique (different xattr blocks may end up having + * the same hash). However block number always uniquely identifies a cache + * entry. + * + * We provide functions for creation and removal of entries, search by key, + * and a special "delete entry with given key-value pair" operation. Fixed
Fixed + * size hash table is used for fast key lookups. + */ + +struct mb2_cache { + /* Hash table of entries */ + struct hlist_bl_head *c_hash; + /* log2 of hash table size */ + int c_bucket_bits; + /* Protects c_lru_list, c_entry_count */ + spinlock_t c_lru_list_lock; + struct list_head c_lru_list; + /* Number of entries in cache */ + unsigned long c_entry_count; + struct shrinker c_shrink; +}; + +static struct kmem_cache *mb2_entry_cache; + +/* + * mb2_cache_entry_create - create entry in cache + * @cache - cache where the entry should be created + * @mask - gfp mask with which the entry should be allocated + * @key - key of the entry + * @block - block that contains data + * + * Creates entry in @cache with key @key and records that data is stored in + * block @block. The function returns -EBUSY if entry with the same key + * and for the same block already exists in cache. Otherwise 0 is returned. + */ +int mb2_cache_entry_create(struct mb2_cache *cache, gfp_t mask, u32 key, + sector_t block) +{ + struct mb2_cache_entry *entry, *dup; + struct hlist_bl_node *dup_node; + struct hlist_bl_head *head; + + entry = kmem_cache_alloc(mb2_entry_cache, mask); + if (!entry) + return -ENOMEM; + + INIT_LIST_HEAD(&entry->e_lru_list); + /* One ref for hash, one ref returned */ + atomic_set(&entry->e_refcnt, 1); + entry->e_key = key; + entry->e_block = block; + head = &cache->c_hash[hash_32(key, cache->c_bucket_bits)]; + entry->e_hash_list_head = head; + hlist_bl_lock(head); + hlist_bl_for_each_entry(dup, dup_node, head, e_hash_list) { + if (dup->e_key == key && dup->e_block == block) { + hlist_bl_unlock(head); + kmem_cache_free(mb2_entry_cache, entry); + return -EBUSY; + } + } + hlist_bl_add_head(&entry->e_hash_list, head); + hlist_bl_unlock(head); + + spin_lock(&cache->c_lru_list_lock); + list_add_tail(&entry->e_lru_list, &cache->c_lru_list); + /* Grab ref for LRU list */ + atomic_inc(&entry->e_refcnt); + cache->c_entry_count++; + spin_unlock(&cache->c_lru_list_lock); + + return 0; +} +EXPORT_SYMBOL(mb2_cache_entry_create); + +void __mb2_cache_entry_free(struct mb2_cache_entry *entry) +{ + kmem_cache_free(mb2_entry_cache, entry); +} +EXPORT_SYMBOL(__mb2_cache_entry_free); + +static struct mb2_cache_entry *__entry_find(struct mb2_cache *cache, + struct mb2_cache_entry *entry, + u32 key) +{ + struct mb2_cache_entry *old_entry = entry; + struct hlist_bl_node *node; + struct hlist_bl_head *head; + + if (entry) + head = entry->e_hash_list_head; + else + head = &cache->c_hash[hash_32(key, cache->c_bucket_bits)]; + hlist_bl_lock(head); + if (entry && !hlist_bl_unhashed(&entry->e_hash_list)) + node = entry->e_hash_list.next; + else + node = hlist_bl_first(head); + while (node) { + entry = hlist_bl_entry(node, struct mb2_cache_entry, + e_hash_list); + if (entry->e_key == key) { + atomic_inc(&entry->e_refcnt); + goto out; + } + node = node->next; + } + entry = NULL; +out: + hlist_bl_unlock(head); + if (old_entry) + mb2_cache_entry_put(cache, old_entry); + + return entry; +} + +/* + * mb2_cache_entry_find_first - find the first entry in cache with given key + * @cache: cache where we should search + * @key: key to look for + * + * Search in @cache for entry with key @key. Grabs reference to the first + * entry found and returns the entry. 
+ */ +struct mb2_cache_entry *mb2_cache_entry_find_first(struct mb2_cache *cache, + u32 key) +{ + return __entry_find(cache, NULL, key); +} +EXPORT_SYMBOL(mb2_cache_entry_find_first); + +/* + * mb2_cache_entry_find_next - find next entry in cache with the same key + * @cache: cache where we should search + * @entry: entry to start search from + * + * Finds next entry in the hash chain which has the same key as @entry. + * If @entry is unhashed (which can happen when deletion of entry races + * with the search), finds the first entry in the hash chain. The function + * drops reference to @entry and returns with a reference to the found entry. + */ +struct mb2_cache_entry *mb2_cache_entry_find_next(struct mb2_cache *cache, + struct mb2_cache_entry *entry) +{ + return __entry_find(cache, entry, entry->e_key); +} +EXPORT_SYMBOL(mb2_cache_entry_find_next); + +/* mb2_cache_entry_delete_block - remove information about block from cache + * @cache - cache we work with + * @key - key of the entry to remove + * @block - block containing data for @key + * + * Remove entry from cache @cache with key @key with data stored in @block. + */ +void mb2_cache_entry_delete_block(struct mb2_cache *cache, u32 key, + sector_t block) +{ + struct hlist_bl_node *node; + struct hlist_bl_head *head; + struct mb2_cache_entry *entry; + + head = &cache->c_hash[hash_32(key, cache->c_bucket_bits)]; + hlist_bl_lock(head); + hlist_bl_for_each_entry(entry, node, head, e_hash_list) { + if (entry->e_key == key && entry->e_block == block) { + /* We keep hash list reference to keep entry alive */ + hlist_bl_del_init(&entry->e_hash_list); + hlist_bl_unlock(head); + spin_lock(&cache->c_lru_list_lock); + if (!list_empty(&entry->e_lru_list)) { + list_del_init(&entry->e_lru_list); + cache->c_entry_count--; + atomic_dec(&entry->e_refcnt); + } + spin_unlock(&cache->c_lru_list_lock); + mb2_cache_entry_put(cache, entry); + return; + } + } + hlist_bl_unlock(head); +} +EXPORT_SYMBOL(mb2_cache_entry_delete_block); + +/* mb2_cache_entry_touch - cache entry got used + * @cache - cache the entry belongs to + * @entry - entry that got used + * + * Move entry in lru list to reflect the fact that it was used. + */ +void mb2_cache_entry_touch(struct mb2_cache *cache, + struct mb2_cache_entry *entry) +{ + spin_lock(&cache->c_lru_list_lock); + if (!list_empty(&entry->e_lru_list)) + list_move_tail(&entry->e_lru_list, &cache->c_lru_list); + spin_unlock(&cache->c_lru_list_lock); +} +EXPORT_SYMBOL(mb2_cache_entry_touch); + +static unsigned long mb2_cache_count(struct shrinker *shrink, + struct shrink_control *sc) +{ + struct mb2_cache *cache = container_of(shrink, struct mb2_cache, + c_shrink); + + return cache->c_entry_count; +} + +/* Shrink number of entries in cache */ +static unsigned long mb2_cache_scan(struct shrinker *shrink, + struct shrink_control *sc) +{ + int nr_to_scan = sc->nr_to_scan; + struct mb2_cache *cache = container_of(shrink, struct mb2_cache, + c_shrink); + struct mb2_cache_entry *entry; + struct hlist_bl_head *head; + unsigned int shrunk = 0; + + spin_lock(&cache->c_lru_list_lock); + while (nr_to_scan-- && !list_empty(&cache->c_lru_list)) { + entry = list_first_entry(&cache->c_lru_list, + struct mb2_cache_entry, e_lru_list); + list_del_init(&entry->e_lru_list); + cache->c_entry_count--; + /* + * We keep LRU list reference so that entry doesn't go away + * from under us.
+ */ + spin_unlock(&cache->c_lru_list_lock); + head = entry->e_hash_list_head; + hlist_bl_lock(head); + if (!hlist_bl_unhashed(&entry->e_hash_list)) { + hlist_bl_del_init(&entry->e_hash_list); + atomic_dec(&entry->e_refcnt); + } + hlist_bl_unlock(head); + if (mb2_cache_entry_put(cache, entry)) + shrunk++; + cond_resched(); + spin_lock(&cache->c_lru_list_lock); + } + spin_unlock(&cache->c_lru_list_lock); + + return shrunk; +} + +/* + * mb2_cache_create - create cache + * @bucket_bits: log2 of the hash table size + * + * Create cache for keys with 2^bucket_bits hash entries. + */ +struct mb2_cache *mb2_cache_create(int bucket_bits) +{ + struct mb2_cache *cache; + int bucket_count = 1 << bucket_bits; + int i; + + if (!try_module_get(THIS_MODULE)) + return NULL; + + cache = kzalloc(sizeof(struct mb2_cache), GFP_KERNEL); + if (!cache) + goto err_out; + cache->c_bucket_bits = bucket_bits; + INIT_LIST_HEAD(&cache->c_lru_list); + spin_lock_init(&cache->c_lru_list_lock); + cache->c_hash = kmalloc(bucket_count * sizeof(struct hlist_bl_head), + GFP_KERNEL); + if (!cache->c_hash) { + kfree(cache); + goto err_out; + } + for (i = 0; i < bucket_count; i++) + INIT_HLIST_BL_HEAD(&cache->c_hash[i]); + + cache->c_shrink.count_objects = mb2_cache_count; + cache->c_shrink.scan_objects = mb2_cache_scan; + cache->c_shrink.seeks = DEFAULT_SEEKS; + register_shrinker(&cache->c_shrink); + + return cache; + +err_out: + module_put(THIS_MODULE); + return NULL; +} +EXPORT_SYMBOL(mb2_cache_create); + +/* + * mb2_cache_destroy - destroy cache + * @cache: the cache to destroy + * + * Free all entries in cache and cache itself. Caller must make sure nobody + * (except shrinker) can reach @cache when calling this. + */ +void mb2_cache_destroy(struct mb2_cache *cache) +{ + struct mb2_cache_entry *entry, *next; + + unregister_shrinker(&cache->c_shrink); + + /* + * We don't bother with any locking. Cache must not be used at this + * point. 
+ */ + list_for_each_entry_safe(entry, next, &cache->c_lru_list, e_lru_list) { + if (!hlist_bl_unhashed(&entry->e_hash_list)) { + hlist_bl_del_init(&entry->e_hash_list); + atomic_dec(&entry->e_refcnt); + } else + WARN_ON(1); + list_del(&entry->e_lru_list); + WARN_ON(atomic_read(&entry->e_refcnt) != 1); + mb2_cache_entry_put(cache, entry); + } + kfree(cache->c_hash); + kfree(cache); + module_put(THIS_MODULE); +} +EXPORT_SYMBOL(mb2_cache_destroy); + +static int __init mb2cache_init(void) +{ + mb2_entry_cache = kmem_cache_create("mbcache", + sizeof(struct mb2_cache_entry), 0, + SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, NULL); + BUG_ON(!mb2_entry_cache); + return 0; +} + +static void __exit mb2cache_exit(void) +{ + kmem_cache_destroy(mb2_entry_cache); +} + +module_init(mb2cache_init) +module_exit(mb2cache_exit) + +MODULE_AUTHOR("Jan Kara "); +MODULE_DESCRIPTION("Meta block cache (for extended attributes)"); +MODULE_LICENSE("GPL"); diff --git a/include/linux/mbcache2.h b/include/linux/mbcache2.h new file mode 100644 index 000000000000..b6f160ff2533 --- /dev/null +++ b/include/linux/mbcache2.h @@ -0,0 +1,50 @@ +#ifndef _LINUX_MB2CACHE_H +#define _LINUX_MB2CACHE_H + +#include <linux/hash.h> +#include <linux/list_bl.h> +#include <linux/list.h> +#include <linux/atomic.h> +#include <linux/fs.h> + +struct mb2_cache; + +struct mb2_cache_entry { + /* LRU list - protected by cache->c_lru_list_lock */ + struct list_head e_lru_list; + /* Hash table list - protected by bitlock in e_hash_list_head */ + struct hlist_bl_node e_hash_list; + atomic_t e_refcnt; + /* Key in hash - stable during lifetime of the entry */ + u32 e_key; + /* Block number of hashed block - stable during lifetime of the entry */ + sector_t e_block; + /* Head of hash list (for list bit lock) - stable */ + struct hlist_bl_head *e_hash_list_head; +}; + +struct mb2_cache *mb2_cache_create(int bucket_bits); +void mb2_cache_destroy(struct mb2_cache *cache); + +int mb2_cache_entry_create(struct mb2_cache *cache, gfp_t mask, u32 key, + sector_t block); +void __mb2_cache_entry_free(struct mb2_cache_entry *entry); +static inline int mb2_cache_entry_put(struct mb2_cache *cache, + struct mb2_cache_entry *entry) +{ + if (!atomic_dec_and_test(&entry->e_refcnt)) + return 0; + __mb2_cache_entry_free(entry); + return 1; +} + +void mb2_cache_entry_delete_block(struct mb2_cache *cache, u32 key, + sector_t block); +struct mb2_cache_entry *mb2_cache_entry_find_first(struct mb2_cache *cache, + u32 key); +struct mb2_cache_entry *mb2_cache_entry_find_next(struct mb2_cache *cache, + struct mb2_cache_entry *entry); +void mb2_cache_entry_touch(struct mb2_cache *cache, + struct mb2_cache_entry *entry); + +#endif /* _LINUX_MB2CACHE_H */ -- cgit v1.2.3 From ecd1e64412d5242b8afdef58a714bab3c5464f79 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 22 Feb 2016 12:21:14 -0500 Subject: mbcache: remove mbcache Both ext2 and ext4 are now converted to mbcache2. Remove the old mbcache code.
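For reference, a minimal sketch of how a converted filesystem drives the replacement API (the hash value and block number below are illustrative, not taken from the actual ext2/ext4 conversions):

    struct mb2_cache *cache = mb2_cache_create(10);    /* 2^10 hash buckets */
    struct mb2_cache_entry *ce;
    u32 hash = 0x1234;       /* hash of the xattr block contents */
    sector_t block = 42;     /* block that holds those contents */

    /* Publish the block, then walk all candidates with the same hash */
    mb2_cache_entry_create(cache, GFP_NOFS, hash, block);
    for (ce = mb2_cache_entry_find_first(cache, hash); ce;
         ce = mb2_cache_entry_find_next(cache, ce)) {
            /* compare the block at ce->e_block against ours ... */
    }
    /* Drop the cached mapping once the xattr block is freed */
    mb2_cache_entry_delete_block(cache, hash, block);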
Signed-off-by: Jan Kara Signed-off-by: Theodore Ts'o --- fs/Makefile | 2 +- fs/mbcache.c | 858 ------------------------------------------------ include/linux/mbcache.h | 55 ---- 3 files changed, 1 insertion(+), 914 deletions(-) delete mode 100644 fs/mbcache.c delete mode 100644 include/linux/mbcache.h (limited to 'include/linux') diff --git a/fs/Makefile b/fs/Makefile index 15b3d6c4e46a..59b844007fbc 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -41,7 +41,7 @@ obj-$(CONFIG_COMPAT_BINFMT_ELF) += compat_binfmt_elf.o obj-$(CONFIG_BINFMT_ELF_FDPIC) += binfmt_elf_fdpic.o obj-$(CONFIG_BINFMT_FLAT) += binfmt_flat.o -obj-$(CONFIG_FS_MBCACHE) += mbcache.o mbcache2.o +obj-$(CONFIG_FS_MBCACHE) += mbcache2.o obj-$(CONFIG_FS_POSIX_ACL) += posix_acl.o obj-$(CONFIG_NFS_COMMON) += nfs_common/ obj-$(CONFIG_COREDUMP) += coredump.o diff --git a/fs/mbcache.c b/fs/mbcache.c deleted file mode 100644 index 187477ded6b3..000000000000 --- a/fs/mbcache.c +++ /dev/null @@ -1,858 +0,0 @@ -/* - * linux/fs/mbcache.c - * (C) 2001-2002 Andreas Gruenbacher, - */ - -/* - * Filesystem Meta Information Block Cache (mbcache) - * - * The mbcache caches blocks of block devices that need to be located - * by their device/block number, as well as by other criteria (such - * as the block's contents). - * - * There can only be one cache entry in a cache per device and block number. - * Additional indexes need not be unique in this sense. The number of - * additional indexes (=other criteria) can be hardwired at compile time - * or specified at cache create time. - * - * Each cache entry is of fixed size. An entry may be `valid' or `invalid' - * in the cache. A valid entry is in the main hash tables of the cache, - * and may also be in the lru list. An invalid entry is not in any hashes - * or lists. - * - * A valid cache entry is only in the lru list if no handles refer to it. - * Invalid cache entries will be freed when the last handle to the cache - * entry is released. Entries that cannot be freed immediately are put - * back on the lru list. - */ - -/* - * Lock descriptions and usage: - * - * Each hash chain of both the block and index hash tables now contains - * a built-in lock used to serialize accesses to the hash chain. - * - * Accesses to global data structures mb_cache_list and mb_cache_lru_list - * are serialized via the global spinlock mb_cache_spinlock. - * - * Each mb_cache_entry contains a spinlock, e_entry_lock, to serialize - * accesses to its local data, such as e_used and e_queued. - * - * Lock ordering: - * - * Each block hash chain's lock has the highest lock order, followed by an - * index hash chain's lock, mb_cache_bg_lock (used to implement mb_cache_entry's - * lock), and mb_cach_spinlock, with the lowest order. While holding - * either a block or index hash chain lock, a thread can acquire an - * mc_cache_bg_lock, which in turn can also acquire mb_cache_spinlock. - * - * Synchronization: - * - * Since both mb_cache_entry_get and mb_cache_entry_find scan the block and - * index hash chian, it needs to lock the corresponding hash chain. For each - * mb_cache_entry within the chain, it needs to lock the mb_cache_entry to - * prevent either any simultaneous release or free on the entry and also - * to serialize accesses to either the e_used or e_queued member of the entry. - * - * To avoid having a dangling reference to an already freed - * mb_cache_entry, an mb_cache_entry is only freed when it is not on a - * block hash chain and also no longer being referenced, both e_used, - * and e_queued are 0's. 
When an mb_cache_entry is explicitly freed it is - * first removed from a block hash chain. - */ - -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#ifdef MB_CACHE_DEBUG -# define mb_debug(f...) do { \ - printk(KERN_DEBUG f); \ - printk("\n"); \ - } while (0) -#define mb_assert(c) do { if (!(c)) \ - printk(KERN_ERR "assertion " #c " failed\n"); \ - } while(0) -#else -# define mb_debug(f...) do { } while(0) -# define mb_assert(c) do { } while(0) -#endif -#define mb_error(f...) do { \ - printk(KERN_ERR f); \ - printk("\n"); \ - } while(0) - -#define MB_CACHE_WRITER ((unsigned short)~0U >> 1) - -#define MB_CACHE_ENTRY_LOCK_BITS ilog2(NR_BG_LOCKS) -#define MB_CACHE_ENTRY_LOCK_INDEX(ce) \ - (hash_long((unsigned long)ce, MB_CACHE_ENTRY_LOCK_BITS)) - -static DECLARE_WAIT_QUEUE_HEAD(mb_cache_queue); -static struct blockgroup_lock *mb_cache_bg_lock; -static struct kmem_cache *mb_cache_kmem_cache; - -MODULE_AUTHOR("Andreas Gruenbacher "); -MODULE_DESCRIPTION("Meta block cache (for extended attributes)"); -MODULE_LICENSE("GPL"); - -EXPORT_SYMBOL(mb_cache_create); -EXPORT_SYMBOL(mb_cache_shrink); -EXPORT_SYMBOL(mb_cache_destroy); -EXPORT_SYMBOL(mb_cache_entry_alloc); -EXPORT_SYMBOL(mb_cache_entry_insert); -EXPORT_SYMBOL(mb_cache_entry_release); -EXPORT_SYMBOL(mb_cache_entry_free); -EXPORT_SYMBOL(mb_cache_entry_get); -#if !defined(MB_CACHE_INDEXES_COUNT) || (MB_CACHE_INDEXES_COUNT > 0) -EXPORT_SYMBOL(mb_cache_entry_find_first); -EXPORT_SYMBOL(mb_cache_entry_find_next); -#endif - -/* - * Global data: list of all mbcache's, lru list, and a spinlock for - * accessing cache data structures on SMP machines. The lru list is - * global across all mbcaches. - */ - -static LIST_HEAD(mb_cache_list); -static LIST_HEAD(mb_cache_lru_list); -static DEFINE_SPINLOCK(mb_cache_spinlock); - -static inline void -__spin_lock_mb_cache_entry(struct mb_cache_entry *ce) -{ - spin_lock(bgl_lock_ptr(mb_cache_bg_lock, - MB_CACHE_ENTRY_LOCK_INDEX(ce))); -} - -static inline void -__spin_unlock_mb_cache_entry(struct mb_cache_entry *ce) -{ - spin_unlock(bgl_lock_ptr(mb_cache_bg_lock, - MB_CACHE_ENTRY_LOCK_INDEX(ce))); -} - -static inline int -__mb_cache_entry_is_block_hashed(struct mb_cache_entry *ce) -{ - return !hlist_bl_unhashed(&ce->e_block_list); -} - - -static inline void -__mb_cache_entry_unhash_block(struct mb_cache_entry *ce) -{ - if (__mb_cache_entry_is_block_hashed(ce)) - hlist_bl_del_init(&ce->e_block_list); -} - -static inline int -__mb_cache_entry_is_index_hashed(struct mb_cache_entry *ce) -{ - return !hlist_bl_unhashed(&ce->e_index.o_list); -} - -static inline void -__mb_cache_entry_unhash_index(struct mb_cache_entry *ce) -{ - if (__mb_cache_entry_is_index_hashed(ce)) - hlist_bl_del_init(&ce->e_index.o_list); -} - -/* - * __mb_cache_entry_unhash_unlock() - * - * This function is called to unhash both the block and index hash - * chain. - * It assumes both the block and index hash chain is locked upon entry. 
- * It also unlock both hash chains both exit - */ -static inline void -__mb_cache_entry_unhash_unlock(struct mb_cache_entry *ce) -{ - __mb_cache_entry_unhash_index(ce); - hlist_bl_unlock(ce->e_index_hash_p); - __mb_cache_entry_unhash_block(ce); - hlist_bl_unlock(ce->e_block_hash_p); -} - -static void -__mb_cache_entry_forget(struct mb_cache_entry *ce, gfp_t gfp_mask) -{ - struct mb_cache *cache = ce->e_cache; - - mb_assert(!(ce->e_used || ce->e_queued || atomic_read(&ce->e_refcnt))); - kmem_cache_free(cache->c_entry_cache, ce); - atomic_dec(&cache->c_entry_count); -} - -static void -__mb_cache_entry_release(struct mb_cache_entry *ce) -{ - /* First lock the entry to serialize access to its local data. */ - __spin_lock_mb_cache_entry(ce); - /* Wake up all processes queuing for this cache entry. */ - if (ce->e_queued) - wake_up_all(&mb_cache_queue); - if (ce->e_used >= MB_CACHE_WRITER) - ce->e_used -= MB_CACHE_WRITER; - /* - * Make sure that all cache entries on lru_list have - * both e_used and e_qued of 0s. - */ - ce->e_used--; - if (!(ce->e_used || ce->e_queued || atomic_read(&ce->e_refcnt))) { - if (!__mb_cache_entry_is_block_hashed(ce)) { - __spin_unlock_mb_cache_entry(ce); - goto forget; - } - /* - * Need access to lru list, first drop entry lock, - * then reacquire the lock in the proper order. - */ - spin_lock(&mb_cache_spinlock); - if (list_empty(&ce->e_lru_list)) - list_add_tail(&ce->e_lru_list, &mb_cache_lru_list); - spin_unlock(&mb_cache_spinlock); - } - __spin_unlock_mb_cache_entry(ce); - return; -forget: - mb_assert(list_empty(&ce->e_lru_list)); - __mb_cache_entry_forget(ce, GFP_KERNEL); -} - -/* - * mb_cache_shrink_scan() memory pressure callback - * - * This function is called by the kernel memory management when memory - * gets low. - * - * @shrink: (ignored) - * @sc: shrink_control passed from reclaim - * - * Returns the number of objects freed. 
- */ -static unsigned long -mb_cache_shrink_scan(struct shrinker *shrink, struct shrink_control *sc) -{ - LIST_HEAD(free_list); - struct mb_cache_entry *entry, *tmp; - int nr_to_scan = sc->nr_to_scan; - gfp_t gfp_mask = sc->gfp_mask; - unsigned long freed = 0; - - mb_debug("trying to free %d entries", nr_to_scan); - spin_lock(&mb_cache_spinlock); - while ((nr_to_scan-- > 0) && !list_empty(&mb_cache_lru_list)) { - struct mb_cache_entry *ce = - list_entry(mb_cache_lru_list.next, - struct mb_cache_entry, e_lru_list); - list_del_init(&ce->e_lru_list); - if (ce->e_used || ce->e_queued || atomic_read(&ce->e_refcnt)) - continue; - spin_unlock(&mb_cache_spinlock); - /* Prevent any find or get operation on the entry */ - hlist_bl_lock(ce->e_block_hash_p); - hlist_bl_lock(ce->e_index_hash_p); - /* Ignore if it is touched by a find/get */ - if (ce->e_used || ce->e_queued || atomic_read(&ce->e_refcnt) || - !list_empty(&ce->e_lru_list)) { - hlist_bl_unlock(ce->e_index_hash_p); - hlist_bl_unlock(ce->e_block_hash_p); - spin_lock(&mb_cache_spinlock); - continue; - } - __mb_cache_entry_unhash_unlock(ce); - list_add_tail(&ce->e_lru_list, &free_list); - spin_lock(&mb_cache_spinlock); - } - spin_unlock(&mb_cache_spinlock); - - list_for_each_entry_safe(entry, tmp, &free_list, e_lru_list) { - __mb_cache_entry_forget(entry, gfp_mask); - freed++; - } - return freed; -} - -static unsigned long -mb_cache_shrink_count(struct shrinker *shrink, struct shrink_control *sc) -{ - struct mb_cache *cache; - unsigned long count = 0; - - spin_lock(&mb_cache_spinlock); - list_for_each_entry(cache, &mb_cache_list, c_cache_list) { - mb_debug("cache %s (%d)", cache->c_name, - atomic_read(&cache->c_entry_count)); - count += atomic_read(&cache->c_entry_count); - } - spin_unlock(&mb_cache_spinlock); - - return vfs_pressure_ratio(count); -} - -static struct shrinker mb_cache_shrinker = { - .count_objects = mb_cache_shrink_count, - .scan_objects = mb_cache_shrink_scan, - .seeks = DEFAULT_SEEKS, -}; - -/* - * mb_cache_create() create a new cache - * - * All entries in one cache are equal size. Cache entries may be from - * multiple devices. If this is the first mbcache created, registers - * the cache with kernel memory management. Returns NULL if no more - * memory was available. 
- * - * @name: name of the cache (informal) - * @bucket_bits: log2(number of hash buckets) - */ -struct mb_cache * -mb_cache_create(const char *name, int bucket_bits) -{ - int n, bucket_count = 1 << bucket_bits; - struct mb_cache *cache = NULL; - - if (!mb_cache_bg_lock) { - mb_cache_bg_lock = kmalloc(sizeof(struct blockgroup_lock), - GFP_KERNEL); - if (!mb_cache_bg_lock) - return NULL; - bgl_lock_init(mb_cache_bg_lock); - } - - cache = kmalloc(sizeof(struct mb_cache), GFP_KERNEL); - if (!cache) - return NULL; - cache->c_name = name; - atomic_set(&cache->c_entry_count, 0); - cache->c_bucket_bits = bucket_bits; - cache->c_block_hash = kmalloc(bucket_count * - sizeof(struct hlist_bl_head), GFP_KERNEL); - if (!cache->c_block_hash) - goto fail; - for (n=0; nc_block_hash[n]); - cache->c_index_hash = kmalloc(bucket_count * - sizeof(struct hlist_bl_head), GFP_KERNEL); - if (!cache->c_index_hash) - goto fail; - for (n=0; nc_index_hash[n]); - if (!mb_cache_kmem_cache) { - mb_cache_kmem_cache = kmem_cache_create(name, - sizeof(struct mb_cache_entry), 0, - SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, NULL); - if (!mb_cache_kmem_cache) - goto fail2; - } - cache->c_entry_cache = mb_cache_kmem_cache; - - /* - * Set an upper limit on the number of cache entries so that the hash - * chains won't grow too long. - */ - cache->c_max_entries = bucket_count << 4; - - spin_lock(&mb_cache_spinlock); - list_add(&cache->c_cache_list, &mb_cache_list); - spin_unlock(&mb_cache_spinlock); - return cache; - -fail2: - kfree(cache->c_index_hash); - -fail: - kfree(cache->c_block_hash); - kfree(cache); - return NULL; -} - - -/* - * mb_cache_shrink() - * - * Removes all cache entries of a device from the cache. All cache entries - * currently in use cannot be freed, and thus remain in the cache. All others - * are freed. - * - * @bdev: which device's cache entries to shrink - */ -void -mb_cache_shrink(struct block_device *bdev) -{ - LIST_HEAD(free_list); - struct list_head *l; - struct mb_cache_entry *ce, *tmp; - - l = &mb_cache_lru_list; - spin_lock(&mb_cache_spinlock); - while (!list_is_last(l, &mb_cache_lru_list)) { - l = l->next; - ce = list_entry(l, struct mb_cache_entry, e_lru_list); - if (ce->e_bdev == bdev) { - list_del_init(&ce->e_lru_list); - if (ce->e_used || ce->e_queued || - atomic_read(&ce->e_refcnt)) - continue; - spin_unlock(&mb_cache_spinlock); - /* - * Prevent any find or get operation on the entry. - */ - hlist_bl_lock(ce->e_block_hash_p); - hlist_bl_lock(ce->e_index_hash_p); - /* Ignore if it is touched by a find/get */ - if (ce->e_used || ce->e_queued || - atomic_read(&ce->e_refcnt) || - !list_empty(&ce->e_lru_list)) { - hlist_bl_unlock(ce->e_index_hash_p); - hlist_bl_unlock(ce->e_block_hash_p); - l = &mb_cache_lru_list; - spin_lock(&mb_cache_spinlock); - continue; - } - __mb_cache_entry_unhash_unlock(ce); - mb_assert(!(ce->e_used || ce->e_queued || - atomic_read(&ce->e_refcnt))); - list_add_tail(&ce->e_lru_list, &free_list); - l = &mb_cache_lru_list; - spin_lock(&mb_cache_spinlock); - } - } - spin_unlock(&mb_cache_spinlock); - - list_for_each_entry_safe(ce, tmp, &free_list, e_lru_list) { - __mb_cache_entry_forget(ce, GFP_KERNEL); - } -} - - -/* - * mb_cache_destroy() - * - * Shrinks the cache to its minimum possible size (hopefully 0 entries), - * and then destroys it. If this was the last mbcache, un-registers the - * mbcache from kernel memory management. 
- */ -void -mb_cache_destroy(struct mb_cache *cache) -{ - LIST_HEAD(free_list); - struct mb_cache_entry *ce, *tmp; - - spin_lock(&mb_cache_spinlock); - list_for_each_entry_safe(ce, tmp, &mb_cache_lru_list, e_lru_list) { - if (ce->e_cache == cache) - list_move_tail(&ce->e_lru_list, &free_list); - } - list_del(&cache->c_cache_list); - spin_unlock(&mb_cache_spinlock); - - list_for_each_entry_safe(ce, tmp, &free_list, e_lru_list) { - list_del_init(&ce->e_lru_list); - /* - * Prevent any find or get operation on the entry. - */ - hlist_bl_lock(ce->e_block_hash_p); - hlist_bl_lock(ce->e_index_hash_p); - mb_assert(!(ce->e_used || ce->e_queued || - atomic_read(&ce->e_refcnt))); - __mb_cache_entry_unhash_unlock(ce); - __mb_cache_entry_forget(ce, GFP_KERNEL); - } - - if (atomic_read(&cache->c_entry_count) > 0) { - mb_error("cache %s: %d orphaned entries", - cache->c_name, - atomic_read(&cache->c_entry_count)); - } - - if (list_empty(&mb_cache_list)) { - kmem_cache_destroy(mb_cache_kmem_cache); - mb_cache_kmem_cache = NULL; - } - kfree(cache->c_index_hash); - kfree(cache->c_block_hash); - kfree(cache); -} - -/* - * mb_cache_entry_alloc() - * - * Allocates a new cache entry. The new entry will not be valid initially, - * and thus cannot be looked up yet. It should be filled with data, and - * then inserted into the cache using mb_cache_entry_insert(). Returns NULL - * if no more memory was available. - */ -struct mb_cache_entry * -mb_cache_entry_alloc(struct mb_cache *cache, gfp_t gfp_flags) -{ - struct mb_cache_entry *ce; - - if (atomic_read(&cache->c_entry_count) >= cache->c_max_entries) { - struct list_head *l; - - l = &mb_cache_lru_list; - spin_lock(&mb_cache_spinlock); - while (!list_is_last(l, &mb_cache_lru_list)) { - l = l->next; - ce = list_entry(l, struct mb_cache_entry, e_lru_list); - if (ce->e_cache == cache) { - list_del_init(&ce->e_lru_list); - if (ce->e_used || ce->e_queued || - atomic_read(&ce->e_refcnt)) - continue; - spin_unlock(&mb_cache_spinlock); - /* - * Prevent any find or get operation on the - * entry. - */ - hlist_bl_lock(ce->e_block_hash_p); - hlist_bl_lock(ce->e_index_hash_p); - /* Ignore if it is touched by a find/get */ - if (ce->e_used || ce->e_queued || - atomic_read(&ce->e_refcnt) || - !list_empty(&ce->e_lru_list)) { - hlist_bl_unlock(ce->e_index_hash_p); - hlist_bl_unlock(ce->e_block_hash_p); - l = &mb_cache_lru_list; - spin_lock(&mb_cache_spinlock); - continue; - } - mb_assert(list_empty(&ce->e_lru_list)); - mb_assert(!(ce->e_used || ce->e_queued || - atomic_read(&ce->e_refcnt))); - __mb_cache_entry_unhash_unlock(ce); - goto found; - } - } - spin_unlock(&mb_cache_spinlock); - } - - ce = kmem_cache_alloc(cache->c_entry_cache, gfp_flags); - if (!ce) - return NULL; - atomic_inc(&cache->c_entry_count); - INIT_LIST_HEAD(&ce->e_lru_list); - INIT_HLIST_BL_NODE(&ce->e_block_list); - INIT_HLIST_BL_NODE(&ce->e_index.o_list); - ce->e_cache = cache; - ce->e_queued = 0; - atomic_set(&ce->e_refcnt, 0); -found: - ce->e_block_hash_p = &cache->c_block_hash[0]; - ce->e_index_hash_p = &cache->c_index_hash[0]; - ce->e_used = 1 + MB_CACHE_WRITER; - return ce; -} - - -/* - * mb_cache_entry_insert() - * - * Inserts an entry that was allocated using mb_cache_entry_alloc() into - * the cache. After this, the cache entry can be looked up, but is not yet - * in the lru list as the caller still holds a handle to it. 
Returns 0 on - * success, or -EBUSY if a cache entry for that device + inode exists - * already (this may happen after a failed lookup, but when another process - * has inserted the same cache entry in the meantime). - * - * @bdev: device the cache entry belongs to - * @block: block number - * @key: lookup key - */ -int -mb_cache_entry_insert(struct mb_cache_entry *ce, struct block_device *bdev, - sector_t block, unsigned int key) -{ - struct mb_cache *cache = ce->e_cache; - unsigned int bucket; - struct hlist_bl_node *l; - struct hlist_bl_head *block_hash_p; - struct hlist_bl_head *index_hash_p; - struct mb_cache_entry *lce; - - mb_assert(ce); - bucket = hash_long((unsigned long)bdev + (block & 0xffffffff), - cache->c_bucket_bits); - block_hash_p = &cache->c_block_hash[bucket]; - hlist_bl_lock(block_hash_p); - hlist_bl_for_each_entry(lce, l, block_hash_p, e_block_list) { - if (lce->e_bdev == bdev && lce->e_block == block) { - hlist_bl_unlock(block_hash_p); - return -EBUSY; - } - } - mb_assert(!__mb_cache_entry_is_block_hashed(ce)); - __mb_cache_entry_unhash_block(ce); - __mb_cache_entry_unhash_index(ce); - ce->e_bdev = bdev; - ce->e_block = block; - ce->e_block_hash_p = block_hash_p; - ce->e_index.o_key = key; - hlist_bl_add_head(&ce->e_block_list, block_hash_p); - hlist_bl_unlock(block_hash_p); - bucket = hash_long(key, cache->c_bucket_bits); - index_hash_p = &cache->c_index_hash[bucket]; - hlist_bl_lock(index_hash_p); - ce->e_index_hash_p = index_hash_p; - hlist_bl_add_head(&ce->e_index.o_list, index_hash_p); - hlist_bl_unlock(index_hash_p); - return 0; -} - - -/* - * mb_cache_entry_release() - * - * Release a handle to a cache entry. When the last handle to a cache entry - * is released it is either freed (if it is invalid) or otherwise inserted - * in to the lru list. - */ -void -mb_cache_entry_release(struct mb_cache_entry *ce) -{ - __mb_cache_entry_release(ce); -} - - -/* - * mb_cache_entry_free() - * - */ -void -mb_cache_entry_free(struct mb_cache_entry *ce) -{ - mb_assert(ce); - mb_assert(list_empty(&ce->e_lru_list)); - hlist_bl_lock(ce->e_index_hash_p); - __mb_cache_entry_unhash_index(ce); - hlist_bl_unlock(ce->e_index_hash_p); - hlist_bl_lock(ce->e_block_hash_p); - __mb_cache_entry_unhash_block(ce); - hlist_bl_unlock(ce->e_block_hash_p); - __mb_cache_entry_release(ce); -} - - -/* - * mb_cache_entry_get() - * - * Get a cache entry by device / block number. (There can only be one entry - * in the cache per device and block.) Returns NULL if no such cache entry - * exists. The returned cache entry is locked for exclusive access ("single - * writer"). - */ -struct mb_cache_entry * -mb_cache_entry_get(struct mb_cache *cache, struct block_device *bdev, - sector_t block) -{ - unsigned int bucket; - struct hlist_bl_node *l; - struct mb_cache_entry *ce; - struct hlist_bl_head *block_hash_p; - - bucket = hash_long((unsigned long)bdev + (block & 0xffffffff), - cache->c_bucket_bits); - block_hash_p = &cache->c_block_hash[bucket]; - /* First serialize access to the block corresponding hash chain. */ - hlist_bl_lock(block_hash_p); - hlist_bl_for_each_entry(ce, l, block_hash_p, e_block_list) { - mb_assert(ce->e_block_hash_p == block_hash_p); - if (ce->e_bdev == bdev && ce->e_block == block) { - /* - * Prevent a free from removing the entry. 
- */ - atomic_inc(&ce->e_refcnt); - hlist_bl_unlock(block_hash_p); - __spin_lock_mb_cache_entry(ce); - atomic_dec(&ce->e_refcnt); - if (ce->e_used > 0) { - DEFINE_WAIT(wait); - while (ce->e_used > 0) { - ce->e_queued++; - prepare_to_wait(&mb_cache_queue, &wait, - TASK_UNINTERRUPTIBLE); - __spin_unlock_mb_cache_entry(ce); - schedule(); - __spin_lock_mb_cache_entry(ce); - ce->e_queued--; - } - finish_wait(&mb_cache_queue, &wait); - } - ce->e_used += 1 + MB_CACHE_WRITER; - __spin_unlock_mb_cache_entry(ce); - - if (!list_empty(&ce->e_lru_list)) { - spin_lock(&mb_cache_spinlock); - list_del_init(&ce->e_lru_list); - spin_unlock(&mb_cache_spinlock); - } - if (!__mb_cache_entry_is_block_hashed(ce)) { - __mb_cache_entry_release(ce); - return NULL; - } - return ce; - } - } - hlist_bl_unlock(block_hash_p); - return NULL; -} - -#if !defined(MB_CACHE_INDEXES_COUNT) || (MB_CACHE_INDEXES_COUNT > 0) - -static struct mb_cache_entry * -__mb_cache_entry_find(struct hlist_bl_node *l, struct hlist_bl_head *head, - struct block_device *bdev, unsigned int key) -{ - - /* The index hash chain is alredy acquire by caller. */ - while (l != NULL) { - struct mb_cache_entry *ce = - hlist_bl_entry(l, struct mb_cache_entry, - e_index.o_list); - mb_assert(ce->e_index_hash_p == head); - if (ce->e_bdev == bdev && ce->e_index.o_key == key) { - /* - * Prevent a free from removing the entry. - */ - atomic_inc(&ce->e_refcnt); - hlist_bl_unlock(head); - __spin_lock_mb_cache_entry(ce); - atomic_dec(&ce->e_refcnt); - ce->e_used++; - /* Incrementing before holding the lock gives readers - priority over writers. */ - if (ce->e_used >= MB_CACHE_WRITER) { - DEFINE_WAIT(wait); - - while (ce->e_used >= MB_CACHE_WRITER) { - ce->e_queued++; - prepare_to_wait(&mb_cache_queue, &wait, - TASK_UNINTERRUPTIBLE); - __spin_unlock_mb_cache_entry(ce); - schedule(); - __spin_lock_mb_cache_entry(ce); - ce->e_queued--; - } - finish_wait(&mb_cache_queue, &wait); - } - __spin_unlock_mb_cache_entry(ce); - if (!list_empty(&ce->e_lru_list)) { - spin_lock(&mb_cache_spinlock); - list_del_init(&ce->e_lru_list); - spin_unlock(&mb_cache_spinlock); - } - if (!__mb_cache_entry_is_block_hashed(ce)) { - __mb_cache_entry_release(ce); - return ERR_PTR(-EAGAIN); - } - return ce; - } - l = l->next; - } - hlist_bl_unlock(head); - return NULL; -} - - -/* - * mb_cache_entry_find_first() - * - * Find the first cache entry on a given device with a certain key in - * an additional index. Additional matches can be found with - * mb_cache_entry_find_next(). Returns NULL if no match was found. The - * returned cache entry is locked for shared access ("multiple readers"). - * - * @cache: the cache to search - * @bdev: the device the cache entry should belong to - * @key: the key in the index - */ -struct mb_cache_entry * -mb_cache_entry_find_first(struct mb_cache *cache, struct block_device *bdev, - unsigned int key) -{ - unsigned int bucket = hash_long(key, cache->c_bucket_bits); - struct hlist_bl_node *l; - struct mb_cache_entry *ce = NULL; - struct hlist_bl_head *index_hash_p; - - index_hash_p = &cache->c_index_hash[bucket]; - hlist_bl_lock(index_hash_p); - if (!hlist_bl_empty(index_hash_p)) { - l = hlist_bl_first(index_hash_p); - ce = __mb_cache_entry_find(l, index_hash_p, bdev, key); - } else - hlist_bl_unlock(index_hash_p); - return ce; -} - - -/* - * mb_cache_entry_find_next() - * - * Find the next cache entry on a given device with a certain key in an - * additional index. Returns NULL if no match could be found. 
The previous - * entry is atomatically released, so that mb_cache_entry_find_next() can - * be called like this: - * - * entry = mb_cache_entry_find_first(); - * while (entry) { - * ... - * entry = mb_cache_entry_find_next(entry, ...); - * } - * - * @prev: The previous match - * @bdev: the device the cache entry should belong to - * @key: the key in the index - */ -struct mb_cache_entry * -mb_cache_entry_find_next(struct mb_cache_entry *prev, - struct block_device *bdev, unsigned int key) -{ - struct mb_cache *cache = prev->e_cache; - unsigned int bucket = hash_long(key, cache->c_bucket_bits); - struct hlist_bl_node *l; - struct mb_cache_entry *ce; - struct hlist_bl_head *index_hash_p; - - index_hash_p = &cache->c_index_hash[bucket]; - mb_assert(prev->e_index_hash_p == index_hash_p); - hlist_bl_lock(index_hash_p); - mb_assert(!hlist_bl_empty(index_hash_p)); - l = prev->e_index.o_list.next; - ce = __mb_cache_entry_find(l, index_hash_p, bdev, key); - __mb_cache_entry_release(prev); - return ce; -} - -#endif /* !defined(MB_CACHE_INDEXES_COUNT) || (MB_CACHE_INDEXES_COUNT > 0) */ - -static int __init init_mbcache(void) -{ - register_shrinker(&mb_cache_shrinker); - return 0; -} - -static void __exit exit_mbcache(void) -{ - unregister_shrinker(&mb_cache_shrinker); -} - -module_init(init_mbcache) -module_exit(exit_mbcache) - diff --git a/include/linux/mbcache.h b/include/linux/mbcache.h deleted file mode 100644 index 6a392e7a723a..000000000000 --- a/include/linux/mbcache.h +++ /dev/null @@ -1,55 +0,0 @@ -/* - File: linux/mbcache.h - - (C) 2001 by Andreas Gruenbacher, -*/ -struct mb_cache_entry { - struct list_head e_lru_list; - struct mb_cache *e_cache; - unsigned short e_used; - unsigned short e_queued; - atomic_t e_refcnt; - struct block_device *e_bdev; - sector_t e_block; - struct hlist_bl_node e_block_list; - struct { - struct hlist_bl_node o_list; - unsigned int o_key; - } e_index; - struct hlist_bl_head *e_block_hash_p; - struct hlist_bl_head *e_index_hash_p; -}; - -struct mb_cache { - struct list_head c_cache_list; - const char *c_name; - atomic_t c_entry_count; - int c_max_entries; - int c_bucket_bits; - struct kmem_cache *c_entry_cache; - struct hlist_bl_head *c_block_hash; - struct hlist_bl_head *c_index_hash; -}; - -/* Functions on caches */ - -struct mb_cache *mb_cache_create(const char *, int); -void mb_cache_shrink(struct block_device *); -void mb_cache_destroy(struct mb_cache *); - -/* Functions on cache entries */ - -struct mb_cache_entry *mb_cache_entry_alloc(struct mb_cache *, gfp_t); -int mb_cache_entry_insert(struct mb_cache_entry *, struct block_device *, - sector_t, unsigned int); -void mb_cache_entry_release(struct mb_cache_entry *); -void mb_cache_entry_free(struct mb_cache_entry *); -struct mb_cache_entry *mb_cache_entry_get(struct mb_cache *, - struct block_device *, - sector_t); -struct mb_cache_entry *mb_cache_entry_find_first(struct mb_cache *cache, - struct block_device *, - unsigned int); -struct mb_cache_entry *mb_cache_entry_find_next(struct mb_cache_entry *, - struct block_device *, - unsigned int); -- cgit v1.2.3 From b6f5128459a40410f9afefddc0ad688ea5b22c28 Mon Sep 17 00:00:00 2001 From: Tero Kristo Date: Sat, 20 Feb 2016 13:24:26 +0200 Subject: clk: ti: dpll: convert DPLL support code to use clk_hw instead of clk ptrs Convert DPLL support code to use clk_hw pointers for reference and bypass clocks. 
This allows us to use the clk_hw_* APIs to access any required parameters for these clocks, and it avoids locking problems in the DPLL enable code, which previously called clk_get_rate(); that function takes a mutex and is therefore not safe under clk_enable() / clk_disable(). Signed-off-by: Tero Kristo Acked-by: Tony Lindgren Signed-off-by: Stephen Boyd --- drivers/clk/ti/apll.c | 20 ++++++++++++++++---- drivers/clk/ti/clkt_dpll.c | 6 +++--- drivers/clk/ti/dpll.c | 25 +++++++++++++++++++------ drivers/clk/ti/dpll3xxx.c | 17 ++++++++--------- drivers/clk/ti/dpll44xx.c | 8 ++++---- include/linux/clk/ti.h | 8 ++++---- 6 files changed, 54 insertions(+), 30 deletions(-) (limited to 'include/linux') diff --git a/drivers/clk/ti/apll.c b/drivers/clk/ti/apll.c index b336a8c11e2a..6411e132faa2 100644 --- a/drivers/clk/ti/apll.c +++ b/drivers/clk/ti/apll.c @@ -140,11 +140,21 @@ static void __init omap_clk_register_apll(struct clk_hw *hw, struct dpll_data *ad = clk_hw->dpll_data; struct clk *clk; - ad->clk_ref = of_clk_get(node, 0); - ad->clk_bypass = of_clk_get(node, 1); + clk = of_clk_get(node, 0); + if (IS_ERR(clk)) { + pr_debug("clk-ref for %s not ready, retry\n", + node->name); + if (!ti_clk_retry_init(node, hw, omap_clk_register_apll)) + return; + + goto cleanup; + } - if (IS_ERR(ad->clk_ref) || IS_ERR(ad->clk_bypass)) { - pr_debug("clk-ref or clk-bypass for %s not ready, retry\n", + ad->clk_ref = __clk_get_hw(clk); + + clk = of_clk_get(node, 1); + if (IS_ERR(clk)) { + pr_debug("clk-bypass for %s not ready, retry\n", node->name); if (!ti_clk_retry_init(node, hw, omap_clk_register_apll)) return; @@ -152,6 +162,8 @@ static void __init omap_clk_register_apll(struct clk_hw *hw, goto cleanup; } + ad->clk_bypass = __clk_get_hw(clk); + clk = clk_register(NULL, &clk_hw->hw); if (!IS_ERR(clk)) { of_clk_add_provider(node, of_clk_src_simple_get, clk); diff --git a/drivers/clk/ti/clkt_dpll.c b/drivers/clk/ti/clkt_dpll.c index b5cc6f66ae5d..032c658a5f5e 100644 --- a/drivers/clk/ti/clkt_dpll.c +++ b/drivers/clk/ti/clkt_dpll.c @@ -254,7 +254,7 @@ unsigned long omap2_get_dpll_rate(struct clk_hw_omap *clk) v >>= __ffs(dd->enable_mask); if (_omap2_dpll_is_in_bypass(v)) - return clk_get_rate(dd->clk_bypass); + return clk_hw_get_rate(dd->clk_bypass); v = ti_clk_ll_ops->clk_readl(dd->mult_div1_reg); dpll_mult = v & dd->mult_mask; @@ -262,7 +262,7 @@ unsigned long omap2_get_dpll_rate(struct clk_hw_omap *clk) dpll_div = v & dd->div1_mask; dpll_div >>= __ffs(dd->div1_mask); - dpll_clk = (u64)clk_get_rate(dd->clk_ref) * dpll_mult; + dpll_clk = (u64)clk_hw_get_rate(dd->clk_ref) * dpll_mult; do_div(dpll_clk, dpll_div + 1); return dpll_clk; @@ -301,7 +301,7 @@ long omap2_dpll_round_rate(struct clk_hw *hw, unsigned long target_rate, dd = clk->dpll_data; - ref_rate = clk_get_rate(dd->clk_ref); + ref_rate = clk_hw_get_rate(dd->clk_ref); clk_name = clk_hw_get_name(hw); pr_debug("clock: %s: starting DPLL round_rate, target rate %lu\n", clk_name, target_rate); diff --git a/drivers/clk/ti/dpll.c b/drivers/clk/ti/dpll.c index 5519b386edc0..f8306f1c30f1 100644 --- a/drivers/clk/ti/dpll.c +++ b/drivers/clk/ti/dpll.c @@ -147,11 +147,22 @@ static void __init _register_dpll(struct clk_hw *hw, struct dpll_data *dd = clk_hw->dpll_data; struct clk *clk; - dd->clk_ref = of_clk_get(node, 0); - dd->clk_bypass = of_clk_get(node, 1); + clk = of_clk_get(node, 0); + if (IS_ERR(clk)) { + pr_debug("clk-ref missing for %s, retry later\n", + node->name); + if (!ti_clk_retry_init(node, hw, _register_dpll)) + return; - if (IS_ERR(dd->clk_ref) || IS_ERR(dd->clk_bypass)) { -
pr_debug("clk-ref or clk-bypass missing for %s, retry later\n", + goto cleanup; + } + + dd->clk_ref = __clk_get_hw(clk); + + clk = of_clk_get(node, 1); + + if (IS_ERR(clk)) { + pr_debug("clk-bypass missing for %s, retry later\n", node->name); if (!ti_clk_retry_init(node, hw, _register_dpll)) return; @@ -159,6 +170,8 @@ static void __init _register_dpll(struct clk_hw *hw, goto cleanup; } + dd->clk_bypass = __clk_get_hw(clk); + /* register the clock */ clk = clk_register(NULL, &clk_hw->hw); @@ -251,8 +264,8 @@ struct clk *ti_clk_register_dpll(struct ti_clk *setup) dd->recal_en_bit = dpll->recal_en_bit; dd->recal_st_bit = dpll->recal_st_bit; - dd->clk_ref = clk_ref; - dd->clk_bypass = clk_bypass; + dd->clk_ref = __clk_get_hw(clk_ref); + dd->clk_bypass = __clk_get_hw(clk_bypass); if (dpll->flags & CLKF_CORE) ops = &omap3_dpll_core_ck_ops; diff --git a/drivers/clk/ti/dpll3xxx.c b/drivers/clk/ti/dpll3xxx.c index cc739291a3ce..88f2ce81ba55 100644 --- a/drivers/clk/ti/dpll3xxx.c +++ b/drivers/clk/ti/dpll3xxx.c @@ -98,7 +98,7 @@ static u16 _omap3_dpll_compute_freqsel(struct clk_hw_omap *clk, u8 n) unsigned long fint; u16 f = 0; - fint = clk_get_rate(clk->dpll_data->clk_ref) / n; + fint = clk_hw_get_rate(clk->dpll_data->clk_ref) / n; pr_debug("clock: fint is %lu\n", fint); @@ -460,12 +460,11 @@ int omap3_noncore_dpll_enable(struct clk_hw *hw) parent = clk_hw_get_parent(hw); - if (clk_hw_get_rate(hw) == - clk_hw_get_rate(__clk_get_hw(dd->clk_bypass))) { - WARN_ON(parent != __clk_get_hw(dd->clk_bypass)); + if (clk_hw_get_rate(hw) == clk_hw_get_rate(dd->clk_bypass)) { + WARN_ON(parent != dd->clk_bypass); r = _omap3_noncore_dpll_bypass(clk); } else { - WARN_ON(parent != __clk_get_hw(dd->clk_ref)); + WARN_ON(parent != dd->clk_ref); r = _omap3_noncore_dpll_lock(clk); } @@ -513,13 +512,13 @@ int omap3_noncore_dpll_determine_rate(struct clk_hw *hw, if (!dd) return -EINVAL; - if (clk_get_rate(dd->clk_bypass) == req->rate && + if (clk_hw_get_rate(dd->clk_bypass) == req->rate && (dd->modes & (1 << DPLL_LOW_POWER_BYPASS))) { - req->best_parent_hw = __clk_get_hw(dd->clk_bypass); + req->best_parent_hw = dd->clk_bypass; } else { req->rate = omap2_dpll_round_rate(hw, req->rate, &req->best_parent_rate); - req->best_parent_hw = __clk_get_hw(dd->clk_ref); + req->best_parent_hw = dd->clk_ref; } req->best_parent_rate = req->rate; @@ -577,7 +576,7 @@ int omap3_noncore_dpll_set_rate(struct clk_hw *hw, unsigned long rate, if (!dd) return -EINVAL; - if (clk_hw_get_parent(hw) != __clk_get_hw(dd->clk_ref)) + if (clk_hw_get_parent(hw) != dd->clk_ref) return -EINVAL; if (dd->last_rounded_rate == 0) diff --git a/drivers/clk/ti/dpll44xx.c b/drivers/clk/ti/dpll44xx.c index 660d7436ac24..82c05b55a7be 100644 --- a/drivers/clk/ti/dpll44xx.c +++ b/drivers/clk/ti/dpll44xx.c @@ -94,7 +94,7 @@ static void omap4_dpll_lpmode_recalc(struct dpll_data *dd) { long fint, fout; - fint = clk_get_rate(dd->clk_ref) / (dd->last_rounded_n + 1); + fint = clk_hw_get_rate(dd->clk_ref) / (dd->last_rounded_n + 1); fout = fint * dd->last_rounded_m; if ((fint < OMAP4_DPLL_LP_FINT_MAX) && (fout < OMAP4_DPLL_LP_FOUT_MAX)) @@ -212,13 +212,13 @@ int omap4_dpll_regm4xen_determine_rate(struct clk_hw *hw, if (!dd) return -EINVAL; - if (clk_get_rate(dd->clk_bypass) == req->rate && + if (clk_hw_get_rate(dd->clk_bypass) == req->rate && (dd->modes & (1 << DPLL_LOW_POWER_BYPASS))) { - req->best_parent_hw = __clk_get_hw(dd->clk_bypass); + req->best_parent_hw = dd->clk_bypass; } else { req->rate = omap4_dpll_regm4xen_round_rate(hw, req->rate, &req->best_parent_rate); - 
req->best_parent_hw = __clk_get_hw(dd->clk_ref); + req->best_parent_hw = dd->clk_ref; } req->best_parent_rate = req->rate; diff --git a/include/linux/clk/ti.h b/include/linux/clk/ti.h index 9a638601cb09..dc5164a6df29 100644 --- a/include/linux/clk/ti.h +++ b/include/linux/clk/ti.h @@ -23,8 +23,8 @@ * @mult_div1_reg: register containing the DPLL M and N bitfields * @mult_mask: mask of the DPLL M bitfield in @mult_div1_reg * @div1_mask: mask of the DPLL N bitfield in @mult_div1_reg - * @clk_bypass: struct clk pointer to the clock's bypass clock input - * @clk_ref: struct clk pointer to the clock's reference clock input + * @clk_bypass: struct clk_hw pointer to the clock's bypass clock input + * @clk_ref: struct clk_hw pointer to the clock's reference clock input * @control_reg: register containing the DPLL mode bitfield * @enable_mask: mask of the DPLL mode bitfield in @control_reg * @last_rounded_rate: cache of the last rate result of omap2_dpll_round_rate() @@ -69,8 +69,8 @@ struct dpll_data { void __iomem *mult_div1_reg; u32 mult_mask; u32 div1_mask; - struct clk *clk_bypass; - struct clk *clk_ref; + struct clk_hw *clk_bypass; + struct clk_hw *clk_ref; void __iomem *control_reg; u32 enable_mask; unsigned long last_rounded_rate; -- cgit v1.2.3 From d7a8d5ed876970ac7f9bafbb6708500a7838c1d7 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Mon, 22 Feb 2016 16:02:33 -0700 Subject: vfio: Add capability chain helpers Allow sub-modules to easily reallocate a buffer for managing capability chains for info ioctls. Signed-off-by: Alex Williamson --- drivers/vfio/vfio.c | 54 ++++++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/vfio.h | 11 +++++++++++ 2 files changed, 65 insertions(+) (limited to 'include/linux') diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c index 4cc961b894af..6fd6fa5469de 100644 --- a/drivers/vfio/vfio.c +++ b/drivers/vfio/vfio.c @@ -1728,6 +1728,60 @@ long vfio_external_check_extension(struct vfio_group *group, unsigned long arg) } EXPORT_SYMBOL_GPL(vfio_external_check_extension); +/** + * Sub-module support + */ +/* + * Helper for managing a buffer of info chain capabilities, allocate or + * reallocate a buffer with additional @size, filling in @id and @version + * of the capability. A pointer to the new capability is returned. + * + * NB. The chain is based at the head of the buffer, so new entries are + * added to the tail, vfio_info_cap_shift() should be called to fixup the + * next offsets prior to copying to the user buffer. 
+ */ +struct vfio_info_cap_header *vfio_info_cap_add(struct vfio_info_cap *caps, + size_t size, u16 id, u16 version) +{ + void *buf; + struct vfio_info_cap_header *header, *tmp; + + buf = krealloc(caps->buf, caps->size + size, GFP_KERNEL); + if (!buf) { + kfree(caps->buf); + caps->size = 0; + return ERR_PTR(-ENOMEM); + } + + caps->buf = buf; + header = buf + caps->size; + + /* Eventually copied to user buffer, zero */ + memset(header, 0, size); + + header->id = id; + header->version = version; + + /* Add to the end of the capability chain */ + for (tmp = caps->buf; tmp->next; tmp = (void *)tmp + tmp->next) + ; /* nothing */ + + tmp->next = caps->size; + caps->size += size; + + return header; +} +EXPORT_SYMBOL_GPL(vfio_info_cap_add); + +void vfio_info_cap_shift(struct vfio_info_cap *caps, size_t offset) +{ + struct vfio_info_cap_header *tmp; + + for (tmp = caps->buf; tmp->next; tmp = (void *)tmp + tmp->next - offset) + tmp->next += offset; +} +EXPORT_SYMBOL_GPL(vfio_info_cap_shift); + /** * Module/class support */ diff --git a/include/linux/vfio.h b/include/linux/vfio.h index 610a86a892b8..0ecae0b1cd34 100644 --- a/include/linux/vfio.h +++ b/include/linux/vfio.h @@ -92,6 +92,17 @@ extern int vfio_external_user_iommu_id(struct vfio_group *group); extern long vfio_external_check_extension(struct vfio_group *group, unsigned long arg); +/* + * Sub-module helpers + */ +struct vfio_info_cap { + struct vfio_info_cap_header *buf; + size_t size; +}; +extern struct vfio_info_cap_header *vfio_info_cap_add( + struct vfio_info_cap *caps, size_t size, u16 id, u16 version); +extern void vfio_info_cap_shift(struct vfio_info_cap *caps, size_t offset); + struct pci_dev; #ifdef CONFIG_EEH extern void vfio_spapr_pci_eeh_open(struct pci_dev *pdev); -- cgit v1.2.3 From f0c8b46238db9d51ef9ea0858259958d0c601cec Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 22 Feb 2016 18:23:47 -0500 Subject: mbcache2: Use referenced bit instead of LRU Currently we maintain a perfect LRU list by moving an entry to the tail of the list whenever it gets used. However, these operations on the cache-global list are relatively expensive. In this patch we switch to lazy updates of the LRU list. Whenever an entry gets used, we set a referenced bit in it. When reclaiming entries, we give referenced entries another round in the LRU. Since the list is not a real LRU anymore, rename it to just 'list'. In my testing this logic gives about a 30% boost to workloads with mostly unique xattr blocks (e.g. xattr-bench with 10 files and 10000 unique xattr values).
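Note that the referenced bit is not a separate field: the patch folds it into bit 0 of the pointer-sized _e_hash_list_head word, which is safe because the hlist_bl_head it points at is more than byte-aligned, so bit 0 of its address is always zero. A minimal standalone sketch of that pointer-tagging idiom (plain userspace C with illustrative names, not the mbcache2 API itself):

    #include <assert.h>
    #include <stdint.h>

    struct head { int dummy; };        /* stands in for struct hlist_bl_head */

    /* Bit 0 of an at-least-2-byte-aligned pointer is free to carry a flag. */
    static int is_referenced(uintptr_t tagged)
    {
            return tagged & 1;
    }

    static uintptr_t set_referenced(uintptr_t tagged)
    {
            return tagged | 1;
    }

    static struct head *untag(uintptr_t tagged)
    {
            return (struct head *)(tagged & ~(uintptr_t)1);
    }

    int main(void)
    {
            struct head h;
            uintptr_t word = (uintptr_t)&h; /* like _e_hash_list_head */

            word = set_referenced(word);
            assert(is_referenced(word));
            assert(untag(word) == &h);      /* the pointer survives tagging */
            return 0;
    }

The payoff is that marking an entry as used becomes a single bit set instead of a locked list move, while the shrinker still gives referenced entries a second pass through the list.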
Signed-off-by: Jan Kara Signed-off-by: Theodore Ts'o --- fs/mbcache2.c | 87 +++++++++++++++++++++++++++++++----------------- include/linux/mbcache2.h | 11 +++--- 2 files changed, 63 insertions(+), 35 deletions(-) (limited to 'include/linux') diff --git a/fs/mbcache2.c b/fs/mbcache2.c index 3e3198d6b9d6..49f7a6feaa83 100644 --- a/fs/mbcache2.c +++ b/fs/mbcache2.c @@ -30,9 +30,9 @@ struct mb2_cache { int c_bucket_bits; /* Maximum entries in cache to avoid degrading hash too much */ int c_max_entries; - /* Protects c_lru_list, c_entry_count */ - spinlock_t c_lru_list_lock; - struct list_head c_lru_list; + /* Protects c_list, c_entry_count */ + spinlock_t c_list_lock; + struct list_head c_list; /* Number of entries in cache */ unsigned long c_entry_count; struct shrinker c_shrink; @@ -45,6 +45,29 @@ static struct kmem_cache *mb2_entry_cache; static unsigned long mb2_cache_shrink(struct mb2_cache *cache, unsigned int nr_to_scan); +static inline bool mb2_cache_entry_referenced(struct mb2_cache_entry *entry) +{ + return entry->_e_hash_list_head & 1; +} + +static inline void mb2_cache_entry_set_referenced(struct mb2_cache_entry *entry) +{ + entry->_e_hash_list_head |= 1; +} + +static inline void mb2_cache_entry_clear_referenced( + struct mb2_cache_entry *entry) +{ + entry->_e_hash_list_head &= ~1; +} + +static inline struct hlist_bl_head *mb2_cache_entry_head( + struct mb2_cache_entry *entry) +{ + return (struct hlist_bl_head *) + (entry->_e_hash_list_head & ~1); +} + /* * Number of entries to reclaim synchronously when there are too many entries * in cache @@ -80,13 +103,13 @@ int mb2_cache_entry_create(struct mb2_cache *cache, gfp_t mask, u32 key, if (!entry) return -ENOMEM; - INIT_LIST_HEAD(&entry->e_lru_list); + INIT_LIST_HEAD(&entry->e_list); /* One ref for hash, one ref returned */ atomic_set(&entry->e_refcnt, 1); entry->e_key = key; entry->e_block = block; head = &cache->c_hash[hash_32(key, cache->c_bucket_bits)]; - entry->e_hash_list_head = head; + entry->_e_hash_list_head = (unsigned long)head; hlist_bl_lock(head); hlist_bl_for_each_entry(dup, dup_node, head, e_hash_list) { if (dup->e_key == key && dup->e_block == block) { @@ -98,12 +121,12 @@ int mb2_cache_entry_create(struct mb2_cache *cache, gfp_t mask, u32 key, hlist_bl_add_head(&entry->e_hash_list, head); hlist_bl_unlock(head); - spin_lock(&cache->c_lru_list_lock); - list_add_tail(&entry->e_lru_list, &cache->c_lru_list); + spin_lock(&cache->c_list_lock); + list_add_tail(&entry->e_list, &cache->c_list); /* Grab ref for LRU list */ atomic_inc(&entry->e_refcnt); cache->c_entry_count++; - spin_unlock(&cache->c_lru_list_lock); + spin_unlock(&cache->c_list_lock); return 0; } @@ -124,7 +147,7 @@ static struct mb2_cache_entry *__entry_find(struct mb2_cache *cache, struct hlist_bl_head *head; if (entry) - head = entry->e_hash_list_head; + head = mb2_cache_entry_head(entry); else head = &cache->c_hash[hash_32(key, cache->c_bucket_bits)]; hlist_bl_lock(head); @@ -203,13 +226,13 @@ void mb2_cache_entry_delete_block(struct mb2_cache *cache, u32 key, /* We keep hash list reference to keep entry alive */ hlist_bl_del_init(&entry->e_hash_list); hlist_bl_unlock(head); - spin_lock(&cache->c_lru_list_lock); - if (!list_empty(&entry->e_lru_list)) { - list_del_init(&entry->e_lru_list); + spin_lock(&cache->c_list_lock); + if (!list_empty(&entry->e_list)) { + list_del_init(&entry->e_list); cache->c_entry_count--; atomic_dec(&entry->e_refcnt); } - spin_unlock(&cache->c_lru_list_lock); + spin_unlock(&cache->c_list_lock); mb2_cache_entry_put(cache, entry); 
return; } @@ -222,15 +245,12 @@ EXPORT_SYMBOL(mb2_cache_entry_delete_block); * @cache - cache the entry belongs to * @entry - entry that got used * - * Move entry in lru list to reflect the fact that it was used. + * Marks entry as used to give hit higher chances of surviving in cache. */ void mb2_cache_entry_touch(struct mb2_cache *cache, struct mb2_cache_entry *entry) { - spin_lock(&cache->c_lru_list_lock); - if (!list_empty(&entry->e_lru_list)) - list_move_tail(&cache->c_lru_list, &entry->e_lru_list); - spin_unlock(&cache->c_lru_list_lock); + mb2_cache_entry_set_referenced(entry); } EXPORT_SYMBOL(mb2_cache_entry_touch); @@ -251,18 +271,23 @@ static unsigned long mb2_cache_shrink(struct mb2_cache *cache, struct hlist_bl_head *head; unsigned int shrunk = 0; - spin_lock(&cache->c_lru_list_lock); - while (nr_to_scan-- && !list_empty(&cache->c_lru_list)) { - entry = list_first_entry(&cache->c_lru_list, - struct mb2_cache_entry, e_lru_list); - list_del_init(&entry->e_lru_list); + spin_lock(&cache->c_list_lock); + while (nr_to_scan-- && !list_empty(&cache->c_list)) { + entry = list_first_entry(&cache->c_list, + struct mb2_cache_entry, e_list); + if (mb2_cache_entry_referenced(entry)) { + mb2_cache_entry_clear_referenced(entry); + list_move_tail(&cache->c_list, &entry->e_list); + continue; + } + list_del_init(&entry->e_list); cache->c_entry_count--; /* * We keep LRU list reference so that entry doesn't go away * from under us. */ - spin_unlock(&cache->c_lru_list_lock); - head = entry->e_hash_list_head; + spin_unlock(&cache->c_list_lock); + head = mb2_cache_entry_head(entry); hlist_bl_lock(head); if (!hlist_bl_unhashed(&entry->e_hash_list)) { hlist_bl_del_init(&entry->e_hash_list); @@ -272,9 +297,9 @@ static unsigned long mb2_cache_shrink(struct mb2_cache *cache, if (mb2_cache_entry_put(cache, entry)) shrunk++; cond_resched(); - spin_lock(&cache->c_lru_list_lock); + spin_lock(&cache->c_list_lock); } - spin_unlock(&cache->c_lru_list_lock); + spin_unlock(&cache->c_list_lock); return shrunk; } @@ -318,8 +343,8 @@ struct mb2_cache *mb2_cache_create(int bucket_bits) goto err_out; cache->c_bucket_bits = bucket_bits; cache->c_max_entries = bucket_count << 4; - INIT_LIST_HEAD(&cache->c_lru_list); - spin_lock_init(&cache->c_lru_list_lock); + INIT_LIST_HEAD(&cache->c_list); + spin_lock_init(&cache->c_list_lock); cache->c_hash = kmalloc(bucket_count * sizeof(struct hlist_bl_head), GFP_KERNEL); if (!cache->c_hash) { @@ -361,13 +386,13 @@ void mb2_cache_destroy(struct mb2_cache *cache) * We don't bother with any locking. Cache must not be used at this * point. 
*/ - list_for_each_entry_safe(entry, next, &cache->c_lru_list, e_lru_list) { + list_for_each_entry_safe(entry, next, &cache->c_list, e_list) { if (!hlist_bl_unhashed(&entry->e_hash_list)) { hlist_bl_del_init(&entry->e_hash_list); atomic_dec(&entry->e_refcnt); } else WARN_ON(1); - list_del(&entry->e_lru_list); + list_del(&entry->e_list); WARN_ON(atomic_read(&entry->e_refcnt) != 1); mb2_cache_entry_put(cache, entry); } diff --git a/include/linux/mbcache2.h b/include/linux/mbcache2.h index b6f160ff2533..c934843a6a31 100644 --- a/include/linux/mbcache2.h +++ b/include/linux/mbcache2.h @@ -10,8 +10,8 @@ struct mb2_cache; struct mb2_cache_entry { - /* LRU list - protected by cache->c_lru_list_lock */ - struct list_head e_lru_list; + /* List of entries in cache - protected by cache->c_list_lock */ + struct list_head e_list; /* Hash table list - protected by bitlock in e_hash_list_head */ struct hlist_bl_node e_hash_list; atomic_t e_refcnt; @@ -19,8 +19,11 @@ struct mb2_cache_entry { u32 e_key; /* Block number of hashed block - stable during lifetime of the entry */ sector_t e_block; - /* Head of hash list (for list bit lock) - stable */ - struct hlist_bl_head *e_hash_list_head; + /* + * Head of hash list (for list bit lock) - stable. Combined with + * referenced bit of entry + */ + unsigned long _e_hash_list_head; }; struct mb2_cache *mb2_cache_create(int bucket_bits); -- cgit v1.2.3 From 8f1dbbbbdfe9bada7e2f8041e07c6373f787c043 Mon Sep 17 00:00:00 2001 From: Shuoran Liu Date: Wed, 27 Jan 2016 09:57:30 +0800 Subject: f2fs: introduce lifetime write IO statistics This patch introduces lifetime write IO statistics exposed via the sysfs interface. The write IO amount is obtained from the block layer, accumulated in the file system, and stored in the hot node summary of the checkpoint. Signed-off-by: Shuoran Liu Signed-off-by: Pengyang Hou [Jaegeuk Kim: add sysfs documentation] Signed-off-by: Jaegeuk Kim --- Documentation/ABI/testing/sysfs-fs-f2fs | 6 +++++ fs/f2fs/checkpoint.c | 11 ++++++++++ fs/f2fs/f2fs.h | 11 ++++++++++ fs/f2fs/super.c | 39 +++++++++++++++++++++++++++++++-- include/linux/f2fs_fs.h | 14 +++++++++++- 5 files changed, 78 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs index 523cb9d4e272..a809f6005f14 100644 --- a/Documentation/ABI/testing/sysfs-fs-f2fs +++ b/Documentation/ABI/testing/sysfs-fs-f2fs @@ -106,3 +106,9 @@ Description: Controls dirty nat entries ratio threshold, if current ratio exceeds configured threshold, checkpoint will be triggered for flushing dirty nat entries. + +What: /sys/fs/f2fs/<disk>/lifetime_write_kbytes +Date: January 2016 +Contact: "Shuoran Liu" +Description: + Shows total written kbytes issued to disk.
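The arithmetic behind this counter, visible in the hunks that follow, is small: the block layer counts 512-byte sectors, so the delta written since mount is shifted right by one to convert to kilobytes and added to the total restored from the checkpoint. A hedged standalone sketch of that computation (illustrative names; the real code samples the counters with part_stat_read()):

    #include <assert.h>
    #include <stdint.h>

    /* Sectors are 512 bytes, so sectors / 2 == kilobytes. */
    static uint64_t lifetime_write_kbytes(uint64_t kbytes_from_checkpoint,
                                          uint64_t sectors_now,
                                          uint64_t sectors_at_mount)
    {
            return kbytes_from_checkpoint +
                   ((sectors_now - sectors_at_mount) >> 1);
    }

    int main(void)
    {
            /* 4096 sectors written since mount == 2048 KB, on top of
             * 100 KB previously accumulated in the hot node summary. */
            assert(lifetime_write_kbytes(100, 5096, 1000) == 2148);
            return 0;
    }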
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 4b6f9c08f6fa..112e19fdbe08 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -921,6 +921,9 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) int cp_payload_blks = __cp_payload(sbi); block_t discard_blk = NEXT_FREE_BLKADDR(sbi, curseg); bool invalidate = false; + struct super_block *sb = sbi->sb; + struct curseg_info *seg_i = CURSEG_I(sbi, CURSEG_HOT_NODE); + u64 kbytes_written; /* * This avoids to conduct wrong roll-forward operations and uses @@ -1034,6 +1037,14 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) write_data_summaries(sbi, start_blk); start_blk += data_sum_blocks; + + /* Record write statistics in the hot node summary */ + kbytes_written = sbi->kbytes_written; + if (sb->s_bdev->bd_part) + kbytes_written += BD_PART_WRITTEN(sbi); + + seg_i->sum_blk->info.kbytes_written = cpu_to_le64(kbytes_written); + if (__remain_node_summaries(cpc->reason)) { write_node_summaries(sbi, start_blk); start_blk += NR_CURSEG_NODE_TYPE; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 4e7eab9f3f5a..5b6fd541551c 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -846,8 +846,19 @@ struct f2fs_sb_info { struct list_head s_list; struct mutex umount_mutex; unsigned int shrinker_run_no; + + /* For write statistics */ + u64 sectors_written_start; + u64 kbytes_written; }; +/* For write statistics. Suppose sector size is 512 bytes, + * and the return value is in kbytes. s is of struct f2fs_sb_info. + */ +#define BD_PART_WRITTEN(s) \ +(((u64)part_stat_read(s->sb->s_bdev->bd_part, sectors[1]) - \ + s->sectors_written_start) >> 1) + static inline void f2fs_update_time(struct f2fs_sb_info *sbi, int type) { sbi->last_time[type] = jiffies; diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 0ea5bcccab6d..58d9a5574bb0 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -126,6 +126,19 @@ static unsigned char *__struct_ptr(struct f2fs_sb_info *sbi, int struct_type) return NULL; } +static ssize_t lifetime_write_kbytes_show(struct f2fs_attr *a, + struct f2fs_sb_info *sbi, char *buf) +{ + struct super_block *sb = sbi->sb; + + if (!sb->s_bdev->bd_part) + return snprintf(buf, PAGE_SIZE, "0\n"); + + return snprintf(buf, PAGE_SIZE, "%llu\n", + (unsigned long long)(sbi->kbytes_written + + BD_PART_WRITTEN(sbi))); +} + static ssize_t f2fs_sbi_show(struct f2fs_attr *a, struct f2fs_sb_info *sbi, char *buf) { @@ -204,6 +217,9 @@ static struct f2fs_attr f2fs_attr_##_name = { \ f2fs_sbi_show, f2fs_sbi_store, \ offsetof(struct struct_name, elname)) +#define F2FS_GENERAL_RO_ATTR(name) \ +static struct f2fs_attr f2fs_attr_##name = __ATTR(name, 0444, name##_show, NULL) + F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_min_sleep_time, min_sleep_time); F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_max_sleep_time, max_sleep_time); F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_no_gc_sleep_time, no_gc_sleep_time); @@ -221,6 +237,7 @@ F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, max_victim_search, max_victim_search); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, dir_level, dir_level); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, cp_interval, interval_time[CP_TIME]); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, idle_interval, interval_time[REQ_TIME]); +F2FS_GENERAL_RO_ATTR(lifetime_write_kbytes); #define ATTR_LIST(name) (&f2fs_attr_##name.attr) static struct attribute *f2fs_attrs[] = { @@ -241,6 +258,7 @@ static struct attribute *f2fs_attrs[] = { ATTR_LIST(dirty_nats_ratio), ATTR_LIST(cp_interval), ATTR_LIST(idle_interval), + 
ATTR_LIST(lifetime_write_kbytes), NULL, }; @@ -768,8 +786,6 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) bool need_stop_gc = false; bool no_extent_cache = !test_opt(sbi, EXTENT_CACHE); - sync_filesystem(sb); - /* * Save the old mount options in case we * need to restore them. */ org_mount_opt = sbi->mount_opt; active_logs = sbi->active_logs; + if (*flags & MS_RDONLY) { + set_opt(sbi, FASTBOOT); + set_sbi_flag(sbi, SBI_IS_DIRTY); + } + + sync_filesystem(sb); + sbi->mount_opt.opt = 0; default_options(sbi); @@ -1244,6 +1267,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) bool retry = true, need_fsck = false; char *options = NULL; int recovery, i, valid_super_block; + struct curseg_info *seg_i; try_onemore: err = -EINVAL; @@ -1374,6 +1398,17 @@ try_onemore: goto free_nm; } + /* For write statistics */ + if (sb->s_bdev->bd_part) + sbi->sectors_written_start = + (u64)part_stat_read(sb->s_bdev->bd_part, sectors[1]); + + /* Read accumulated write IO statistics if exists */ + seg_i = CURSEG_I(sbi, CURSEG_HOT_NODE); + if (__exist_node_summaries(sbi)) + sbi->kbytes_written = + le64_to_cpu(seg_i->sum_blk->info.kbytes_written); + build_gc_manager(sbi); /* get an inode for node space */ diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index e59c3be92106..67aa01df777d 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -358,6 +358,12 @@ struct summary_footer { sizeof(struct sit_journal_entry)) #define SIT_JOURNAL_RESERVED ((SUM_JOURNAL_SIZE - 2) %\ sizeof(struct sit_journal_entry)) + +/* Reserved area should make size of f2fs_extra_info equals to + * that of nat_journal and sit_journal. + */ +#define EXTRA_INFO_RESERVED (SUM_JOURNAL_SIZE - 2 - 8) + /* * frequently updated NAT/SIT entries can be stored in the spare area in * summary blocks @@ -387,6 +393,11 @@ struct sit_journal { __u8 reserved[SIT_JOURNAL_RESERVED]; } __packed; +struct f2fs_extra_info { + __le64 kbytes_written; + __u8 reserved[EXTRA_INFO_RESERVED]; +} __packed; + /* 4KB-sized summary block structure */ struct f2fs_summary_block { struct f2fs_summary entries[ENTRIES_IN_SUM]; @@ -394,10 +405,11 @@ struct f2fs_summary_block { __le16 n_nats; __le16 n_sits; }; - /* spare area is used by NAT or SIT journals */ + /* spare area is used by NAT or SIT journals or extra info */ union { struct nat_journal nat_j; struct sit_journal sit_j; + struct f2fs_extra_info info; }; struct summary_footer footer; } __packed; -- cgit v1.2.3 From 81ca7350ce5ed438547ea769b0c33cb0abbd74ba Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 26 Jan 2016 15:39:35 +0800 Subject: f2fs: remove unneeded pointer conversion There are redundant pointer conversions in the following call stack: - at position a, the inode is converted to f2fs_inode_info. - at position b, the f2fs_inode_info is converted back to an inode. - truncate_blocks(inode,..) - fi = F2FS_I(inode) ---a - ADDRS_PER_PAGE(node_page, fi) - addrs_per_inode(fi) - inode = &fi->vfs_inode ---b - f2fs_has_inline_xattr(inode) - fi = F2FS_I(inode) - is_inode_flag_set(fi,..) In order to avoid the unneeded conversion, alter ADDRS_PER_PAGE and addrs_per_inode to accept a parameter of inode pointer type.
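The round trip being removed is the container_of() dance between the VFS inode and its enclosing f2fs_inode_info. A minimal userspace sketch of why positions a and b cancel out (simplified structs with a hypothetical layout):

    #include <assert.h>
    #include <stddef.h>

    struct inode { int i_mode; };

    struct f2fs_inode_info {
            unsigned long i_flags;
            struct inode vfs_inode;         /* embedded VFS inode */
    };

    /* Like F2FS_I(): recover the outer struct from the embedded inode. */
    #define F2FS_I(ip) \
            ((struct f2fs_inode_info *)((char *)(ip) - \
                    offsetof(struct f2fs_inode_info, vfs_inode)))

    int main(void)
    {
            struct f2fs_inode_info fi;
            struct inode *inode = &fi.vfs_inode;

            /* position a: inode -> fi; position b: back to the inode */
            assert(F2FS_I(inode) == &fi);
            assert(&F2FS_I(inode)->vfs_inode == inode);
            return 0;
    }

Passing the inode pointer straight through simply skips a pointer adjustment that would be undone one call later.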
Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 5 ++--- fs/f2fs/extent_cache.c | 5 ++--- fs/f2fs/f2fs.h | 14 +++++++------- fs/f2fs/file.c | 11 +++++------ fs/f2fs/gc.c | 8 ++++---- fs/f2fs/node.c | 8 ++++---- fs/f2fs/recovery.c | 10 ++++------ include/linux/f2fs_fs.h | 6 +++--- 8 files changed, 31 insertions(+), 36 deletions(-) (limited to 'include/linux') diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index f89ef4e37510..bb60e6afbb72 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -497,7 +497,6 @@ got_it: static int __allocate_data_block(struct dnode_of_data *dn) { struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode); - struct f2fs_inode_info *fi = F2FS_I(dn->inode); struct f2fs_summary sum; struct node_info ni; int seg = CURSEG_WARM_DATA; @@ -525,7 +524,7 @@ alloc: set_data_blkaddr(dn); /* update i_size */ - fofs = start_bidx_of_node(ofs_of_node(dn->node_page), fi) + + fofs = start_bidx_of_node(ofs_of_node(dn->node_page), dn->inode) + dn->ofs_in_node; if (i_size_read(dn->inode) < ((loff_t)(fofs + 1) << PAGE_CACHE_SHIFT)) i_size_write(dn->inode, @@ -592,7 +591,7 @@ next_dnode: goto unlock_out; } - end_offset = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode)); + end_offset = ADDRS_PER_PAGE(dn.node_page, inode); next_block: blkaddr = datablock_addr(dn.node_page, dn.ofs_in_node); diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c index 365cf950891b..071a1b19e5af 100644 --- a/fs/f2fs/extent_cache.c +++ b/fs/f2fs/extent_cache.c @@ -673,7 +673,6 @@ bool f2fs_lookup_extent_cache(struct inode *inode, pgoff_t pgofs, void f2fs_update_extent_cache(struct dnode_of_data *dn) { - struct f2fs_inode_info *fi = F2FS_I(dn->inode); pgoff_t fofs; if (!f2fs_may_extent_tree(dn->inode)) @@ -682,8 +681,8 @@ void f2fs_update_extent_cache(struct dnode_of_data *dn) f2fs_bug_on(F2FS_I_SB(dn->inode), dn->data_blkaddr == NEW_ADDR); - fofs = start_bidx_of_node(ofs_of_node(dn->node_page), fi) + - dn->ofs_in_node; + fofs = start_bidx_of_node(ofs_of_node(dn->node_page), dn->inode) + + dn->ofs_in_node; if (f2fs_update_extent_tree_range(dn->inode, fofs, dn->data_blkaddr, 1)) sync_inode_page(dn); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 5b6fd541551c..00bb83fc35e6 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1538,9 +1538,9 @@ static inline int f2fs_has_inline_xattr(struct inode *inode) return is_inode_flag_set(F2FS_I(inode), FI_INLINE_XATTR); } -static inline unsigned int addrs_per_inode(struct f2fs_inode_info *fi) +static inline unsigned int addrs_per_inode(struct inode *inode) { - if (f2fs_has_inline_xattr(&fi->vfs_inode)) + if (f2fs_has_inline_xattr(inode)) return DEF_ADDRS_PER_INODE - F2FS_INLINE_XATTR_ADDRS; return DEF_ADDRS_PER_INODE; } @@ -1694,10 +1694,10 @@ static inline void *f2fs_kvzalloc(size_t size, gfp_t flags) (F2FS_I(i)->i_acl_mode) : ((i)->i_mode)) /* get offset of first page in next direct node */ -#define PGOFS_OF_NEXT_DNODE(pgofs, fi) \ - ((pgofs < ADDRS_PER_INODE(fi)) ? ADDRS_PER_INODE(fi) : \ - (pgofs - ADDRS_PER_INODE(fi) + ADDRS_PER_BLOCK) / \ - ADDRS_PER_BLOCK * ADDRS_PER_BLOCK + ADDRS_PER_INODE(fi)) +#define PGOFS_OF_NEXT_DNODE(pgofs, inode) \ + ((pgofs < ADDRS_PER_INODE(inode)) ? 
ADDRS_PER_INODE(inode) : \ + (pgofs - ADDRS_PER_INODE(inode) + ADDRS_PER_BLOCK) / \ + ADDRS_PER_BLOCK * ADDRS_PER_BLOCK + ADDRS_PER_INODE(inode)) /* * file.c @@ -1916,7 +1916,7 @@ int f2fs_release_page(struct page *, gfp_t); */ int start_gc_thread(struct f2fs_sb_info *); void stop_gc_thread(struct f2fs_sb_info *); -block_t start_bidx_of_node(unsigned int, struct f2fs_inode_info *); +block_t start_bidx_of_node(unsigned int, struct inode *); int f2fs_gc(struct f2fs_sb_info *, bool); void build_gc_manager(struct f2fs_sb_info *); diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index f9b848a3700f..e52af2d08864 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -358,15 +358,14 @@ static loff_t f2fs_seek_block(struct file *file, loff_t offset, int whence) } else if (err == -ENOENT) { /* direct node does not exists */ if (whence == SEEK_DATA) { - pgofs = PGOFS_OF_NEXT_DNODE(pgofs, - F2FS_I(inode)); + pgofs = PGOFS_OF_NEXT_DNODE(pgofs, inode); continue; } else { goto found; } } - end_offset = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode)); + end_offset = ADDRS_PER_PAGE(dn.node_page, inode); /* find data/hole in dnode block */ for (; dn.ofs_in_node < end_offset; @@ -480,7 +479,7 @@ int truncate_data_blocks_range(struct dnode_of_data *dn, int count) * we will invalidate all blkaddr in the whole range. */ fofs = start_bidx_of_node(ofs_of_node(dn->node_page), - F2FS_I(dn->inode)) + ofs; + dn->inode) + ofs; f2fs_update_extent_cache_range(dn, fofs, 0, len); dec_valid_block_count(sbi, dn->inode, nr_free); sync_inode_page(dn); @@ -568,7 +567,7 @@ int truncate_blocks(struct inode *inode, u64 from, bool lock) goto out; } - count = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode)); + count = ADDRS_PER_PAGE(dn.node_page, inode); count -= dn.ofs_in_node; f2fs_bug_on(sbi, count < 0); @@ -768,7 +767,7 @@ int truncate_hole(struct inode *inode, pgoff_t pg_start, pgoff_t pg_end) return err; } - end_offset = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode)); + end_offset = ADDRS_PER_PAGE(dn.node_page, inode); count = min(end_offset - dn.ofs_in_node, pg_end - pg_start); f2fs_bug_on(F2FS_I_SB(inode), count == 0 || count > end_offset); diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 645273c8b341..47ade3542fbd 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -486,7 +486,7 @@ next_step: * as indirect or double indirect node blocks, are given, it must be a caller's * bug. 
*/ -block_t start_bidx_of_node(unsigned int node_ofs, struct f2fs_inode_info *fi) +block_t start_bidx_of_node(unsigned int node_ofs, struct inode *inode) { unsigned int indirect_blks = 2 * NIDS_PER_BLOCK + 4; unsigned int bidx; @@ -503,7 +503,7 @@ block_t start_bidx_of_node(unsigned int node_ofs, struct f2fs_inode_info *fi) int dec = (node_ofs - indirect_blks - 3) / (NIDS_PER_BLOCK + 1); bidx = node_ofs - 5 - dec; } - return bidx * ADDRS_PER_BLOCK + ADDRS_PER_INODE(fi); + return bidx * ADDRS_PER_BLOCK + ADDRS_PER_INODE(inode); } static bool is_alive(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, @@ -722,7 +722,7 @@ next_step: continue; } - start_bidx = start_bidx_of_node(nofs, F2FS_I(inode)); + start_bidx = start_bidx_of_node(nofs, inode); data_page = get_read_data_page(inode, start_bidx + ofs_in_node, READA, true); if (IS_ERR(data_page)) { @@ -738,7 +738,7 @@ next_step: /* phase 3 */ inode = find_gc_inode(gc_list, dni.ino); if (inode) { - start_bidx = start_bidx_of_node(nofs, F2FS_I(inode)) + start_bidx = start_bidx_of_node(nofs, inode) + ofs_in_node; if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode)) move_encrypted_block(inode, start_bidx); diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index a004bc0432b1..5e381b2772f2 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -407,10 +407,10 @@ cache: * The maximum depth is four. * Offset[0] will have raw inode offset. */ -static int get_node_path(struct f2fs_inode_info *fi, long block, +static int get_node_path(struct inode *inode, long block, int offset[4], unsigned int noffset[4]) { - const long direct_index = ADDRS_PER_INODE(fi); + const long direct_index = ADDRS_PER_INODE(inode); const long direct_blks = ADDRS_PER_BLOCK; const long dptrs_per_blk = NIDS_PER_BLOCK; const long indirect_blks = ADDRS_PER_BLOCK * NIDS_PER_BLOCK; @@ -498,7 +498,7 @@ int get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode) int level, i; int err = 0; - level = get_node_path(F2FS_I(dn->inode), index, offset, noffset); + level = get_node_path(dn->inode, index, offset, noffset); nids[0] = dn->inode->i_ino; npage[0] = dn->inode_page; @@ -792,7 +792,7 @@ int truncate_inode_blocks(struct inode *inode, pgoff_t from) trace_f2fs_truncate_inode_blocks_enter(inode, from); - level = get_node_path(F2FS_I(inode), from, offset, noffset); + level = get_node_path(inode, from, offset, noffset); restart: page = get_node_page(sbi, inode->i_ino); if (IS_ERR(page)) { diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 5a8fd4a99b11..5045dd6a27e9 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -350,8 +350,7 @@ got_it: inode = dn->inode; } - bidx = start_bidx_of_node(offset, F2FS_I(inode)) + - le16_to_cpu(sum.ofs_in_node); + bidx = start_bidx_of_node(offset, inode) + le16_to_cpu(sum.ofs_in_node); /* * if inode page is locked, unlock temporarily, but its reference @@ -386,10 +385,9 @@ truncate_out: static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, struct page *page, block_t blkaddr) { - struct f2fs_inode_info *fi = F2FS_I(inode); - unsigned int start, end; struct dnode_of_data dn; struct node_info ni; + unsigned int start, end; int err = 0, recovered = 0; /* step 1: recover xattr */ @@ -409,8 +407,8 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, goto out; /* step 3: recover data indices */ - start = start_bidx_of_node(ofs_of_node(page), fi); - end = start + ADDRS_PER_PAGE(page, fi); + start = start_bidx_of_node(ofs_of_node(page), inode); + end = start + ADDRS_PER_PAGE(page, inode); 
set_new_dnode(&dn, inode, NULL, NULL, 0); diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index 67aa01df777d..44ae822e154f 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -170,12 +170,12 @@ struct f2fs_extent { #define F2FS_INLINE_XATTR_ADDRS 50 /* 200 bytes for inline xattrs */ #define DEF_ADDRS_PER_INODE 923 /* Address Pointers in an Inode */ #define DEF_NIDS_PER_INODE 5 /* Node IDs in an Inode */ -#define ADDRS_PER_INODE(fi) addrs_per_inode(fi) +#define ADDRS_PER_INODE(inode) addrs_per_inode(inode) #define ADDRS_PER_BLOCK 1018 /* Address Pointers in a Direct Block */ #define NIDS_PER_BLOCK 1018 /* Node IDs in an Indirect Block */ -#define ADDRS_PER_PAGE(page, fi) \ - (IS_INODE(page) ? ADDRS_PER_INODE(fi) : ADDRS_PER_BLOCK) +#define ADDRS_PER_PAGE(page, inode) \ + (IS_INODE(page) ? ADDRS_PER_INODE(inode) : ADDRS_PER_BLOCK) #define NODE_DIR1_BLOCK (DEF_ADDRS_PER_INODE + 1) #define NODE_DIR2_BLOCK (DEF_ADDRS_PER_INODE + 2) -- cgit v1.2.3 From 479c8bc40c51535bf496440aa1d6af26e4ff9362 Mon Sep 17 00:00:00 2001 From: Sheng Yong Date: Thu, 28 Jan 2016 11:40:26 +0000 Subject: f2fs: fix endianness of on-disk summary_footer Signed-off-by: Sheng Yong Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- include/linux/f2fs_fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index 44ae822e154f..ac8040278f69 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -345,7 +345,7 @@ struct f2fs_summary { struct summary_footer { unsigned char entry_type; /* SUM_TYPE_XXX */ - __u32 check_sum; /* summary checksum */ + __le32 check_sum; /* summary checksum */ } __packed; #define SUM_JOURNAL_SIZE (F2FS_BLKSIZE - SUM_FOOTER_SIZE -\ -- cgit v1.2.3 From 24b8491251cde66879e74092167cc0f27a1f11ce Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 3 Feb 2016 13:49:44 -0800 Subject: f2fs: preallocate blocks for buffered aio writes This patch preallocates data blocks for buffered aio writes. With this patch, we can avoid redundant locking and unlocking of node pages given consecutive aio requests.
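The preallocation below sizes the block map with F2FS_BLK_ALIGN(), i.e. the number of 4KB blocks needed to cover the byte count of the iov iterator. A quick sketch of that round-up (constants as in f2fs_fs.h, shift form as introduced by this patch):

    #include <assert.h>

    #define F2FS_BLKSIZE            4096
    #define F2FS_BLKSIZE_BITS       12

    /* ceil(x / 4096): add blocksize-1, then shift instead of dividing */
    #define F2FS_BLK_ALIGN(x) \
            (((x) + F2FS_BLKSIZE - 1) >> F2FS_BLKSIZE_BITS)

    int main(void)
    {
            assert(F2FS_BLK_ALIGN(1) == 1);         /* partial block -> 1 */
            assert(F2FS_BLK_ALIGN(4096) == 1);      /* exact fit */
            assert(F2FS_BLK_ALIGN(4097) == 2);      /* spills into a 2nd */
            return 0;
    }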
Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 37 ++++++++++++++++++++++++++++++------- fs/f2fs/f2fs.h | 1 + include/linux/f2fs_fs.h | 2 +- 3 files changed, 32 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index e7815ace6053..03f948e84115 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -571,16 +571,25 @@ ssize_t f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from) ssize_t ret = 0; map.m_lblk = F2FS_BYTES_TO_BLK(iocb->ki_pos); - map.m_len = F2FS_BYTES_TO_BLK(iov_iter_count(from)); + map.m_len = F2FS_BLK_ALIGN(iov_iter_count(from)); map.m_next_pgofs = NULL; - if (iocb->ki_flags & IOCB_DIRECT && - !(f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode))) { + if (f2fs_encrypted_inode(inode)) + return 0; + + if (iocb->ki_flags & IOCB_DIRECT) { + ret = f2fs_convert_inline_inode(inode); + if (ret) + return ret; + return f2fs_map_blocks(inode, &map, 1, F2FS_GET_BLOCK_PRE_DIO); + } + if (iocb->ki_pos + iov_iter_count(from) > MAX_INLINE_DATA) { ret = f2fs_convert_inline_inode(inode); if (ret) return ret; - ret = f2fs_map_blocks(inode, &map, 1, F2FS_GET_BLOCK_PRE_DIO); } + if (!f2fs_has_inline_data(inode)) + return f2fs_map_blocks(inode, &map, 1, F2FS_GET_BLOCK_PRE_AIO); return ret; } @@ -612,7 +621,7 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, /* it only supports block size == page size */ pgofs = (pgoff_t)map->m_lblk; - if (f2fs_lookup_extent_cache(inode, pgofs, &ei)) { + if (!create && f2fs_lookup_extent_cache(inode, pgofs, &ei)) { map->m_pblk = ei.blk + pgofs - ei.fofs; map->m_len = min((pgoff_t)maxblocks, ei.fofs + ei.len - pgofs); map->m_flags = F2FS_MAP_MAPPED; @@ -647,7 +656,12 @@ next_block: err = -EIO; goto sync_out; } - err = __allocate_data_block(&dn); + if (flag == F2FS_GET_BLOCK_PRE_AIO) { + if (blkaddr == NULL_ADDR) + err = reserve_new_block(&dn); + } else { + err = __allocate_data_block(&dn); + } if (err) goto sync_out; allocated = true; @@ -679,7 +693,8 @@ next_block: } else if ((map->m_pblk != NEW_ADDR && blkaddr == (map->m_pblk + ofs)) || (map->m_pblk == NEW_ADDR && blkaddr == NEW_ADDR) || - flag == F2FS_GET_BLOCK_PRE_DIO) { + flag == F2FS_GET_BLOCK_PRE_DIO || + flag == F2FS_GET_BLOCK_PRE_AIO) { ofs++; map->m_len++; } else { @@ -1418,6 +1433,14 @@ static int prepare_write_begin(struct f2fs_sb_info *sbi, struct extent_info ei; int err = 0; + /* + * we already allocated all the blocks, so we don't need to get + * the block addresses when there is no need to fill the page. + */ + if (!f2fs_has_inline_data(inode) && !f2fs_encrypted_inode(inode) && + len == PAGE_CACHE_SIZE) + return 0; + if (f2fs_has_inline_data(inode) || (pos & PAGE_CACHE_MASK) >= i_size_read(inode)) { f2fs_lock_op(sbi); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 445179152c3e..f6a841b85d40 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -392,6 +392,7 @@ struct f2fs_map_blocks { #define F2FS_GET_BLOCK_FIEMAP 2 #define F2FS_GET_BLOCK_BMAP 3 #define F2FS_GET_BLOCK_PRE_DIO 4 +#define F2FS_GET_BLOCK_PRE_AIO 5 /* * i_advise uses FADVISE_XXX_BIT. We can add additional hints later. 
diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index ac8040278f69..f43e6a01a023 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -21,7 +21,7 @@ #define F2FS_BLKSIZE 4096 /* support only 4KB block */ #define F2FS_BLKSIZE_BITS 12 /* bits for F2FS_BLKSIZE */ #define F2FS_MAX_EXTENSION 64 /* # of extension entries */ -#define F2FS_BLK_ALIGN(x) (((x) + F2FS_BLKSIZE - 1) / F2FS_BLKSIZE) +#define F2FS_BLK_ALIGN(x) (((x) + F2FS_BLKSIZE - 1) >> F2FS_BLKSIZE_BITS) #define NULL_ADDR ((block_t)0) /* used as block_t addresses */ #define NEW_ADDR ((block_t)-1) /* used as block_t addresses */ -- cgit v1.2.3 From 5eb385cc5ae1b31fbcdd727854a00c5a083f6b9b Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 22 Feb 2016 22:25:46 -0500 Subject: Revert "cgroup: add cgroup_subsys->css_e_css_changed()" This reverts commit 56c807ba4e91f0980567b6a69de239677879b17f. cgroup_subsys->css_e_css_changed() was supposed to be used by cgroup writeback support; however, the change to per-inode cgroup association made it unnecessary and the callback doesn't have any user. Remove it. Signed-off-by: Tejun Heo Acked-by: Johannes Weiner --- include/linux/cgroup-defs.h | 1 - kernel/cgroup.c | 18 ------------------ 2 files changed, 19 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 789471dba6fb..4f3c0dac26b5 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -434,7 +434,6 @@ struct cgroup_subsys { void (*css_released)(struct cgroup_subsys_state *css); void (*css_free)(struct cgroup_subsys_state *css); void (*css_reset)(struct cgroup_subsys_state *css); - void (*css_e_css_changed)(struct cgroup_subsys_state *css); int (*can_attach)(struct cgroup_taskset *tset); void (*cancel_attach)(struct cgroup_taskset *tset); diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 68b032df77f5..7727b6e43e10 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -3127,24 +3127,6 @@ static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of, } } - /* - * The effective csses of all the descendants (excluding @cgrp) may - * have changed. Subsystems can optionally subscribe to this event - * by implementing ->css_e_css_changed() which is invoked if any of - * the effective csses seen from the css's cgroup may have changed. - */ - for_each_subsys(ss, ssid) { - struct cgroup_subsys_state *this_css = cgroup_css(cgrp, ss); - struct cgroup_subsys_state *css; - - if (!ss->css_e_css_changed || !this_css) - continue; - - css_for_each_descendant_pre(css, this_css) - if (css != this_css) - ss->css_e_css_changed(css); - } - kernfs_activate(cgrp->kn); ret = 0; out_unlock: -- cgit v1.2.3 From 8699b7762a623c46ced891b3cf490058b56cf99c Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 22 Feb 2016 22:25:46 -0500 Subject: cgroup: s/child_subsys_mask/subtree_ss_mask/ For consistency with cgroup->subtree_control. * cgroup->child_subsys_mask -> cgroup->subtree_ss_mask * cgroup_calc_child_subsys_mask() -> cgroup_calc_subtree_ss_mask() * cgroup_refresh_child_subsys_mask() -> cgroup_refresh_subtree_ss_mask() No functional changes. 
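The mask being renamed here, subtree_ss_mask, is derived from subtree_control by repeatedly OR-ing in each enabled subsystem's depends_on mask until a fixed point is reached, as cgroup_calc_subtree_ss_mask() below shows. A hedged sketch of that closure with a hypothetical four-subsystem dependency table (not the kernel's):

    #include <assert.h>

    #define NR_SS 4

    /* Hypothetical dependency masks: bit i means "subsystem i". */
    static const unsigned int depends_on[NR_SS] = {
            0x0,    /* ss0: none */
            0x4,    /* ss1 needs ss2 */
            0x8,    /* ss2 needs ss3 */
            0x0,    /* ss3: none */
    };

    /* Close a control mask under depends_on, mirroring the fixed-point
     * loop in cgroup_calc_subtree_ss_mask(). */
    static unsigned int calc_ss_mask(unsigned int control)
    {
            unsigned int cur = control;

            for (;;) {
                    unsigned int next = cur;
                    int i;

                    for (i = 0; i < NR_SS; i++)
                            if (cur & (1u << i))
                                    next |= depends_on[i];
                    if (next == cur)
                            return cur;
                    cur = next;
            }
    }

    int main(void)
    {
            /* enabling ss1 transitively enables ss2 and ss3 */
            assert(calc_ss_mask(0x2) == 0xe);
            return 0;
    }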
Signed-off-by: Tejun Heo Acked-by: Johannes Weiner --- include/linux/cgroup-defs.h | 11 +++++------ kernel/cgroup.c | 48 ++++++++++++++++++++++----------------------- 2 files changed, 29 insertions(+), 30 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 4f3c0dac26b5..c68ae7f0fb5f 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -253,13 +253,12 @@ struct cgroup { /* * The bitmask of subsystems enabled on the child cgroups. * ->subtree_control is the one configured through - * "cgroup.subtree_control" while ->child_subsys_mask is the - * effective one which may have more subsystems enabled. - * Controller knobs are made available iff it's enabled in - * ->subtree_control. + * "cgroup.subtree_control" while ->child_ss_mask is the effective + * one which may have more subsystems enabled. Controller knobs + * are made available iff it's enabled in ->subtree_control. */ - unsigned int subtree_control; - unsigned int child_subsys_mask; + unsigned long subtree_control; + unsigned long subtree_ss_mask; /* Private pointers for each registered subsystem */ struct cgroup_subsys_state __rcu *subsys[CGROUP_SUBSYS_COUNT]; diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 7727b6e43e10..f3cd67bfe6c0 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -391,10 +391,10 @@ static struct cgroup_subsys_state *cgroup_e_css(struct cgroup *cgrp, /* * This function is used while updating css associations and thus - * can't test the csses directly. Use ->child_subsys_mask. + * can't test the csses directly. Use ->subtree_ss_mask. */ while (cgroup_parent(cgrp) && - !(cgroup_parent(cgrp)->child_subsys_mask & (1 << ss->id))) + !(cgroup_parent(cgrp)->subtree_ss_mask & (1 << ss->id))) cgrp = cgroup_parent(cgrp); return cgroup_css(cgrp, ss); @@ -1256,7 +1256,7 @@ static umode_t cgroup_file_mode(const struct cftype *cft) } /** - * cgroup_calc_child_subsys_mask - calculate child_subsys_mask + * cgroup_calc_subtree_ss_mask - calculate subtree_ss_mask * @cgrp: the target cgroup * @subtree_control: the new subtree_control mask to consider * @@ -1268,8 +1268,8 @@ static umode_t cgroup_file_mode(const struct cftype *cft) * @subtree_control is to be applied to @cgrp. The returned mask is always * a superset of @subtree_control and follows the usual hierarchy rules. */ -static unsigned long cgroup_calc_child_subsys_mask(struct cgroup *cgrp, - unsigned long subtree_control) +static unsigned long cgroup_calc_subtree_ss_mask(struct cgroup *cgrp, + unsigned long subtree_control) { struct cgroup *parent = cgroup_parent(cgrp); unsigned long cur_ss_mask = subtree_control; @@ -1293,7 +1293,7 @@ static unsigned long cgroup_calc_child_subsys_mask(struct cgroup *cgrp, * to non-default hierarchies. */ if (parent) - new_ss_mask &= parent->child_subsys_mask; + new_ss_mask &= parent->subtree_ss_mask; else new_ss_mask &= cgrp->root->subsys_mask; @@ -1306,16 +1306,16 @@ static unsigned long cgroup_calc_child_subsys_mask(struct cgroup *cgrp, } /** - * cgroup_refresh_child_subsys_mask - update child_subsys_mask + * cgroup_refresh_subtree_ss_mask - update subtree_ss_mask * @cgrp: the target cgroup * - * Update @cgrp->child_subsys_mask according to the current - * @cgrp->subtree_control using cgroup_calc_child_subsys_mask(). + * Update @cgrp->subtree_ss_mask according to the current + * @cgrp->subtree_control using cgroup_calc_subtree_ss_mask(). 
*/ -static void cgroup_refresh_child_subsys_mask(struct cgroup *cgrp) +static void cgroup_refresh_subtree_ss_mask(struct cgroup *cgrp) { - cgrp->child_subsys_mask = - cgroup_calc_child_subsys_mask(cgrp, cgrp->subtree_control); + cgrp->subtree_ss_mask = + cgroup_calc_subtree_ss_mask(cgrp, cgrp->subtree_control); } /** @@ -1542,7 +1542,7 @@ static int rebind_subsystems(struct cgroup_root *dst_root, src_root->subsys_mask &= ~(1 << ssid); scgrp->subtree_control &= ~(1 << ssid); - cgroup_refresh_child_subsys_mask(scgrp); + cgroup_refresh_subtree_ss_mask(scgrp); /* default hierarchy doesn't enable controllers by default */ dst_root->subsys_mask |= 1 << ssid; @@ -1550,7 +1550,7 @@ static int rebind_subsystems(struct cgroup_root *dst_root, static_branch_enable(cgroup_subsys_on_dfl_key[ssid]); } else { dcgrp->subtree_control |= 1 << ssid; - cgroup_refresh_child_subsys_mask(dcgrp); + cgroup_refresh_subtree_ss_mask(dcgrp); static_branch_disable(cgroup_subsys_on_dfl_key[ssid]); } @@ -2523,11 +2523,11 @@ static int cgroup_migrate_prepare_dst(struct cgroup *dst_cgrp, lockdep_assert_held(&cgroup_mutex); /* - * Except for the root, child_subsys_mask must be zero for a cgroup + * Except for the root, subtree_ss_mask must be zero for a cgroup * with tasks so that child cgroups don't compete against tasks. */ if (dst_cgrp && cgroup_on_dfl(dst_cgrp) && cgroup_parent(dst_cgrp) && - dst_cgrp->child_subsys_mask) + dst_cgrp->subtree_ss_mask) return -EBUSY; /* look up the dst cset for each src cset and link it to src */ @@ -2880,7 +2880,7 @@ static int cgroup_subtree_control_show(struct seq_file *seq, void *v) * cgroup_update_dfl_csses - update css assoc of a subtree in default hierarchy * @cgrp: root of the subtree to update csses for * - * @cgrp's child_subsys_mask has changed and its subtree's (self excluded) + * @cgrp's subtree_ss_mask has changed and its subtree's (self excluded) * css associations need to be updated accordingly. This function looks up * all css_sets which are attached to the subtree, creates the matching * updated css_sets and migrates the tasks to the new ones. @@ -2902,7 +2902,7 @@ static int cgroup_update_dfl_csses(struct cgroup *cgrp) css_for_each_descendant_pre(css, cgroup_css(cgrp, NULL)) { struct cgrp_cset_link *link; - /* self is not affected by child_subsys_mask change */ + /* self is not affected by subtree_ss_mask change */ if (css->cgroup == cgrp) continue; @@ -3034,9 +3034,9 @@ static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of, * depending on subsystem dependencies. */ old_sc = cgrp->subtree_control; - old_ss = cgrp->child_subsys_mask; + old_ss = cgrp->subtree_ss_mask; new_sc = (old_sc | enable) & ~disable; - new_ss = cgroup_calc_child_subsys_mask(cgrp, new_sc); + new_ss = cgroup_calc_subtree_ss_mask(cgrp, new_sc); css_enable = ~old_ss & new_ss; css_disable = old_ss & ~new_ss; @@ -3069,7 +3069,7 @@ static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of, } cgrp->subtree_control = new_sc; - cgrp->child_subsys_mask = new_ss; + cgrp->subtree_ss_mask = new_ss; /* * Create new csses or make the existing ones visible. 
A css is @@ -3135,7 +3135,7 @@ out_unlock: err_undo_css: cgrp->subtree_control = old_sc; - cgrp->child_subsys_mask = old_ss; + cgrp->subtree_ss_mask = old_ss; for_each_subsys(ss, ssid) { if (!(enable & (1 << ssid))) @@ -4969,7 +4969,7 @@ static int cgroup_mkdir(struct kernfs_node *parent_kn, const char *name, /* let's create and online css's */ for_each_subsys(ss, ssid) { - if (parent->child_subsys_mask & (1 << ssid)) { + if (parent->subtree_ss_mask & (1 << ssid)) { ret = create_css(cgrp, ss, parent->subtree_control & (1 << ssid)); if (ret) @@ -4983,7 +4983,7 @@ static int cgroup_mkdir(struct kernfs_node *parent_kn, const char *name, */ if (!cgroup_on_dfl(cgrp)) { cgrp->subtree_control = parent->subtree_control; - cgroup_refresh_child_subsys_mask(cgrp); + cgroup_refresh_subtree_ss_mask(cgrp); } kernfs_activate(kn); -- cgit v1.2.3 From 6e5c830770f9045a17b1b931c3e11fbd5591e630 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 22 Feb 2016 22:25:47 -0500 Subject: cgroup: make cgroup subsystem masks u16 After the recent do_each_subsys_mask() conversion, there's no reason to use ulong for subsystem masks. We'll be adding more subsystem masks to persistent data structures, let's reduce its size to u16 which should be enough for now and the foreseeable future. This doesn't create any noticeable behavior differences. v2: Johannes spotted that the initial patch missed cgroup_no_v1_mask. Converted. Signed-off-by: Tejun Heo Acked-by: Johannes Weiner --- include/linux/cgroup-defs.h | 4 ++-- kernel/cgroup.c | 50 ++++++++++++++++++++++----------------------- 2 files changed, 26 insertions(+), 28 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index c68ae7f0fb5f..0abf6aa86c81 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -257,8 +257,8 @@ struct cgroup { * one which may have more subsystems enabled. Controller knobs * are made available iff it's enabled in ->subtree_control. */ - unsigned long subtree_control; - unsigned long subtree_ss_mask; + u16 subtree_control; + u16 subtree_ss_mask; /* Private pointers for each registered subsystem */ struct cgroup_subsys_state __rcu *subsys[CGROUP_SUBSYS_COUNT]; diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 1e561bd990b9..7669f68077b8 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -181,10 +181,10 @@ EXPORT_SYMBOL_GPL(cgrp_dfl_root); static bool cgrp_dfl_root_visible; /* Controllers blocked by the commandline in v1 */ -static unsigned long cgroup_no_v1_mask; +static u16 cgroup_no_v1_mask; /* some controllers are not supported in the default hierarchy */ -static unsigned long cgrp_dfl_root_inhibit_ss_mask; +static u16 cgrp_dfl_root_inhibit_ss_mask; /* The list of hierarchy roots */ @@ -208,19 +208,18 @@ static u64 css_serial_nr_next = 1; * fork/exit handlers to call. This avoids us having to do extra work in the * fork/exit path to check which subsystems have fork/exit callbacks. */ -static unsigned long have_fork_callback __read_mostly; -static unsigned long have_exit_callback __read_mostly; -static unsigned long have_free_callback __read_mostly; +static u16 have_fork_callback __read_mostly; +static u16 have_exit_callback __read_mostly; +static u16 have_free_callback __read_mostly; /* Ditto for the can_fork callback. 
*/ -static unsigned long have_canfork_callback __read_mostly; +static u16 have_canfork_callback __read_mostly; static struct file_system_type cgroup2_fs_type; static struct cftype cgroup_dfl_base_files[]; static struct cftype cgroup_legacy_base_files[]; -static int rebind_subsystems(struct cgroup_root *dst_root, - unsigned long ss_mask); +static int rebind_subsystems(struct cgroup_root *dst_root, u16 ss_mask); static void css_task_iter_advance(struct css_task_iter *it); static int cgroup_destroy_locked(struct cgroup *cgrp); static int create_css(struct cgroup *cgrp, struct cgroup_subsys *ss, @@ -1274,11 +1273,10 @@ static umode_t cgroup_file_mode(const struct cftype *cft) * @subtree_control is to be applied to @cgrp. The returned mask is always * a superset of @subtree_control and follows the usual hierarchy rules. */ -static unsigned long cgroup_calc_subtree_ss_mask(struct cgroup *cgrp, - unsigned long subtree_control) +static u16 cgroup_calc_subtree_ss_mask(struct cgroup *cgrp, u16 subtree_control) { struct cgroup *parent = cgroup_parent(cgrp); - unsigned long cur_ss_mask = subtree_control; + u16 cur_ss_mask = subtree_control; struct cgroup_subsys *ss; int ssid; @@ -1288,7 +1286,7 @@ static unsigned long cgroup_calc_subtree_ss_mask(struct cgroup *cgrp, return cur_ss_mask; while (true) { - unsigned long new_ss_mask = cur_ss_mask; + u16 new_ss_mask = cur_ss_mask; do_each_subsys_mask(ss, ssid, cur_ss_mask) { new_ss_mask |= ss->depends_on; @@ -1466,12 +1464,11 @@ err: return ret; } -static int rebind_subsystems(struct cgroup_root *dst_root, - unsigned long ss_mask) +static int rebind_subsystems(struct cgroup_root *dst_root, u16 ss_mask) { struct cgroup *dcgrp = &dst_root->cgrp; struct cgroup_subsys *ss; - unsigned long tmp_ss_mask; + u16 tmp_ss_mask; int ssid, i, ret; lockdep_assert_held(&cgroup_mutex); @@ -1507,7 +1504,7 @@ static int rebind_subsystems(struct cgroup_root *dst_root, */ if (dst_root == &cgrp_dfl_root) { if (cgrp_dfl_root_visible) { - pr_warn("failed to create files (%d) while rebinding 0x%lx to default root\n", + pr_warn("failed to create files (%d) while rebinding 0x%x to default root\n", ret, ss_mask); pr_warn("you may retry by moving them to a different hierarchy and unbinding\n"); } @@ -1599,7 +1596,7 @@ static int cgroup_show_options(struct seq_file *seq, } struct cgroup_sb_opts { - unsigned long subsys_mask; + u16 subsys_mask; unsigned int flags; char *release_agent; bool cpuset_clone_children; @@ -1612,13 +1609,13 @@ static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts) { char *token, *o = data; bool all_ss = false, one_ss = false; - unsigned long mask = -1UL; + u16 mask = U16_MAX; struct cgroup_subsys *ss; int nr_opts = 0; int i; #ifdef CONFIG_CPUSETS - mask = ~(1U << cpuset_cgrp_id); + mask = ~((u16)1 << cpuset_cgrp_id); #endif memset(opts, 0, sizeof(*opts)); @@ -1745,7 +1742,7 @@ static int cgroup_remount(struct kernfs_root *kf_root, int *flags, char *data) int ret = 0; struct cgroup_root *root = cgroup_root_from_kf(kf_root); struct cgroup_sb_opts opts; - unsigned long added_mask, removed_mask; + u16 added_mask, removed_mask; if (root == &cgrp_dfl_root) { pr_err("remount is not allowed\n"); @@ -1893,7 +1890,7 @@ static void init_cgroup_root(struct cgroup_root *root, set_bit(CGRP_CPUSET_CLONE_CHILDREN, &root->cgrp.flags); } -static int cgroup_setup_root(struct cgroup_root *root, unsigned long ss_mask) +static int cgroup_setup_root(struct cgroup_root *root, u16 ss_mask) { LIST_HEAD(tmp_links); struct cgroup *root_cgrp = &root->cgrp; @@ -2839,7 
+2836,7 @@ static int cgroup_sane_behavior_show(struct seq_file *seq, void *v) return 0; } -static void cgroup_print_ss_mask(struct seq_file *seq, unsigned long ss_mask) +static void cgroup_print_ss_mask(struct seq_file *seq, u16 ss_mask) { struct cgroup_subsys *ss; bool printed = false; @@ -2950,8 +2947,8 @@ static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of, char *buf, size_t nbytes, loff_t off) { - unsigned long enable = 0, disable = 0; - unsigned long css_enable, css_disable, old_sc, new_sc, old_ss, new_ss; + u16 enable = 0, disable = 0; + u16 css_enable, css_disable, old_sc, new_sc, old_ss, new_ss; struct cgroup *cgrp, *child; struct cgroup_subsys *ss; char *tok; @@ -5255,7 +5252,7 @@ int __init cgroup_init_early(void) return 0; } -static unsigned long cgroup_disable_mask __initdata; +static u16 cgroup_disable_mask __initdata; /** * cgroup_init - cgroup initialization @@ -5269,6 +5266,7 @@ int __init cgroup_init(void) unsigned long key; int ssid; + BUILD_BUG_ON(CGROUP_SUBSYS_COUNT > 16); BUG_ON(percpu_init_rwsem(&cgroup_threadgroup_rwsem)); BUG_ON(cgroup_init_cftypes(NULL, cgroup_dfl_base_files)); BUG_ON(cgroup_init_cftypes(NULL, cgroup_legacy_base_files)); @@ -5754,7 +5752,7 @@ static int __init cgroup_no_v1(char *str) continue; if (!strcmp(token, "all")) { - cgroup_no_v1_mask = ~0UL; + cgroup_no_v1_mask = U16_MAX; break; } -- cgit v1.2.3 From 30187e1d48a258e304af184c45c3140c8509d219 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Sun, 31 Jan 2016 13:28:26 -0500 Subject: dm: rename target's per_bio_data_size to per_io_data_size Request-based DM will also make use of per_bio_data_size. Signed-off-by: Mike Snitzer --- drivers/md/dm-cache-target.c | 2 +- drivers/md/dm-crypt.c | 2 +- drivers/md/dm-delay.c | 2 +- drivers/md/dm-flakey.c | 2 +- drivers/md/dm-log-writes.c | 2 +- drivers/md/dm-raid1.c | 2 +- drivers/md/dm-snap.c | 2 +- drivers/md/dm-table.c | 6 +++--- drivers/md/dm-thin.c | 2 +- drivers/md/dm-verity-fec.c | 2 +- drivers/md/dm-verity-target.c | 12 ++++++------ drivers/md/dm.c | 8 ++++---- include/linux/device-mapper.h | 6 +++--- 13 files changed, 25 insertions(+), 25 deletions(-) (limited to 'include/linux') diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c index 5780accffa30..2238d6f484fe 100644 --- a/drivers/md/dm-cache-target.c +++ b/drivers/md/dm-cache-target.c @@ -2771,7 +2771,7 @@ static int cache_create(struct cache_args *ca, struct cache **result) ti->split_discard_bios = false; cache->features = ca->features; - ti->per_bio_data_size = get_per_bio_data_size(cache); + ti->per_io_data_size = get_per_bio_data_size(cache); cache->callbacks.congested_fn = cache_is_congested; dm_table_add_target_callbacks(ti->table, &cache->callbacks); diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 3147c8d09ea8..5c934b670154 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -1788,7 +1788,7 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv) goto bad; } - cc->per_bio_data_size = ti->per_bio_data_size = + cc->per_bio_data_size = ti->per_io_data_size = ALIGN(sizeof(struct dm_crypt_io) + cc->dmreq_start + sizeof(struct dm_crypt_request) + iv_size_padding + cc->iv_size, ARCH_KMALLOC_MINALIGN); diff --git a/drivers/md/dm-delay.c b/drivers/md/dm-delay.c index b4c356a21123..cc70871a6d29 100644 --- a/drivers/md/dm-delay.c +++ b/drivers/md/dm-delay.c @@ -204,7 +204,7 @@ out: ti->num_flush_bios = 1; ti->num_discard_bios = 1; - ti->per_bio_data_size = sizeof(struct dm_delay_info); + 
ti->per_io_data_size = sizeof(struct dm_delay_info); ti->private = dc; return 0; diff --git a/drivers/md/dm-flakey.c b/drivers/md/dm-flakey.c index 09e2afcafd2d..b7341de87015 100644 --- a/drivers/md/dm-flakey.c +++ b/drivers/md/dm-flakey.c @@ -220,7 +220,7 @@ static int flakey_ctr(struct dm_target *ti, unsigned int argc, char **argv) ti->num_flush_bios = 1; ti->num_discard_bios = 1; - ti->per_bio_data_size = sizeof(struct per_bio_data); + ti->per_io_data_size = sizeof(struct per_bio_data); ti->private = fc; return 0; diff --git a/drivers/md/dm-log-writes.c b/drivers/md/dm-log-writes.c index 624589d51c2c..608302e222af 100644 --- a/drivers/md/dm-log-writes.c +++ b/drivers/md/dm-log-writes.c @@ -475,7 +475,7 @@ static int log_writes_ctr(struct dm_target *ti, unsigned int argc, char **argv) ti->flush_supported = true; ti->num_discard_bios = 1; ti->discards_supported = true; - ti->per_bio_data_size = sizeof(struct per_bio_data); + ti->per_io_data_size = sizeof(struct per_bio_data); ti->private = lc; return 0; diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c index f2a363a89629..b3ccf1e0d4f2 100644 --- a/drivers/md/dm-raid1.c +++ b/drivers/md/dm-raid1.c @@ -1121,7 +1121,7 @@ static int mirror_ctr(struct dm_target *ti, unsigned int argc, char **argv) ti->num_flush_bios = 1; ti->num_discard_bios = 1; - ti->per_bio_data_size = sizeof(struct dm_raid1_bio_record); + ti->per_io_data_size = sizeof(struct dm_raid1_bio_record); ti->discard_zeroes_data_unsupported = true; ms->kmirrord_wq = alloc_workqueue("kmirrord", WQ_MEM_RECLAIM, 0); diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index 3766386080a4..62479ac4baf7 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -1201,7 +1201,7 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) ti->private = s; ti->num_flush_bios = num_flush_bios; - ti->per_bio_data_size = sizeof(struct dm_snap_tracked_chunk); + ti->per_io_data_size = sizeof(struct dm_snap_tracked_chunk); /* Add snapshot to the list of snapshots for this origin */ /* Exceptions aren't triggered till snapshot_resume() is called */ diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 89180fdbd28c..7210e5392cc4 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -957,7 +957,7 @@ bool dm_table_mq_request_based(struct dm_table *t) static int dm_table_alloc_md_mempools(struct dm_table *t, struct mapped_device *md) { unsigned type = dm_table_get_type(t); - unsigned per_bio_data_size = 0; + unsigned per_io_data_size = 0; struct dm_target *tgt; unsigned i; @@ -969,10 +969,10 @@ static int dm_table_alloc_md_mempools(struct dm_table *t, struct mapped_device * if (type == DM_TYPE_BIO_BASED) for (i = 0; i < t->num_targets; i++) { tgt = t->targets + i; - per_bio_data_size = max(per_bio_data_size, tgt->per_bio_data_size); + per_io_data_size = max(per_io_data_size, tgt->per_io_data_size); } - t->mempools = dm_alloc_md_mempools(md, type, t->integrity_supported, per_bio_data_size); + t->mempools = dm_alloc_md_mempools(md, type, t->integrity_supported, per_io_data_size); if (!t->mempools) return -ENOMEM; diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index 72d91f477683..4fbbe1fb9f08 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -4037,7 +4037,7 @@ static int thin_ctr(struct dm_target *ti, unsigned argc, char **argv) ti->num_flush_bios = 1; ti->flush_supported = true; - ti->per_bio_data_size = sizeof(struct dm_thin_endio_hook); + ti->per_io_data_size = sizeof(struct dm_thin_endio_hook); /* In case the pool 
supports discards, pass them on. */ ti->discard_zeroes_data_unsupported = true; diff --git a/drivers/md/dm-verity-fec.c b/drivers/md/dm-verity-fec.c index 1cc10c4de701..459a9f8905ed 100644 --- a/drivers/md/dm-verity-fec.c +++ b/drivers/md/dm-verity-fec.c @@ -812,7 +812,7 @@ int verity_fec_ctr(struct dm_verity *v) } /* Reserve space for our per-bio data */ - ti->per_bio_data_size += sizeof(struct dm_verity_fec_io); + ti->per_io_data_size += sizeof(struct dm_verity_fec_io); return 0; } diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c index 5c5d30cb6ec5..0aba34a7b3b3 100644 --- a/drivers/md/dm-verity-target.c +++ b/drivers/md/dm-verity-target.c @@ -354,7 +354,7 @@ int verity_for_bv_block(struct dm_verity *v, struct dm_verity_io *io, size_t len)) { unsigned todo = 1 << v->data_dev_block_bits; - struct bio *bio = dm_bio_from_per_bio_data(io, v->ti->per_bio_data_size); + struct bio *bio = dm_bio_from_per_bio_data(io, v->ti->per_io_data_size); do { int r; @@ -460,7 +460,7 @@ static int verity_verify_io(struct dm_verity_io *io) static void verity_finish_io(struct dm_verity_io *io, int error) { struct dm_verity *v = io->v; - struct bio *bio = dm_bio_from_per_bio_data(io, v->ti->per_bio_data_size); + struct bio *bio = dm_bio_from_per_bio_data(io, v->ti->per_io_data_size); bio->bi_end_io = io->orig_bi_end_io; bio->bi_error = error; @@ -574,7 +574,7 @@ static int verity_map(struct dm_target *ti, struct bio *bio) if (bio_data_dir(bio) == WRITE) return -EIO; - io = dm_per_bio_data(bio, ti->per_bio_data_size); + io = dm_per_bio_data(bio, ti->per_io_data_size); io->v = v; io->orig_bi_end_io = bio->bi_end_io; io->block = bio->bi_iter.bi_sector >> (v->data_dev_block_bits - SECTOR_SHIFT); @@ -1036,15 +1036,15 @@ static int verity_ctr(struct dm_target *ti, unsigned argc, char **argv) goto bad; } - ti->per_bio_data_size = sizeof(struct dm_verity_io) + + ti->per_io_data_size = sizeof(struct dm_verity_io) + v->shash_descsize + v->digest_size * 2; r = verity_fec_ctr(v); if (r) goto bad; - ti->per_bio_data_size = roundup(ti->per_bio_data_size, - __alignof__(struct dm_verity_io)); + ti->per_io_data_size = roundup(ti->per_io_data_size, + __alignof__(struct dm_verity_io)); return 0; diff --git a/drivers/md/dm.c b/drivers/md/dm.c index d4040e6d4d3d..89aa9618c061 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -3476,7 +3476,7 @@ int dm_noflush_suspending(struct dm_target *ti) EXPORT_SYMBOL_GPL(dm_noflush_suspending); struct dm_md_mempools *dm_alloc_md_mempools(struct mapped_device *md, unsigned type, - unsigned integrity, unsigned per_bio_data_size) + unsigned integrity, unsigned per_io_data_size) { struct dm_md_mempools *pools = kzalloc(sizeof(*pools), GFP_KERNEL); struct kmem_cache *cachep = NULL; @@ -3492,7 +3492,7 @@ struct dm_md_mempools *dm_alloc_md_mempools(struct mapped_device *md, unsigned t case DM_TYPE_BIO_BASED: cachep = _io_cache; pool_size = dm_get_reserved_bio_based_ios(); - front_pad = roundup(per_bio_data_size, __alignof__(struct dm_target_io)) + offsetof(struct dm_target_io, clone); + front_pad = roundup(per_io_data_size, __alignof__(struct dm_target_io)) + offsetof(struct dm_target_io, clone); break; case DM_TYPE_REQUEST_BASED: cachep = _rq_tio_cache; @@ -3505,8 +3505,8 @@ struct dm_md_mempools *dm_alloc_md_mempools(struct mapped_device *md, unsigned t if (!pool_size) pool_size = dm_get_reserved_rq_based_ios(); front_pad = offsetof(struct dm_rq_clone_bio_info, clone); - /* per_bio_data_size is not used. See __bind_mempools(). 
*/ - WARN_ON(per_bio_data_size != 0); + /* per_io_data_size is not used. */ + WARN_ON(per_io_data_size != 0); break; default: BUG(); diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index 87d50ecbc9df..82ae3b5e2c45 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -238,10 +238,10 @@ struct dm_target { unsigned num_write_same_bios; /* - * The minimum number of extra bytes allocated in each bio for the - * target to use. dm_per_bio_data returns the data location. + * The minimum number of extra bytes allocated in each io for the + * target to use. */ - unsigned per_bio_data_size; + unsigned per_io_data_size; /* * If defined, this function is called to find out how many -- cgit v1.2.3 From 7a2508e1b657cfc7e1371550f88c7a7bc4288f32 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 22 Feb 2016 22:35:22 -0500 Subject: mbcache2: rename to mbcache Since old mbcache code is gone, let's rename new code to mbcache since number 2 is now meaningless. This is just a mechanical replacement. Signed-off-by: Jan Kara Signed-off-by: Theodore Ts'o --- fs/Makefile | 2 +- fs/ext2/ext2.h | 4 +- fs/ext2/xattr.c | 48 +++--- fs/ext2/xattr.h | 8 +- fs/ext4/ext4.h | 2 +- fs/ext4/xattr.c | 54 +++--- fs/ext4/xattr.h | 4 +- fs/mbcache.c | 424 +++++++++++++++++++++++++++++++++++++++++++++++ fs/mbcache2.c | 424 ----------------------------------------------- include/linux/mbcache.h | 53 ++++++ include/linux/mbcache2.h | 53 ------ 11 files changed, 538 insertions(+), 538 deletions(-) create mode 100644 fs/mbcache.c delete mode 100644 fs/mbcache2.c create mode 100644 include/linux/mbcache.h delete mode 100644 include/linux/mbcache2.h (limited to 'include/linux') diff --git a/fs/Makefile b/fs/Makefile index 59b844007fbc..79f522575cba 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -41,7 +41,7 @@ obj-$(CONFIG_COMPAT_BINFMT_ELF) += compat_binfmt_elf.o obj-$(CONFIG_BINFMT_ELF_FDPIC) += binfmt_elf_fdpic.o obj-$(CONFIG_BINFMT_FLAT) += binfmt_flat.o -obj-$(CONFIG_FS_MBCACHE) += mbcache2.o +obj-$(CONFIG_FS_MBCACHE) += mbcache.o obj-$(CONFIG_FS_POSIX_ACL) += posix_acl.o obj-$(CONFIG_NFS_COMMON) += nfs_common/ obj-$(CONFIG_COREDUMP) += coredump.o diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h index f98ce7e60a0f..170939f379d7 100644 --- a/fs/ext2/ext2.h +++ b/fs/ext2/ext2.h @@ -61,7 +61,7 @@ struct ext2_block_alloc_info { #define rsv_start rsv_window._rsv_start #define rsv_end rsv_window._rsv_end -struct mb2_cache; +struct mb_cache; /* * second extended-fs super-block data in memory @@ -113,7 +113,7 @@ struct ext2_sb_info { * of the mount options. 
*/ spinlock_t s_lock; - struct mb2_cache *s_mb_cache; + struct mb_cache *s_mb_cache; }; static inline spinlock_t * diff --git a/fs/ext2/xattr.c b/fs/ext2/xattr.c index 7162b4869bc3..71d58c2d7a19 100644 --- a/fs/ext2/xattr.c +++ b/fs/ext2/xattr.c @@ -56,7 +56,7 @@ #include #include #include -#include +#include #include #include #include @@ -90,7 +90,7 @@ static int ext2_xattr_set2(struct inode *, struct buffer_head *, struct ext2_xattr_header *); -static int ext2_xattr_cache_insert(struct mb2_cache *, struct buffer_head *); +static int ext2_xattr_cache_insert(struct mb_cache *, struct buffer_head *); static struct buffer_head *ext2_xattr_cache_find(struct inode *, struct ext2_xattr_header *); static void ext2_xattr_rehash(struct ext2_xattr_header *, @@ -150,7 +150,7 @@ ext2_xattr_get(struct inode *inode, int name_index, const char *name, size_t name_len, size; char *end; int error; - struct mb2_cache *ext2_mb_cache = EXT2_SB(inode->i_sb)->s_mb_cache; + struct mb_cache *ext2_mb_cache = EXT2_SB(inode->i_sb)->s_mb_cache; ea_idebug(inode, "name=%d.%s, buffer=%p, buffer_size=%ld", name_index, name, buffer, (long)buffer_size); @@ -246,7 +246,7 @@ ext2_xattr_list(struct dentry *dentry, char *buffer, size_t buffer_size) char *end; size_t rest = buffer_size; int error; - struct mb2_cache *ext2_mb_cache = EXT2_SB(inode->i_sb)->s_mb_cache; + struct mb_cache *ext2_mb_cache = EXT2_SB(inode->i_sb)->s_mb_cache; ea_idebug(inode, "buffer=%p, buffer_size=%ld", buffer, (long)buffer_size); @@ -493,8 +493,8 @@ bad_block: ext2_error(sb, "ext2_xattr_set", * This must happen under buffer lock for * ext2_xattr_set2() to reliably detect modified block */ - mb2_cache_entry_delete_block(EXT2_SB(sb)->s_mb_cache, - hash, bh->b_blocknr); + mb_cache_entry_delete_block(EXT2_SB(sb)->s_mb_cache, + hash, bh->b_blocknr); /* keep the buffer locked while modifying it. */ } else { @@ -627,7 +627,7 @@ ext2_xattr_set2(struct inode *inode, struct buffer_head *old_bh, struct super_block *sb = inode->i_sb; struct buffer_head *new_bh = NULL; int error; - struct mb2_cache *ext2_mb_cache = EXT2_SB(sb)->s_mb_cache; + struct mb_cache *ext2_mb_cache = EXT2_SB(sb)->s_mb_cache; if (header) { new_bh = ext2_xattr_cache_find(inode, header); @@ -721,8 +721,8 @@ ext2_xattr_set2(struct inode *inode, struct buffer_head *old_bh, * This must happen under buffer lock for * ext2_xattr_set2() to reliably detect freed block */ - mb2_cache_entry_delete_block(ext2_mb_cache, - hash, old_bh->b_blocknr); + mb_cache_entry_delete_block(ext2_mb_cache, + hash, old_bh->b_blocknr); /* Free the old block. */ ea_bdebug(old_bh, "freeing"); ext2_free_blocks(inode, old_bh->b_blocknr, 1); @@ -786,8 +786,8 @@ ext2_xattr_delete_inode(struct inode *inode) * This must happen under buffer lock for ext2_xattr_set2() to * reliably detect freed block */ - mb2_cache_entry_delete_block(EXT2_SB(inode->i_sb)->s_mb_cache, - hash, bh->b_blocknr); + mb_cache_entry_delete_block(EXT2_SB(inode->i_sb)->s_mb_cache, + hash, bh->b_blocknr); ext2_free_blocks(inode, EXT2_I(inode)->i_file_acl, 1); get_bh(bh); bforget(bh); @@ -818,12 +818,12 @@ cleanup: * Returns 0, or a negative error number on failure. 
*/ static int -ext2_xattr_cache_insert(struct mb2_cache *cache, struct buffer_head *bh) +ext2_xattr_cache_insert(struct mb_cache *cache, struct buffer_head *bh) { __u32 hash = le32_to_cpu(HDR(bh)->h_hash); int error; - error = mb2_cache_entry_create(cache, GFP_NOFS, hash, bh->b_blocknr); + error = mb_cache_entry_create(cache, GFP_NOFS, hash, bh->b_blocknr); if (error) { if (error == -EBUSY) { ea_bdebug(bh, "already in cache (%d cache entries)", @@ -887,14 +887,14 @@ static struct buffer_head * ext2_xattr_cache_find(struct inode *inode, struct ext2_xattr_header *header) { __u32 hash = le32_to_cpu(header->h_hash); - struct mb2_cache_entry *ce; - struct mb2_cache *ext2_mb_cache = EXT2_SB(inode->i_sb)->s_mb_cache; + struct mb_cache_entry *ce; + struct mb_cache *ext2_mb_cache = EXT2_SB(inode->i_sb)->s_mb_cache; if (!header->h_hash) return NULL; /* never share */ ea_idebug(inode, "looking for cached blocks [%x]", (int)hash); again: - ce = mb2_cache_entry_find_first(ext2_mb_cache, hash); + ce = mb_cache_entry_find_first(ext2_mb_cache, hash); while (ce) { struct buffer_head *bh; @@ -915,7 +915,7 @@ again: * entry is still hashed is reliable. */ if (hlist_bl_unhashed(&ce->e_hash_list)) { - mb2_cache_entry_put(ext2_mb_cache, ce); + mb_cache_entry_put(ext2_mb_cache, ce); unlock_buffer(bh); brelse(bh); goto again; @@ -928,14 +928,14 @@ again: } else if (!ext2_xattr_cmp(header, HDR(bh))) { ea_bdebug(bh, "b_count=%d", atomic_read(&(bh->b_count))); - mb2_cache_entry_touch(ext2_mb_cache, ce); - mb2_cache_entry_put(ext2_mb_cache, ce); + mb_cache_entry_touch(ext2_mb_cache, ce); + mb_cache_entry_put(ext2_mb_cache, ce); return bh; } unlock_buffer(bh); brelse(bh); } - ce = mb2_cache_entry_find_next(ext2_mb_cache, ce); + ce = mb_cache_entry_find_next(ext2_mb_cache, ce); } return NULL; } @@ -1010,13 +1010,13 @@ static void ext2_xattr_rehash(struct ext2_xattr_header *header, #define HASH_BUCKET_BITS 10 -struct mb2_cache *ext2_xattr_create_cache(void) +struct mb_cache *ext2_xattr_create_cache(void) { - return mb2_cache_create(HASH_BUCKET_BITS); + return mb_cache_create(HASH_BUCKET_BITS); } -void ext2_xattr_destroy_cache(struct mb2_cache *cache) +void ext2_xattr_destroy_cache(struct mb_cache *cache) { if (cache) - mb2_cache_destroy(cache); + mb_cache_destroy(cache); } diff --git a/fs/ext2/xattr.h b/fs/ext2/xattr.h index 6ea38aa9563a..6f82ab1b00ca 100644 --- a/fs/ext2/xattr.h +++ b/fs/ext2/xattr.h @@ -53,7 +53,7 @@ struct ext2_xattr_entry { #define EXT2_XATTR_SIZE(size) \ (((size) + EXT2_XATTR_ROUND) & ~EXT2_XATTR_ROUND) -struct mb2_cache; +struct mb_cache; # ifdef CONFIG_EXT2_FS_XATTR @@ -68,8 +68,8 @@ extern int ext2_xattr_set(struct inode *, int, const char *, const void *, size_ extern void ext2_xattr_delete_inode(struct inode *); -extern struct mb2_cache *ext2_xattr_create_cache(void); -extern void ext2_xattr_destroy_cache(struct mb2_cache *cache); +extern struct mb_cache *ext2_xattr_create_cache(void); +extern void ext2_xattr_destroy_cache(struct mb_cache *cache); extern const struct xattr_handler *ext2_xattr_handlers[]; @@ -94,7 +94,7 @@ ext2_xattr_delete_inode(struct inode *inode) { } -static inline void ext2_xattr_destroy_cache(struct mb2_cache *cache) +static inline void ext2_xattr_destroy_cache(struct mb_cache *cache) { } diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 9ac9e62569ef..157b458a69d4 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1468,7 +1468,7 @@ struct ext4_sb_info { struct list_head s_es_list; /* List of inodes with reclaimable extents */ long s_es_nr_inode; struct ext4_es_stats 
s_es_stats; - struct mb2_cache *s_mb_cache; + struct mb_cache *s_mb_cache; spinlock_t s_es_lock ____cacheline_aligned_in_smp; /* Ratelimit ext4 messages. */ diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index fe9f8d6ab6c9..c6af8a7a436a 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -53,7 +53,7 @@ #include #include #include -#include +#include #include #include "ext4_jbd2.h" #include "ext4.h" @@ -78,10 +78,10 @@ # define ea_bdebug(bh, fmt, ...) no_printk(fmt, ##__VA_ARGS__) #endif -static void ext4_xattr_cache_insert(struct mb2_cache *, struct buffer_head *); +static void ext4_xattr_cache_insert(struct mb_cache *, struct buffer_head *); static struct buffer_head *ext4_xattr_cache_find(struct inode *, struct ext4_xattr_header *, - struct mb2_cache_entry **); + struct mb_cache_entry **); static void ext4_xattr_rehash(struct ext4_xattr_header *, struct ext4_xattr_entry *); static int ext4_xattr_list(struct dentry *dentry, char *buffer, @@ -276,7 +276,7 @@ ext4_xattr_block_get(struct inode *inode, int name_index, const char *name, struct ext4_xattr_entry *entry; size_t size; int error; - struct mb2_cache *ext4_mb_cache = EXT4_GET_MB_CACHE(inode); + struct mb_cache *ext4_mb_cache = EXT4_GET_MB_CACHE(inode); ea_idebug(inode, "name=%d.%s, buffer=%p, buffer_size=%ld", name_index, name, buffer, (long)buffer_size); @@ -428,7 +428,7 @@ ext4_xattr_block_list(struct dentry *dentry, char *buffer, size_t buffer_size) struct inode *inode = d_inode(dentry); struct buffer_head *bh = NULL; int error; - struct mb2_cache *ext4_mb_cache = EXT4_GET_MB_CACHE(inode); + struct mb_cache *ext4_mb_cache = EXT4_GET_MB_CACHE(inode); ea_idebug(inode, "buffer=%p, buffer_size=%ld", buffer, (long)buffer_size); @@ -561,8 +561,8 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode, * This must happen under buffer lock for * ext4_xattr_block_set() to reliably detect freed block */ - mb2_cache_entry_delete_block(EXT4_GET_MB_CACHE(inode), hash, - bh->b_blocknr); + mb_cache_entry_delete_block(EXT4_GET_MB_CACHE(inode), hash, + bh->b_blocknr); get_bh(bh); unlock_buffer(bh); ext4_free_blocks(handle, inode, bh, 0, 1, @@ -782,9 +782,9 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode, struct super_block *sb = inode->i_sb; struct buffer_head *new_bh = NULL; struct ext4_xattr_search *s = &bs->s; - struct mb2_cache_entry *ce = NULL; + struct mb_cache_entry *ce = NULL; int error = 0; - struct mb2_cache *ext4_mb_cache = EXT4_GET_MB_CACHE(inode); + struct mb_cache *ext4_mb_cache = EXT4_GET_MB_CACHE(inode); #define header(x) ((struct ext4_xattr_header *)(x)) @@ -805,8 +805,8 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode, * ext4_xattr_block_set() to reliably detect modified * block */ - mb2_cache_entry_delete_block(ext4_mb_cache, hash, - bs->bh->b_blocknr); + mb_cache_entry_delete_block(ext4_mb_cache, hash, + bs->bh->b_blocknr); ea_bdebug(bs->bh, "modifying in-place"); error = ext4_xattr_set_entry(i, s); if (!error) { @@ -904,7 +904,7 @@ inserted: EXT4_C2B(EXT4_SB(sb), 1)); brelse(new_bh); - mb2_cache_entry_put(ext4_mb_cache, ce); + mb_cache_entry_put(ext4_mb_cache, ce); ce = NULL; new_bh = NULL; goto inserted; @@ -919,8 +919,8 @@ inserted: if (error) goto cleanup_dquot; } - mb2_cache_entry_touch(ext4_mb_cache, ce); - mb2_cache_entry_put(ext4_mb_cache, ce); + mb_cache_entry_touch(ext4_mb_cache, ce); + mb_cache_entry_put(ext4_mb_cache, ce); ce = NULL; } else if (bs->bh && s->base == bs->bh->b_data) { /* We were modifying this block in-place. 
*/ @@ -985,7 +985,7 @@ getblk_failed: cleanup: if (ce) - mb2_cache_entry_put(ext4_mb_cache, ce); + mb_cache_entry_put(ext4_mb_cache, ce); brelse(new_bh); if (!(bs->bh && s->base == bs->bh->b_data)) kfree(s->base); @@ -1546,13 +1546,13 @@ cleanup: * Returns 0, or a negative error number on failure. */ static void -ext4_xattr_cache_insert(struct mb2_cache *ext4_mb_cache, struct buffer_head *bh) +ext4_xattr_cache_insert(struct mb_cache *ext4_mb_cache, struct buffer_head *bh) { __u32 hash = le32_to_cpu(BHDR(bh)->h_hash); int error; - error = mb2_cache_entry_create(ext4_mb_cache, GFP_NOFS, hash, - bh->b_blocknr); + error = mb_cache_entry_create(ext4_mb_cache, GFP_NOFS, hash, + bh->b_blocknr); if (error) { if (error == -EBUSY) ea_bdebug(bh, "already in cache"); @@ -1610,16 +1610,16 @@ ext4_xattr_cmp(struct ext4_xattr_header *header1, */ static struct buffer_head * ext4_xattr_cache_find(struct inode *inode, struct ext4_xattr_header *header, - struct mb2_cache_entry **pce) + struct mb_cache_entry **pce) { __u32 hash = le32_to_cpu(header->h_hash); - struct mb2_cache_entry *ce; - struct mb2_cache *ext4_mb_cache = EXT4_GET_MB_CACHE(inode); + struct mb_cache_entry *ce; + struct mb_cache *ext4_mb_cache = EXT4_GET_MB_CACHE(inode); if (!header->h_hash) return NULL; /* never share */ ea_idebug(inode, "looking for cached blocks [%x]", (int)hash); - ce = mb2_cache_entry_find_first(ext4_mb_cache, hash); + ce = mb_cache_entry_find_first(ext4_mb_cache, hash); while (ce) { struct buffer_head *bh; @@ -1638,7 +1638,7 @@ ext4_xattr_cache_find(struct inode *inode, struct ext4_xattr_header *header, return bh; } brelse(bh); - ce = mb2_cache_entry_find_next(ext4_mb_cache, ce); + ce = mb_cache_entry_find_next(ext4_mb_cache, ce); } return NULL; } @@ -1713,15 +1713,15 @@ static void ext4_xattr_rehash(struct ext4_xattr_header *header, #define HASH_BUCKET_BITS 10 -struct mb2_cache * +struct mb_cache * ext4_xattr_create_cache(void) { - return mb2_cache_create(HASH_BUCKET_BITS); + return mb_cache_create(HASH_BUCKET_BITS); } -void ext4_xattr_destroy_cache(struct mb2_cache *cache) +void ext4_xattr_destroy_cache(struct mb_cache *cache) { if (cache) - mb2_cache_destroy(cache); + mb_cache_destroy(cache); } diff --git a/fs/ext4/xattr.h b/fs/ext4/xattr.h index 10b0f7323ed6..69dd3e6566e0 100644 --- a/fs/ext4/xattr.h +++ b/fs/ext4/xattr.h @@ -123,8 +123,8 @@ extern int ext4_xattr_ibody_inline_set(handle_t *handle, struct inode *inode, struct ext4_xattr_info *i, struct ext4_xattr_ibody_find *is); -extern struct mb2_cache *ext4_xattr_create_cache(void); -extern void ext4_xattr_destroy_cache(struct mb2_cache *); +extern struct mb_cache *ext4_xattr_create_cache(void); +extern void ext4_xattr_destroy_cache(struct mb_cache *); #ifdef CONFIG_EXT4_FS_SECURITY extern int ext4_init_security(handle_t *handle, struct inode *inode, diff --git a/fs/mbcache.c b/fs/mbcache.c new file mode 100644 index 000000000000..4241b633f155 --- /dev/null +++ b/fs/mbcache.c @@ -0,0 +1,424 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * Mbcache is a simple key-value store. Keys need not be unique, however + * key-value pairs are expected to be unique (we use this fact in + * mb_cache_entry_delete_block()). + * + * Ext2 and ext4 use this cache for deduplication of extended attribute blocks. + * They use hash of a block contents as a key and block number as a value. + * That's why keys need not be unique (different xattr blocks may end up having + * the same hash). 
However block number always uniquely identifies a cache + * entry. + * + * We provide functions for creation and removal of entries, search by key, + * and a special "delete entry with given key-value pair" operation. Fixed + * size hash table is used for fast key lookups. + */ + +struct mb_cache { + /* Hash table of entries */ + struct hlist_bl_head *c_hash; + /* log2 of hash table size */ + int c_bucket_bits; + /* Maximum entries in cache to avoid degrading hash too much */ + int c_max_entries; + /* Protects c_list, c_entry_count */ + spinlock_t c_list_lock; + struct list_head c_list; + /* Number of entries in cache */ + unsigned long c_entry_count; + struct shrinker c_shrink; + /* Work for shrinking when the cache has too many entries */ + struct work_struct c_shrink_work; +}; + +static struct kmem_cache *mb_entry_cache; + +static unsigned long mb_cache_shrink(struct mb_cache *cache, + unsigned int nr_to_scan); + +static inline bool mb_cache_entry_referenced(struct mb_cache_entry *entry) +{ + return entry->_e_hash_list_head & 1; +} + +static inline void mb_cache_entry_set_referenced(struct mb_cache_entry *entry) +{ + entry->_e_hash_list_head |= 1; +} + +static inline void mb_cache_entry_clear_referenced( + struct mb_cache_entry *entry) +{ + entry->_e_hash_list_head &= ~1; +} + +static inline struct hlist_bl_head *mb_cache_entry_head( + struct mb_cache_entry *entry) +{ + return (struct hlist_bl_head *) + (entry->_e_hash_list_head & ~1); +} + +/* + * Number of entries to reclaim synchronously when there are too many entries + * in cache + */ +#define SYNC_SHRINK_BATCH 64 + +/* + * mb_cache_entry_create - create entry in cache + * @cache - cache where the entry should be created + * @mask - gfp mask with which the entry should be allocated + * @key - key of the entry + * @block - block that contains data + * + * Creates entry in @cache with key @key and records that data is stored in + * block @block. The function returns -EBUSY if entry with the same key + * and for the same block already exists in cache. Otherwise 0 is returned. 
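+ *
+ * A minimal caller-side sketch (illustrative only, not part of the
+ * original patch; cache, hash and bh stand for the caller's mb_cache,
+ * block hash and buffer_head, as in the ext2 caller above):
+ *
+ *	err = mb_cache_entry_create(cache, GFP_NOFS, hash, bh->b_blocknr);
+ *	if (err && err != -EBUSY)
+ *		return err;
+ *
+ * where -EBUSY only signals that this key/block pair is already cached.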
+ */ +int mb_cache_entry_create(struct mb_cache *cache, gfp_t mask, u32 key, + sector_t block) +{ + struct mb_cache_entry *entry, *dup; + struct hlist_bl_node *dup_node; + struct hlist_bl_head *head; + + /* Schedule background reclaim if there are too many entries */ + if (cache->c_entry_count >= cache->c_max_entries) + schedule_work(&cache->c_shrink_work); + /* Do some sync reclaim if background reclaim cannot keep up */ + if (cache->c_entry_count >= 2*cache->c_max_entries) + mb_cache_shrink(cache, SYNC_SHRINK_BATCH); + + entry = kmem_cache_alloc(mb_entry_cache, mask); + if (!entry) + return -ENOMEM; + + INIT_LIST_HEAD(&entry->e_list); + /* One ref for hash, one ref returned */ + atomic_set(&entry->e_refcnt, 1); + entry->e_key = key; + entry->e_block = block; + head = &cache->c_hash[hash_32(key, cache->c_bucket_bits)]; + entry->_e_hash_list_head = (unsigned long)head; + hlist_bl_lock(head); + hlist_bl_for_each_entry(dup, dup_node, head, e_hash_list) { + if (dup->e_key == key && dup->e_block == block) { + hlist_bl_unlock(head); + kmem_cache_free(mb_entry_cache, entry); + return -EBUSY; + } + } + hlist_bl_add_head(&entry->e_hash_list, head); + hlist_bl_unlock(head); + + spin_lock(&cache->c_list_lock); + list_add_tail(&entry->e_list, &cache->c_list); + /* Grab ref for LRU list */ + atomic_inc(&entry->e_refcnt); + cache->c_entry_count++; + spin_unlock(&cache->c_list_lock); + + return 0; +} +EXPORT_SYMBOL(mb_cache_entry_create); + +void __mb_cache_entry_free(struct mb_cache_entry *entry) +{ + kmem_cache_free(mb_entry_cache, entry); +} +EXPORT_SYMBOL(__mb_cache_entry_free); + +static struct mb_cache_entry *__entry_find(struct mb_cache *cache, + struct mb_cache_entry *entry, + u32 key) +{ + struct mb_cache_entry *old_entry = entry; + struct hlist_bl_node *node; + struct hlist_bl_head *head; + + if (entry) + head = mb_cache_entry_head(entry); + else + head = &cache->c_hash[hash_32(key, cache->c_bucket_bits)]; + hlist_bl_lock(head); + if (entry && !hlist_bl_unhashed(&entry->e_hash_list)) + node = entry->e_hash_list.next; + else + node = hlist_bl_first(head); + while (node) { + entry = hlist_bl_entry(node, struct mb_cache_entry, + e_hash_list); + if (entry->e_key == key) { + atomic_inc(&entry->e_refcnt); + goto out; + } + node = node->next; + } + entry = NULL; +out: + hlist_bl_unlock(head); + if (old_entry) + mb_cache_entry_put(cache, old_entry); + + return entry; +} + +/* + * mb_cache_entry_find_first - find the first entry in cache with given key + * @cache: cache where we should search + * @key: key to look for + * + * Search in @cache for entry with key @key. Grabs reference to the first + * entry found and returns the entry. + */ +struct mb_cache_entry *mb_cache_entry_find_first(struct mb_cache *cache, + u32 key) +{ + return __entry_find(cache, NULL, key); +} +EXPORT_SYMBOL(mb_cache_entry_find_first); + +/* + * mb_cache_entry_find_next - find next entry in cache with the same + * @cache: cache where we should search + * @entry: entry to start search from + * + * Finds next entry in the hash chain which has the same key as @entry. + * If @entry is unhashed (which can happen when deletion of entry races + * with the search), finds the first entry in the hash chain. The function + * drops reference to @entry and returns with a reference to the found entry. 
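+ *
+ * A typical search loop, modelled on ext2_xattr_cache_find() above
+ * (illustrative sketch only; usable() is a placeholder for caller
+ * logic):
+ *
+ *	entry = mb_cache_entry_find_first(cache, key);
+ *	while (entry) {
+ *		if (usable(entry->e_block))
+ *			break;
+ *		entry = mb_cache_entry_find_next(cache, entry);
+ *	}
+ *
+ * A caller that breaks out early still holds a reference and must drop
+ * it with mb_cache_entry_put() when done.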
+ */ +struct mb_cache_entry *mb_cache_entry_find_next(struct mb_cache *cache, + struct mb_cache_entry *entry) +{ + return __entry_find(cache, entry, entry->e_key); +} +EXPORT_SYMBOL(mb_cache_entry_find_next); + +/* mb_cache_entry_delete_block - remove information about block from cache + * @cache - cache we work with + * @key - key of the entry to remove + * @block - block containing data for @key + * + * Remove entry from cache @cache with key @key with data stored in @block. + */ +void mb_cache_entry_delete_block(struct mb_cache *cache, u32 key, + sector_t block) +{ + struct hlist_bl_node *node; + struct hlist_bl_head *head; + struct mb_cache_entry *entry; + + head = &cache->c_hash[hash_32(key, cache->c_bucket_bits)]; + hlist_bl_lock(head); + hlist_bl_for_each_entry(entry, node, head, e_hash_list) { + if (entry->e_key == key && entry->e_block == block) { + /* We keep hash list reference to keep entry alive */ + hlist_bl_del_init(&entry->e_hash_list); + hlist_bl_unlock(head); + spin_lock(&cache->c_list_lock); + if (!list_empty(&entry->e_list)) { + list_del_init(&entry->e_list); + cache->c_entry_count--; + atomic_dec(&entry->e_refcnt); + } + spin_unlock(&cache->c_list_lock); + mb_cache_entry_put(cache, entry); + return; + } + } + hlist_bl_unlock(head); +} +EXPORT_SYMBOL(mb_cache_entry_delete_block); + +/* mb_cache_entry_touch - cache entry got used + * @cache - cache the entry belongs to + * @entry - entry that got used + * + * Marks entry as used to give hit higher chances of surviving in cache. + */ +void mb_cache_entry_touch(struct mb_cache *cache, + struct mb_cache_entry *entry) +{ + mb_cache_entry_set_referenced(entry); +} +EXPORT_SYMBOL(mb_cache_entry_touch); + +static unsigned long mb_cache_count(struct shrinker *shrink, + struct shrink_control *sc) +{ + struct mb_cache *cache = container_of(shrink, struct mb_cache, + c_shrink); + + return cache->c_entry_count; +} + +/* Shrink number of entries in cache */ +static unsigned long mb_cache_shrink(struct mb_cache *cache, + unsigned int nr_to_scan) +{ + struct mb_cache_entry *entry; + struct hlist_bl_head *head; + unsigned int shrunk = 0; + + spin_lock(&cache->c_list_lock); + while (nr_to_scan-- && !list_empty(&cache->c_list)) { + entry = list_first_entry(&cache->c_list, + struct mb_cache_entry, e_list); + if (mb_cache_entry_referenced(entry)) { + mb_cache_entry_clear_referenced(entry); + list_move_tail(&cache->c_list, &entry->e_list); + continue; + } + list_del_init(&entry->e_list); + cache->c_entry_count--; + /* + * We keep LRU list reference so that entry doesn't go away + * from under us. 
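+	 * (That reference is the one taken for the LRU list in
+	 * mb_cache_entry_create(); the mb_cache_entry_put() below releases
+	 * it, freeing the entry unless another user still holds a
+	 * reference.)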
+ */ + spin_unlock(&cache->c_list_lock); + head = mb_cache_entry_head(entry); + hlist_bl_lock(head); + if (!hlist_bl_unhashed(&entry->e_hash_list)) { + hlist_bl_del_init(&entry->e_hash_list); + atomic_dec(&entry->e_refcnt); + } + hlist_bl_unlock(head); + if (mb_cache_entry_put(cache, entry)) + shrunk++; + cond_resched(); + spin_lock(&cache->c_list_lock); + } + spin_unlock(&cache->c_list_lock); + + return shrunk; +} + +static unsigned long mb_cache_scan(struct shrinker *shrink, + struct shrink_control *sc) +{ + int nr_to_scan = sc->nr_to_scan; + struct mb_cache *cache = container_of(shrink, struct mb_cache, + c_shrink); + return mb_cache_shrink(cache, nr_to_scan); +} + +/* We shrink 1/X of the cache when we have too many entries in it */ +#define SHRINK_DIVISOR 16 + +static void mb_cache_shrink_worker(struct work_struct *work) +{ + struct mb_cache *cache = container_of(work, struct mb_cache, + c_shrink_work); + mb_cache_shrink(cache, cache->c_max_entries / SHRINK_DIVISOR); +} + +/* + * mb_cache_create - create cache + * @bucket_bits: log2 of the hash table size + * + * Create cache for keys with 2^bucket_bits hash entries. + */ +struct mb_cache *mb_cache_create(int bucket_bits) +{ + struct mb_cache *cache; + int bucket_count = 1 << bucket_bits; + int i; + + if (!try_module_get(THIS_MODULE)) + return NULL; + + cache = kzalloc(sizeof(struct mb_cache), GFP_KERNEL); + if (!cache) + goto err_out; + cache->c_bucket_bits = bucket_bits; + cache->c_max_entries = bucket_count << 4; + INIT_LIST_HEAD(&cache->c_list); + spin_lock_init(&cache->c_list_lock); + cache->c_hash = kmalloc(bucket_count * sizeof(struct hlist_bl_head), + GFP_KERNEL); + if (!cache->c_hash) { + kfree(cache); + goto err_out; + } + for (i = 0; i < bucket_count; i++) + INIT_HLIST_BL_HEAD(&cache->c_hash[i]); + + cache->c_shrink.count_objects = mb_cache_count; + cache->c_shrink.scan_objects = mb_cache_scan; + cache->c_shrink.seeks = DEFAULT_SEEKS; + register_shrinker(&cache->c_shrink); + + INIT_WORK(&cache->c_shrink_work, mb_cache_shrink_worker); + + return cache; + +err_out: + module_put(THIS_MODULE); + return NULL; +} +EXPORT_SYMBOL(mb_cache_create); + +/* + * mb_cache_destroy - destroy cache + * @cache: the cache to destroy + * + * Free all entries in cache and cache itself. Caller must make sure nobody + * (except shrinker) can reach @cache when calling this. + */ +void mb_cache_destroy(struct mb_cache *cache) +{ + struct mb_cache_entry *entry, *next; + + unregister_shrinker(&cache->c_shrink); + + /* + * We don't bother with any locking. Cache must not be used at this + * point. 
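+	 * (Entries here should be holding only their hash-list and LRU
+	 * references; the WARN_ON below flags anything else as a leaked
+	 * reference.)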
+ */ + list_for_each_entry_safe(entry, next, &cache->c_list, e_list) { + if (!hlist_bl_unhashed(&entry->e_hash_list)) { + hlist_bl_del_init(&entry->e_hash_list); + atomic_dec(&entry->e_refcnt); + } else + WARN_ON(1); + list_del(&entry->e_list); + WARN_ON(atomic_read(&entry->e_refcnt) != 1); + mb_cache_entry_put(cache, entry); + } + kfree(cache->c_hash); + kfree(cache); + module_put(THIS_MODULE); +} +EXPORT_SYMBOL(mb_cache_destroy); + +static int __init mbcache_init(void) +{ + mb_entry_cache = kmem_cache_create("mbcache", + sizeof(struct mb_cache_entry), 0, + SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, NULL); + BUG_ON(!mb_entry_cache); + return 0; +} + +static void __exit mbcache_exit(void) +{ + kmem_cache_destroy(mb_entry_cache); +} + +module_init(mbcache_init) +module_exit(mbcache_exit) + +MODULE_AUTHOR("Jan Kara "); +MODULE_DESCRIPTION("Meta block cache (for extended attributes)"); +MODULE_LICENSE("GPL"); diff --git a/fs/mbcache2.c b/fs/mbcache2.c deleted file mode 100644 index 49f7a6feaa83..000000000000 --- a/fs/mbcache2.c +++ /dev/null @@ -1,424 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include - -/* - * Mbcache is a simple key-value store. Keys need not be unique, however - * key-value pairs are expected to be unique (we use this fact in - * mb2_cache_entry_delete_block()). - * - * Ext2 and ext4 use this cache for deduplication of extended attribute blocks. - * They use hash of a block contents as a key and block number as a value. - * That's why keys need not be unique (different xattr blocks may end up having - * the same hash). However block number always uniquely identifies a cache - * entry. - * - * We provide functions for creation and removal of entries, search by key, - * and a special "delete entry with given key-value pair" operation. Fixed - * size hash table is used for fast key lookups. - */ - -struct mb2_cache { - /* Hash table of entries */ - struct hlist_bl_head *c_hash; - /* log2 of hash table size */ - int c_bucket_bits; - /* Maximum entries in cache to avoid degrading hash too much */ - int c_max_entries; - /* Protects c_list, c_entry_count */ - spinlock_t c_list_lock; - struct list_head c_list; - /* Number of entries in cache */ - unsigned long c_entry_count; - struct shrinker c_shrink; - /* Work for shrinking when the cache has too many entries */ - struct work_struct c_shrink_work; -}; - -static struct kmem_cache *mb2_entry_cache; - -static unsigned long mb2_cache_shrink(struct mb2_cache *cache, - unsigned int nr_to_scan); - -static inline bool mb2_cache_entry_referenced(struct mb2_cache_entry *entry) -{ - return entry->_e_hash_list_head & 1; -} - -static inline void mb2_cache_entry_set_referenced(struct mb2_cache_entry *entry) -{ - entry->_e_hash_list_head |= 1; -} - -static inline void mb2_cache_entry_clear_referenced( - struct mb2_cache_entry *entry) -{ - entry->_e_hash_list_head &= ~1; -} - -static inline struct hlist_bl_head *mb2_cache_entry_head( - struct mb2_cache_entry *entry) -{ - return (struct hlist_bl_head *) - (entry->_e_hash_list_head & ~1); -} - -/* - * Number of entries to reclaim synchronously when there are too many entries - * in cache - */ -#define SYNC_SHRINK_BATCH 64 - -/* - * mb2_cache_entry_create - create entry in cache - * @cache - cache where the entry should be created - * @mask - gfp mask with which the entry should be allocated - * @key - key of the entry - * @block - block that contains data - * - * Creates entry in @cache with key @key and records that data is stored in - * block @block. 
The function returns -EBUSY if entry with the same key - * and for the same block already exists in cache. Otherwise 0 is returned. - */ -int mb2_cache_entry_create(struct mb2_cache *cache, gfp_t mask, u32 key, - sector_t block) -{ - struct mb2_cache_entry *entry, *dup; - struct hlist_bl_node *dup_node; - struct hlist_bl_head *head; - - /* Schedule background reclaim if there are too many entries */ - if (cache->c_entry_count >= cache->c_max_entries) - schedule_work(&cache->c_shrink_work); - /* Do some sync reclaim if background reclaim cannot keep up */ - if (cache->c_entry_count >= 2*cache->c_max_entries) - mb2_cache_shrink(cache, SYNC_SHRINK_BATCH); - - entry = kmem_cache_alloc(mb2_entry_cache, mask); - if (!entry) - return -ENOMEM; - - INIT_LIST_HEAD(&entry->e_list); - /* One ref for hash, one ref returned */ - atomic_set(&entry->e_refcnt, 1); - entry->e_key = key; - entry->e_block = block; - head = &cache->c_hash[hash_32(key, cache->c_bucket_bits)]; - entry->_e_hash_list_head = (unsigned long)head; - hlist_bl_lock(head); - hlist_bl_for_each_entry(dup, dup_node, head, e_hash_list) { - if (dup->e_key == key && dup->e_block == block) { - hlist_bl_unlock(head); - kmem_cache_free(mb2_entry_cache, entry); - return -EBUSY; - } - } - hlist_bl_add_head(&entry->e_hash_list, head); - hlist_bl_unlock(head); - - spin_lock(&cache->c_list_lock); - list_add_tail(&entry->e_list, &cache->c_list); - /* Grab ref for LRU list */ - atomic_inc(&entry->e_refcnt); - cache->c_entry_count++; - spin_unlock(&cache->c_list_lock); - - return 0; -} -EXPORT_SYMBOL(mb2_cache_entry_create); - -void __mb2_cache_entry_free(struct mb2_cache_entry *entry) -{ - kmem_cache_free(mb2_entry_cache, entry); -} -EXPORT_SYMBOL(__mb2_cache_entry_free); - -static struct mb2_cache_entry *__entry_find(struct mb2_cache *cache, - struct mb2_cache_entry *entry, - u32 key) -{ - struct mb2_cache_entry *old_entry = entry; - struct hlist_bl_node *node; - struct hlist_bl_head *head; - - if (entry) - head = mb2_cache_entry_head(entry); - else - head = &cache->c_hash[hash_32(key, cache->c_bucket_bits)]; - hlist_bl_lock(head); - if (entry && !hlist_bl_unhashed(&entry->e_hash_list)) - node = entry->e_hash_list.next; - else - node = hlist_bl_first(head); - while (node) { - entry = hlist_bl_entry(node, struct mb2_cache_entry, - e_hash_list); - if (entry->e_key == key) { - atomic_inc(&entry->e_refcnt); - goto out; - } - node = node->next; - } - entry = NULL; -out: - hlist_bl_unlock(head); - if (old_entry) - mb2_cache_entry_put(cache, old_entry); - - return entry; -} - -/* - * mb2_cache_entry_find_first - find the first entry in cache with given key - * @cache: cache where we should search - * @key: key to look for - * - * Search in @cache for entry with key @key. Grabs reference to the first - * entry found and returns the entry. - */ -struct mb2_cache_entry *mb2_cache_entry_find_first(struct mb2_cache *cache, - u32 key) -{ - return __entry_find(cache, NULL, key); -} -EXPORT_SYMBOL(mb2_cache_entry_find_first); - -/* - * mb2_cache_entry_find_next - find next entry in cache with the same - * @cache: cache where we should search - * @entry: entry to start search from - * - * Finds next entry in the hash chain which has the same key as @entry. - * If @entry is unhashed (which can happen when deletion of entry races - * with the search), finds the first entry in the hash chain. The function - * drops reference to @entry and returns with a reference to the found entry. 
- */ -struct mb2_cache_entry *mb2_cache_entry_find_next(struct mb2_cache *cache, - struct mb2_cache_entry *entry) -{ - return __entry_find(cache, entry, entry->e_key); -} -EXPORT_SYMBOL(mb2_cache_entry_find_next); - -/* mb2_cache_entry_delete_block - remove information about block from cache - * @cache - cache we work with - * @key - key of the entry to remove - * @block - block containing data for @key - * - * Remove entry from cache @cache with key @key with data stored in @block. - */ -void mb2_cache_entry_delete_block(struct mb2_cache *cache, u32 key, - sector_t block) -{ - struct hlist_bl_node *node; - struct hlist_bl_head *head; - struct mb2_cache_entry *entry; - - head = &cache->c_hash[hash_32(key, cache->c_bucket_bits)]; - hlist_bl_lock(head); - hlist_bl_for_each_entry(entry, node, head, e_hash_list) { - if (entry->e_key == key && entry->e_block == block) { - /* We keep hash list reference to keep entry alive */ - hlist_bl_del_init(&entry->e_hash_list); - hlist_bl_unlock(head); - spin_lock(&cache->c_list_lock); - if (!list_empty(&entry->e_list)) { - list_del_init(&entry->e_list); - cache->c_entry_count--; - atomic_dec(&entry->e_refcnt); - } - spin_unlock(&cache->c_list_lock); - mb2_cache_entry_put(cache, entry); - return; - } - } - hlist_bl_unlock(head); -} -EXPORT_SYMBOL(mb2_cache_entry_delete_block); - -/* mb2_cache_entry_touch - cache entry got used - * @cache - cache the entry belongs to - * @entry - entry that got used - * - * Marks entry as used to give hit higher chances of surviving in cache. - */ -void mb2_cache_entry_touch(struct mb2_cache *cache, - struct mb2_cache_entry *entry) -{ - mb2_cache_entry_set_referenced(entry); -} -EXPORT_SYMBOL(mb2_cache_entry_touch); - -static unsigned long mb2_cache_count(struct shrinker *shrink, - struct shrink_control *sc) -{ - struct mb2_cache *cache = container_of(shrink, struct mb2_cache, - c_shrink); - - return cache->c_entry_count; -} - -/* Shrink number of entries in cache */ -static unsigned long mb2_cache_shrink(struct mb2_cache *cache, - unsigned int nr_to_scan) -{ - struct mb2_cache_entry *entry; - struct hlist_bl_head *head; - unsigned int shrunk = 0; - - spin_lock(&cache->c_list_lock); - while (nr_to_scan-- && !list_empty(&cache->c_list)) { - entry = list_first_entry(&cache->c_list, - struct mb2_cache_entry, e_list); - if (mb2_cache_entry_referenced(entry)) { - mb2_cache_entry_clear_referenced(entry); - list_move_tail(&cache->c_list, &entry->e_list); - continue; - } - list_del_init(&entry->e_list); - cache->c_entry_count--; - /* - * We keep LRU list reference so that entry doesn't go away - * from under us. 
- */ - spin_unlock(&cache->c_list_lock); - head = mb2_cache_entry_head(entry); - hlist_bl_lock(head); - if (!hlist_bl_unhashed(&entry->e_hash_list)) { - hlist_bl_del_init(&entry->e_hash_list); - atomic_dec(&entry->e_refcnt); - } - hlist_bl_unlock(head); - if (mb2_cache_entry_put(cache, entry)) - shrunk++; - cond_resched(); - spin_lock(&cache->c_list_lock); - } - spin_unlock(&cache->c_list_lock); - - return shrunk; -} - -static unsigned long mb2_cache_scan(struct shrinker *shrink, - struct shrink_control *sc) -{ - int nr_to_scan = sc->nr_to_scan; - struct mb2_cache *cache = container_of(shrink, struct mb2_cache, - c_shrink); - return mb2_cache_shrink(cache, nr_to_scan); -} - -/* We shrink 1/X of the cache when we have too many entries in it */ -#define SHRINK_DIVISOR 16 - -static void mb2_cache_shrink_worker(struct work_struct *work) -{ - struct mb2_cache *cache = container_of(work, struct mb2_cache, - c_shrink_work); - mb2_cache_shrink(cache, cache->c_max_entries / SHRINK_DIVISOR); -} - -/* - * mb2_cache_create - create cache - * @bucket_bits: log2 of the hash table size - * - * Create cache for keys with 2^bucket_bits hash entries. - */ -struct mb2_cache *mb2_cache_create(int bucket_bits) -{ - struct mb2_cache *cache; - int bucket_count = 1 << bucket_bits; - int i; - - if (!try_module_get(THIS_MODULE)) - return NULL; - - cache = kzalloc(sizeof(struct mb2_cache), GFP_KERNEL); - if (!cache) - goto err_out; - cache->c_bucket_bits = bucket_bits; - cache->c_max_entries = bucket_count << 4; - INIT_LIST_HEAD(&cache->c_list); - spin_lock_init(&cache->c_list_lock); - cache->c_hash = kmalloc(bucket_count * sizeof(struct hlist_bl_head), - GFP_KERNEL); - if (!cache->c_hash) { - kfree(cache); - goto err_out; - } - for (i = 0; i < bucket_count; i++) - INIT_HLIST_BL_HEAD(&cache->c_hash[i]); - - cache->c_shrink.count_objects = mb2_cache_count; - cache->c_shrink.scan_objects = mb2_cache_scan; - cache->c_shrink.seeks = DEFAULT_SEEKS; - register_shrinker(&cache->c_shrink); - - INIT_WORK(&cache->c_shrink_work, mb2_cache_shrink_worker); - - return cache; - -err_out: - module_put(THIS_MODULE); - return NULL; -} -EXPORT_SYMBOL(mb2_cache_create); - -/* - * mb2_cache_destroy - destroy cache - * @cache: the cache to destroy - * - * Free all entries in cache and cache itself. Caller must make sure nobody - * (except shrinker) can reach @cache when calling this. - */ -void mb2_cache_destroy(struct mb2_cache *cache) -{ - struct mb2_cache_entry *entry, *next; - - unregister_shrinker(&cache->c_shrink); - - /* - * We don't bother with any locking. Cache must not be used at this - * point. 
- */ - list_for_each_entry_safe(entry, next, &cache->c_list, e_list) { - if (!hlist_bl_unhashed(&entry->e_hash_list)) { - hlist_bl_del_init(&entry->e_hash_list); - atomic_dec(&entry->e_refcnt); - } else - WARN_ON(1); - list_del(&entry->e_list); - WARN_ON(atomic_read(&entry->e_refcnt) != 1); - mb2_cache_entry_put(cache, entry); - } - kfree(cache->c_hash); - kfree(cache); - module_put(THIS_MODULE); -} -EXPORT_SYMBOL(mb2_cache_destroy); - -static int __init mb2cache_init(void) -{ - mb2_entry_cache = kmem_cache_create("mbcache", - sizeof(struct mb2_cache_entry), 0, - SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, NULL); - BUG_ON(!mb2_entry_cache); - return 0; -} - -static void __exit mb2cache_exit(void) -{ - kmem_cache_destroy(mb2_entry_cache); -} - -module_init(mb2cache_init) -module_exit(mb2cache_exit) - -MODULE_AUTHOR("Jan Kara "); -MODULE_DESCRIPTION("Meta block cache (for extended attributes)"); -MODULE_LICENSE("GPL"); diff --git a/include/linux/mbcache.h b/include/linux/mbcache.h new file mode 100644 index 000000000000..a74a1f3082fb --- /dev/null +++ b/include/linux/mbcache.h @@ -0,0 +1,53 @@ +#ifndef _LINUX_MBCACHE_H +#define _LINUX_MBCACHE_H + +#include +#include +#include +#include +#include + +struct mb_cache; + +struct mb_cache_entry { + /* List of entries in cache - protected by cache->c_list_lock */ + struct list_head e_list; + /* Hash table list - protected by bitlock in e_hash_list_head */ + struct hlist_bl_node e_hash_list; + atomic_t e_refcnt; + /* Key in hash - stable during lifetime of the entry */ + u32 e_key; + /* Block number of hashed block - stable during lifetime of the entry */ + sector_t e_block; + /* + * Head of hash list (for list bit lock) - stable. Combined with + * referenced bit of entry + */ + unsigned long _e_hash_list_head; +}; + +struct mb_cache *mb_cache_create(int bucket_bits); +void mb_cache_destroy(struct mb_cache *cache); + +int mb_cache_entry_create(struct mb_cache *cache, gfp_t mask, u32 key, + sector_t block); +void __mb_cache_entry_free(struct mb_cache_entry *entry); +static inline int mb_cache_entry_put(struct mb_cache *cache, + struct mb_cache_entry *entry) +{ + if (!atomic_dec_and_test(&entry->e_refcnt)) + return 0; + __mb_cache_entry_free(entry); + return 1; +} + +void mb_cache_entry_delete_block(struct mb_cache *cache, u32 key, + sector_t block); +struct mb_cache_entry *mb_cache_entry_find_first(struct mb_cache *cache, + u32 key); +struct mb_cache_entry *mb_cache_entry_find_next(struct mb_cache *cache, + struct mb_cache_entry *entry); +void mb_cache_entry_touch(struct mb_cache *cache, + struct mb_cache_entry *entry); + +#endif /* _LINUX_MBCACHE_H */ diff --git a/include/linux/mbcache2.h b/include/linux/mbcache2.h deleted file mode 100644 index c934843a6a31..000000000000 --- a/include/linux/mbcache2.h +++ /dev/null @@ -1,53 +0,0 @@ -#ifndef _LINUX_MB2CACHE_H -#define _LINUX_MB2CACHE_H - -#include -#include -#include -#include -#include - -struct mb2_cache; - -struct mb2_cache_entry { - /* List of entries in cache - protected by cache->c_list_lock */ - struct list_head e_list; - /* Hash table list - protected by bitlock in e_hash_list_head */ - struct hlist_bl_node e_hash_list; - atomic_t e_refcnt; - /* Key in hash - stable during lifetime of the entry */ - u32 e_key; - /* Block number of hashed block - stable during lifetime of the entry */ - sector_t e_block; - /* - * Head of hash list (for list bit lock) - stable. 
Combined with - * referenced bit of entry - */ - unsigned long _e_hash_list_head; -}; - -struct mb2_cache *mb2_cache_create(int bucket_bits); -void mb2_cache_destroy(struct mb2_cache *cache); - -int mb2_cache_entry_create(struct mb2_cache *cache, gfp_t mask, u32 key, - sector_t block); -void __mb2_cache_entry_free(struct mb2_cache_entry *entry); -static inline int mb2_cache_entry_put(struct mb2_cache *cache, - struct mb2_cache_entry *entry) -{ - if (!atomic_dec_and_test(&entry->e_refcnt)) - return 0; - __mb2_cache_entry_free(entry); - return 1; -} - -void mb2_cache_entry_delete_block(struct mb2_cache *cache, u32 key, - sector_t block); -struct mb2_cache_entry *mb2_cache_entry_find_first(struct mb2_cache *cache, - u32 key); -struct mb2_cache_entry *mb2_cache_entry_find_next(struct mb2_cache *cache, - struct mb2_cache_entry *entry); -void mb2_cache_entry_touch(struct mb2_cache *cache, - struct mb2_cache_entry *entry); - -#endif /* _LINUX_MB2CACHE_H */ -- cgit v1.2.3 From dc8d5e565f00c9442fa1cbf9acc115475628527c Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Mon, 22 Feb 2016 22:42:05 -0500 Subject: mbcache: get rid of _e_hash_list_head Get rid of field _e_hash_list_head in cache entries and add bit field e_referenced instead. Signed-off-by: Andreas Gruenbacher Signed-off-by: Jan Kara Signed-off-by: Theodore Ts'o --- fs/mbcache.c | 41 ++++++++++------------------------------- include/linux/mbcache.h | 8 ++------ 2 files changed, 12 insertions(+), 37 deletions(-) (limited to 'include/linux') diff --git a/fs/mbcache.c b/fs/mbcache.c index 4241b633f155..903be151dcfe 100644 --- a/fs/mbcache.c +++ b/fs/mbcache.c @@ -45,27 +45,10 @@ static struct kmem_cache *mb_entry_cache; static unsigned long mb_cache_shrink(struct mb_cache *cache, unsigned int nr_to_scan); -static inline bool mb_cache_entry_referenced(struct mb_cache_entry *entry) +static inline struct hlist_bl_head *mb_cache_entry_head(struct mb_cache *cache, + u32 key) { - return entry->_e_hash_list_head & 1; -} - -static inline void mb_cache_entry_set_referenced(struct mb_cache_entry *entry) -{ - entry->_e_hash_list_head |= 1; -} - -static inline void mb_cache_entry_clear_referenced( - struct mb_cache_entry *entry) -{ - entry->_e_hash_list_head &= ~1; -} - -static inline struct hlist_bl_head *mb_cache_entry_head( - struct mb_cache_entry *entry) -{ - return (struct hlist_bl_head *) - (entry->_e_hash_list_head & ~1); + return &cache->c_hash[hash_32(key, cache->c_bucket_bits)]; } /* @@ -108,8 +91,7 @@ int mb_cache_entry_create(struct mb_cache *cache, gfp_t mask, u32 key, atomic_set(&entry->e_refcnt, 1); entry->e_key = key; entry->e_block = block; - head = &cache->c_hash[hash_32(key, cache->c_bucket_bits)]; - entry->_e_hash_list_head = (unsigned long)head; + head = mb_cache_entry_head(cache, key); hlist_bl_lock(head); hlist_bl_for_each_entry(dup, dup_node, head, e_hash_list) { if (dup->e_key == key && dup->e_block == block) { @@ -146,10 +128,7 @@ static struct mb_cache_entry *__entry_find(struct mb_cache *cache, struct hlist_bl_node *node; struct hlist_bl_head *head; - if (entry) - head = mb_cache_entry_head(entry); - else - head = &cache->c_hash[hash_32(key, cache->c_bucket_bits)]; + head = mb_cache_entry_head(cache, key); hlist_bl_lock(head); if (entry && !hlist_bl_unhashed(&entry->e_hash_list)) node = entry->e_hash_list.next; @@ -219,7 +198,7 @@ void mb_cache_entry_delete_block(struct mb_cache *cache, u32 key, struct hlist_bl_head *head; struct mb_cache_entry *entry; - head = &cache->c_hash[hash_32(key, cache->c_bucket_bits)]; + head 
= mb_cache_entry_head(cache, key); hlist_bl_lock(head); hlist_bl_for_each_entry(entry, node, head, e_hash_list) { if (entry->e_key == key && entry->e_block == block) { @@ -250,7 +229,7 @@ EXPORT_SYMBOL(mb_cache_entry_delete_block); void mb_cache_entry_touch(struct mb_cache *cache, struct mb_cache_entry *entry) { - mb_cache_entry_set_referenced(entry); + entry->e_referenced = 1; } EXPORT_SYMBOL(mb_cache_entry_touch); @@ -275,8 +254,8 @@ static unsigned long mb_cache_shrink(struct mb_cache *cache, while (nr_to_scan-- && !list_empty(&cache->c_list)) { entry = list_first_entry(&cache->c_list, struct mb_cache_entry, e_list); - if (mb_cache_entry_referenced(entry)) { - mb_cache_entry_clear_referenced(entry); + if (entry->e_referenced) { + entry->e_referenced = 0; list_move_tail(&cache->c_list, &entry->e_list); continue; } @@ -287,7 +266,7 @@ static unsigned long mb_cache_shrink(struct mb_cache *cache, * from under us. */ spin_unlock(&cache->c_list_lock); - head = mb_cache_entry_head(entry); + head = mb_cache_entry_head(cache, entry->e_key); hlist_bl_lock(head); if (!hlist_bl_unhashed(&entry->e_hash_list)) { hlist_bl_del_init(&entry->e_hash_list); diff --git a/include/linux/mbcache.h b/include/linux/mbcache.h index a74a1f3082fb..607e6968542e 100644 --- a/include/linux/mbcache.h +++ b/include/linux/mbcache.h @@ -12,18 +12,14 @@ struct mb_cache; struct mb_cache_entry { /* List of entries in cache - protected by cache->c_list_lock */ struct list_head e_list; - /* Hash table list - protected by bitlock in e_hash_list_head */ + /* Hash table list - protected by hash chain bitlock */ struct hlist_bl_node e_hash_list; atomic_t e_refcnt; /* Key in hash - stable during lifetime of the entry */ u32 e_key; + u32 e_referenced:1; /* Block number of hashed block - stable during lifetime of the entry */ sector_t e_block; - /* - * Head of hash list (for list bit lock) - stable. Combined with - * referenced bit of entry - */ - unsigned long _e_hash_list_head; }; struct mb_cache *mb_cache_create(int bucket_bits); -- cgit v1.2.3 From 6048c64b26097a0ffbd966866b599f990e674e9b Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Mon, 22 Feb 2016 22:44:04 -0500 Subject: mbcache: add reusable flag to cache entries To reduce amount of damage caused by single bad block, we limit number of inodes sharing an xattr block to 1024. Thus there can be more xattr blocks with the same contents when there are lots of files with the same extended attributes. These xattr blocks naturally result in hash collisions and can form long hash chains and we unnecessarily check each such block only to find out we cannot use it because it is already shared by too many inodes. Add a reusable flag to cache entries which is cleared when a cache entry has reached its maximum refcount. Cache entries which are not marked reusable are skipped by mb_cache_entry_find_{first,next}. This significantly speeds up mbcache when there are many same xattr blocks. For example for xattr-bench with 5 values and each process handling 20000 files, the run for 64 processes is 25x faster with this patch. Even for 8 processes the speedup is almost 3x. We have also verified that for situations where there is only one xattr block of each kind, the patch doesn't have a measurable cost. 
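In outline, the scheme works as sketched below (a condensed illustration assembled from the hunks that follow; EXT4_XATTR_REFCOUNT_MAX, mb_cache_entry_create() and mb_cache_entry_get() are used as this patch defines them, while ext4_mb_cache, hash, bh and the surrounding locking and error handling are taken as given):

	struct mb_cache_entry *ce;

	/* When caching an xattr block, mark it reusable only while it
	 * can still accept more sharing inodes. */
	int reusable = le32_to_cpu(BHDR(bh)->h_refcount) <
		       EXT4_XATTR_REFCOUNT_MAX;
	mb_cache_entry_create(ext4_mb_cache, GFP_NOFS, hash,
			      bh->b_blocknr, reusable);

	/* Dropping a reference back below the limit re-enables reuse: */
	ce = mb_cache_entry_get(ext4_mb_cache, hash, bh->b_blocknr);
	if (ce) {
		ce->e_reusable = 1;
		mb_cache_entry_put(ext4_mb_cache, ce);
	}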
[JK: Remove handling of setting the same value since it is not needed anymore, check for races in e_reusable setting, improve changelog, add measurements] Signed-off-by: Andreas Gruenbacher Signed-off-by: Jan Kara Signed-off-by: Theodore Ts'o --- fs/ext2/xattr.c | 2 +- fs/ext4/xattr.c | 66 +++++++++++++++++++++++++++++++------------------ fs/mbcache.c | 38 +++++++++++++++++++++++++--- include/linux/mbcache.h | 5 +++- 4 files changed, 81 insertions(+), 30 deletions(-) (limited to 'include/linux') diff --git a/fs/ext2/xattr.c b/fs/ext2/xattr.c index 71d58c2d7a19..1a5e3bff0b63 100644 --- a/fs/ext2/xattr.c +++ b/fs/ext2/xattr.c @@ -823,7 +823,7 @@ ext2_xattr_cache_insert(struct mb_cache *cache, struct buffer_head *bh) __u32 hash = le32_to_cpu(HDR(bh)->h_hash); int error; - error = mb_cache_entry_create(cache, GFP_NOFS, hash, bh->b_blocknr); + error = mb_cache_entry_create(cache, GFP_NOFS, hash, bh->b_blocknr, 1); if (error) { if (error == -EBUSY) { ea_bdebug(bh, "already in cache (%d cache entries)", diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index b661ae8332e3..0441e055c8e8 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -545,6 +545,8 @@ static void ext4_xattr_release_block(handle_t *handle, struct inode *inode, struct buffer_head *bh) { + struct mb_cache *ext4_mb_cache = EXT4_GET_MB_CACHE(inode); + u32 hash, ref; int error = 0; BUFFER_TRACE(bh, "get_write_access"); @@ -553,23 +555,34 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode, goto out; lock_buffer(bh); - if (BHDR(bh)->h_refcount == cpu_to_le32(1)) { - __u32 hash = le32_to_cpu(BHDR(bh)->h_hash); - + hash = le32_to_cpu(BHDR(bh)->h_hash); + ref = le32_to_cpu(BHDR(bh)->h_refcount); + if (ref == 1) { ea_bdebug(bh, "refcount now=0; freeing"); /* * This must happen under buffer lock for * ext4_xattr_block_set() to reliably detect freed block */ - mb_cache_entry_delete_block(EXT4_GET_MB_CACHE(inode), hash, - bh->b_blocknr); + mb_cache_entry_delete_block(ext4_mb_cache, hash, bh->b_blocknr); get_bh(bh); unlock_buffer(bh); ext4_free_blocks(handle, inode, bh, 0, 1, EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET); } else { - le32_add_cpu(&BHDR(bh)->h_refcount, -1); + ref--; + BHDR(bh)->h_refcount = cpu_to_le32(ref); + if (ref == EXT4_XATTR_REFCOUNT_MAX - 1) { + struct mb_cache_entry *ce; + + ce = mb_cache_entry_get(ext4_mb_cache, hash, + bh->b_blocknr); + if (ce) { + ce->e_reusable = 1; + mb_cache_entry_put(ext4_mb_cache, ce); + } + } + /* * Beware of this ugliness: Releasing of xattr block references * from different inodes can race and so we have to protect @@ -872,6 +885,8 @@ inserted: if (new_bh == bs->bh) ea_bdebug(new_bh, "keeping"); else { + u32 ref; + /* The old block is released after updating the inode. */ error = dquot_alloc_block(inode, @@ -886,15 +901,18 @@ inserted: lock_buffer(new_bh); /* * We have to be careful about races with - * freeing or rehashing of xattr block. Once we - * hold buffer lock xattr block's state is - * stable so we can check whether the block got - * freed / rehashed or not. Since we unhash - * mbcache entry under buffer lock when freeing - * / rehashing xattr block, checking whether - * entry is still hashed is reliable. + * freeing, rehashing or adding references to + * xattr block. Once we hold buffer lock xattr + * block's state is stable so we can check + * whether the block got freed / rehashed or + * not. Since we unhash mbcache entry under + * buffer lock when freeing / rehashing xattr + * block, checking whether entry is still + * hashed is reliable. 
Same rules hold for + * e_reusable handling. */ - if (hlist_bl_unhashed(&ce->e_hash_list)) { + if (hlist_bl_unhashed(&ce->e_hash_list) || + !ce->e_reusable) { /* * Undo everything and check mbcache * again. @@ -909,9 +927,12 @@ inserted: new_bh = NULL; goto inserted; } - le32_add_cpu(&BHDR(new_bh)->h_refcount, 1); + ref = le32_to_cpu(BHDR(new_bh)->h_refcount) + 1; + BHDR(new_bh)->h_refcount = cpu_to_le32(ref); + if (ref >= EXT4_XATTR_REFCOUNT_MAX) + ce->e_reusable = 0; ea_bdebug(new_bh, "reusing; refcount now=%d", - le32_to_cpu(BHDR(new_bh)->h_refcount)); + ref); unlock_buffer(new_bh); error = ext4_handle_dirty_xattr_block(handle, inode, @@ -1566,11 +1587,14 @@ cleanup: static void ext4_xattr_cache_insert(struct mb_cache *ext4_mb_cache, struct buffer_head *bh) { - __u32 hash = le32_to_cpu(BHDR(bh)->h_hash); + struct ext4_xattr_header *header = BHDR(bh); + __u32 hash = le32_to_cpu(header->h_hash); + int reusable = le32_to_cpu(header->h_refcount) < + EXT4_XATTR_REFCOUNT_MAX; int error; error = mb_cache_entry_create(ext4_mb_cache, GFP_NOFS, hash, - bh->b_blocknr); + bh->b_blocknr, reusable); if (error) { if (error == -EBUSY) ea_bdebug(bh, "already in cache"); @@ -1645,12 +1669,6 @@ ext4_xattr_cache_find(struct inode *inode, struct ext4_xattr_header *header, if (!bh) { EXT4_ERROR_INODE(inode, "block %lu read error", (unsigned long) ce->e_block); - } else if (le32_to_cpu(BHDR(bh)->h_refcount) >= - EXT4_XATTR_REFCOUNT_MAX) { - ea_idebug(inode, "block %lu refcount %d>=%d", - (unsigned long) ce->e_block, - le32_to_cpu(BHDR(bh)->h_refcount), - EXT4_XATTR_REFCOUNT_MAX); } else if (ext4_xattr_cmp(header, BHDR(bh)) == 0) { *pce = ce; return bh; diff --git a/fs/mbcache.c b/fs/mbcache.c index 903be151dcfe..eccda3a02de6 100644 --- a/fs/mbcache.c +++ b/fs/mbcache.c @@ -63,13 +63,14 @@ static inline struct hlist_bl_head *mb_cache_entry_head(struct mb_cache *cache, * @mask - gfp mask with which the entry should be allocated * @key - key of the entry * @block - block that contains data + * @reusable - is the block reusable by other inodes? * * Creates entry in @cache with key @key and records that data is stored in * block @block. The function returns -EBUSY if entry with the same key * and for the same block already exists in cache. Otherwise 0 is returned. 
*/ int mb_cache_entry_create(struct mb_cache *cache, gfp_t mask, u32 key, - sector_t block) + sector_t block, bool reusable) { struct mb_cache_entry *entry, *dup; struct hlist_bl_node *dup_node; @@ -91,6 +92,7 @@ int mb_cache_entry_create(struct mb_cache *cache, gfp_t mask, u32 key, atomic_set(&entry->e_refcnt, 1); entry->e_key = key; entry->e_block = block; + entry->e_reusable = reusable; head = mb_cache_entry_head(cache, key); hlist_bl_lock(head); hlist_bl_for_each_entry(dup, dup_node, head, e_hash_list) { @@ -137,7 +139,7 @@ static struct mb_cache_entry *__entry_find(struct mb_cache *cache, while (node) { entry = hlist_bl_entry(node, struct mb_cache_entry, e_hash_list); - if (entry->e_key == key) { + if (entry->e_key == key && entry->e_reusable) { atomic_inc(&entry->e_refcnt); goto out; } @@ -184,10 +186,38 @@ struct mb_cache_entry *mb_cache_entry_find_next(struct mb_cache *cache, } EXPORT_SYMBOL(mb_cache_entry_find_next); +/* + * mb_cache_entry_get - get a cache entry by block number (and key) + * @cache - cache we work with + * @key - key of block number @block + * @block - block number + */ +struct mb_cache_entry *mb_cache_entry_get(struct mb_cache *cache, u32 key, + sector_t block) +{ + struct hlist_bl_node *node; + struct hlist_bl_head *head; + struct mb_cache_entry *entry; + + head = mb_cache_entry_head(cache, key); + hlist_bl_lock(head); + hlist_bl_for_each_entry(entry, node, head, e_hash_list) { + if (entry->e_key == key && entry->e_block == block) { + atomic_inc(&entry->e_refcnt); + goto out; + } + } + entry = NULL; +out: + hlist_bl_unlock(head); + return entry; +} +EXPORT_SYMBOL(mb_cache_entry_get); + /* mb_cache_entry_delete_block - remove information about block from cache * @cache - cache we work with - * @key - key of the entry to remove - * @block - block containing data for @key + * @key - key of block @block + * @block - block number * * Remove entry from cache @cache with key @key with data stored in @block. 
*/ diff --git a/include/linux/mbcache.h b/include/linux/mbcache.h index 607e6968542e..86c9a8b480c5 100644 --- a/include/linux/mbcache.h +++ b/include/linux/mbcache.h @@ -18,6 +18,7 @@ struct mb_cache_entry { /* Key in hash - stable during lifetime of the entry */ u32 e_key; u32 e_referenced:1; + u32 e_reusable:1; /* Block number of hashed block - stable during lifetime of the entry */ sector_t e_block; }; @@ -26,7 +27,7 @@ struct mb_cache *mb_cache_create(int bucket_bits); void mb_cache_destroy(struct mb_cache *cache); int mb_cache_entry_create(struct mb_cache *cache, gfp_t mask, u32 key, - sector_t block); + sector_t block, bool reusable); void __mb_cache_entry_free(struct mb_cache_entry *entry); static inline int mb_cache_entry_put(struct mb_cache *cache, struct mb_cache_entry *entry) @@ -39,6 +40,8 @@ static inline int mb_cache_entry_put(struct mb_cache *cache, void mb_cache_entry_delete_block(struct mb_cache *cache, u32 key, sector_t block); +struct mb_cache_entry *mb_cache_entry_get(struct mb_cache *cache, u32 key, + sector_t block); struct mb_cache_entry *mb_cache_entry_find_first(struct mb_cache *cache, u32 key); struct mb_cache_entry *mb_cache_entry_find_next(struct mb_cache *cache, -- cgit v1.2.3 From 9bcf976cb8b86eb40e0c0b495a14e4cb967b9c6e Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 22 Feb 2016 23:07:30 -0500 Subject: jbd2: remove unnecessary arguments of jbd2_journal_write_revoke_records jbd2_journal_write_revoke_records() takes journal pointer and write_op, although journal can be obtained from the passed transaction and write_op is always WRITE_SYNC. Remove these superfluous arguments. Signed-off-by: Jan Kara Signed-off-by: Theodore Ts'o --- fs/jbd2/commit.c | 3 +-- fs/jbd2/revoke.c | 33 +++++++++++++++------------------ include/linux/jbd2.h | 6 ++---- 3 files changed, 18 insertions(+), 24 deletions(-) (limited to 'include/linux') diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index 36345fefa3ff..ae4402d15d46 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -554,8 +554,7 @@ void jbd2_journal_commit_transaction(journal_t *journal) jbd2_journal_abort(journal, err); blk_start_plug(&plug); - jbd2_journal_write_revoke_records(journal, commit_transaction, - &log_bufs, WRITE_SYNC); + jbd2_journal_write_revoke_records(commit_transaction, &log_bufs); jbd_debug(3, "JBD2: commit phase 2b\n"); diff --git a/fs/jbd2/revoke.c b/fs/jbd2/revoke.c index 705ae577882b..c839332be56b 100644 --- a/fs/jbd2/revoke.c +++ b/fs/jbd2/revoke.c @@ -122,11 +122,11 @@ struct jbd2_revoke_table_s #ifdef __KERNEL__ -static void write_one_revoke_record(journal_t *, transaction_t *, +static void write_one_revoke_record(transaction_t *, struct list_head *, struct buffer_head **, int *, - struct jbd2_revoke_record_s *, int); -static void flush_descriptor(journal_t *, struct buffer_head *, int, int); + struct jbd2_revoke_record_s *); +static void flush_descriptor(journal_t *, struct buffer_head *, int); #endif /* Utility functions to maintain the revoke table */ @@ -519,11 +519,10 @@ void jbd2_journal_switch_revoke_table(journal_t *journal) * Write revoke records to the journal for all entries in the current * revoke hash, deleting the entries as we go. 
*/ -void jbd2_journal_write_revoke_records(journal_t *journal, - transaction_t *transaction, - struct list_head *log_bufs, - int write_op) +void jbd2_journal_write_revoke_records(transaction_t *transaction, + struct list_head *log_bufs) { + journal_t *journal = transaction->t_journal; struct buffer_head *descriptor; struct jbd2_revoke_record_s *record; struct jbd2_revoke_table_s *revoke; @@ -544,16 +543,15 @@ void jbd2_journal_write_revoke_records(journal_t *journal, while (!list_empty(hash_list)) { record = (struct jbd2_revoke_record_s *) hash_list->next; - write_one_revoke_record(journal, transaction, log_bufs, - &descriptor, &offset, - record, write_op); + write_one_revoke_record(transaction, log_bufs, + &descriptor, &offset, record); count++; list_del(&record->hash); kmem_cache_free(jbd2_revoke_record_cache, record); } } if (descriptor) - flush_descriptor(journal, descriptor, offset, write_op); + flush_descriptor(journal, descriptor, offset); jbd_debug(1, "Wrote %d revoke records\n", count); } @@ -562,14 +560,13 @@ void jbd2_journal_write_revoke_records(journal_t *journal, * block if the old one is full or if we have not already created one. */ -static void write_one_revoke_record(journal_t *journal, - transaction_t *transaction, +static void write_one_revoke_record(transaction_t *transaction, struct list_head *log_bufs, struct buffer_head **descriptorp, int *offsetp, - struct jbd2_revoke_record_s *record, - int write_op) + struct jbd2_revoke_record_s *record) { + journal_t *journal = transaction->t_journal; int csum_size = 0; struct buffer_head *descriptor; int sz, offset; @@ -597,7 +594,7 @@ static void write_one_revoke_record(journal_t *journal, /* Make sure we have a descriptor with space left for the record */ if (descriptor) { if (offset + sz > journal->j_blocksize - csum_size) { - flush_descriptor(journal, descriptor, offset, write_op); + flush_descriptor(journal, descriptor, offset); descriptor = NULL; } } @@ -654,7 +651,7 @@ static void jbd2_revoke_csum_set(journal_t *j, struct buffer_head *bh) static void flush_descriptor(journal_t *journal, struct buffer_head *descriptor, - int offset, int write_op) + int offset) { jbd2_journal_revoke_header_t *header; @@ -670,7 +667,7 @@ static void flush_descriptor(journal_t *journal, set_buffer_jwrite(descriptor); BUFFER_TRACE(descriptor, "write"); set_buffer_dirty(descriptor); - write_dirty_buffer(descriptor, write_op); + write_dirty_buffer(descriptor, WRITE_SYNC); } #endif diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 65407f6c9120..5716d9554e77 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -1327,10 +1327,8 @@ extern int jbd2_journal_init_revoke_caches(void); extern void jbd2_journal_destroy_revoke(journal_t *); extern int jbd2_journal_revoke (handle_t *, unsigned long long, struct buffer_head *); extern int jbd2_journal_cancel_revoke(handle_t *, struct journal_head *); -extern void jbd2_journal_write_revoke_records(journal_t *journal, - transaction_t *transaction, - struct list_head *log_bufs, - int write_op); +extern void jbd2_journal_write_revoke_records(transaction_t *transaction, + struct list_head *log_bufs); /* Recovery revoke support */ extern int jbd2_journal_set_revoke(journal_t *, unsigned long long, tid_t); -- cgit v1.2.3 From 32ab671599a89534f37e97d146c27690e371b661 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 22 Feb 2016 23:17:15 -0500 Subject: jbd2: factor out common descriptor block initialization Descriptor block header is initialized in several places. 
Factor out the common code into jbd2_journal_get_descriptor_buffer(). Signed-off-by: Jan Kara Signed-off-by: Theodore Ts'o --- fs/jbd2/commit.c | 16 +++++----------- fs/jbd2/journal.c | 9 ++++++++- fs/jbd2/revoke.c | 8 ++------ include/linux/jbd2.h | 2 +- 4 files changed, 16 insertions(+), 19 deletions(-) (limited to 'include/linux') diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index ae4402d15d46..cf221f3d955a 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -131,14 +131,12 @@ static int journal_submit_commit_record(journal_t *journal, if (is_journal_aborted(journal)) return 0; - bh = jbd2_journal_get_descriptor_buffer(journal); + bh = jbd2_journal_get_descriptor_buffer(commit_transaction, + JBD2_COMMIT_BLOCK); if (!bh) return 1; tmp = (struct commit_header *)bh->b_data; - tmp->h_magic = cpu_to_be32(JBD2_MAGIC_NUMBER); - tmp->h_blocktype = cpu_to_be32(JBD2_COMMIT_BLOCK); - tmp->h_sequence = cpu_to_be32(commit_transaction->t_tid); tmp->h_commit_sec = cpu_to_be64(now.tv_sec); tmp->h_commit_nsec = cpu_to_be32(now.tv_nsec); @@ -379,7 +377,6 @@ void jbd2_journal_commit_transaction(journal_t *journal) ktime_t start_time; u64 commit_time; char *tagp = NULL; - journal_header_t *header; journal_block_tag_t *tag = NULL; int space_left = 0; int first_tag = 0; @@ -615,7 +612,9 @@ void jbd2_journal_commit_transaction(journal_t *journal) jbd_debug(4, "JBD2: get descriptor\n"); - descriptor = jbd2_journal_get_descriptor_buffer(journal); + descriptor = jbd2_journal_get_descriptor_buffer( + commit_transaction, + JBD2_DESCRIPTOR_BLOCK); if (!descriptor) { jbd2_journal_abort(journal, -EIO); continue; @@ -624,11 +623,6 @@ void jbd2_journal_commit_transaction(journal_t *journal) jbd_debug(4, "JBD2: got buffer %llu (%p)\n", (unsigned long long)descriptor->b_blocknr, descriptor->b_data); - header = (journal_header_t *)descriptor->b_data; - header->h_magic = cpu_to_be32(JBD2_MAGIC_NUMBER); - header->h_blocktype = cpu_to_be32(JBD2_DESCRIPTOR_BLOCK); - header->h_sequence = cpu_to_be32(commit_transaction->t_tid); - tagp = &descriptor->b_data[sizeof(journal_header_t)]; space_left = descriptor->b_size - sizeof(journal_header_t); diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 81e622681c82..28d05bd9a588 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -805,10 +805,13 @@ int jbd2_journal_bmap(journal_t *journal, unsigned long blocknr, * But we don't bother doing that, so there will be coherency problems with * mmaps of blockdevs which hold live JBD-controlled filesystems. 
*/ -struct buffer_head *jbd2_journal_get_descriptor_buffer(journal_t *journal) +struct buffer_head * +jbd2_journal_get_descriptor_buffer(transaction_t *transaction, int type) { + journal_t *journal = transaction->t_journal; struct buffer_head *bh; unsigned long long blocknr; + journal_header_t *header; int err; err = jbd2_journal_next_log_block(journal, &blocknr); @@ -821,6 +824,10 @@ struct buffer_head *jbd2_journal_get_descriptor_buffer(journal_t *journal) return NULL; lock_buffer(bh); memset(bh->b_data, 0, journal->j_blocksize); + header = (journal_header_t *)bh->b_data; + header->h_magic = cpu_to_be32(JBD2_MAGIC_NUMBER); + header->h_blocktype = cpu_to_be32(type); + header->h_sequence = cpu_to_be32(transaction->t_tid); set_buffer_uptodate(bh); unlock_buffer(bh); BUFFER_TRACE(bh, "return this buffer"); diff --git a/fs/jbd2/revoke.c b/fs/jbd2/revoke.c index c839332be56b..d1ebb1d41d17 100644 --- a/fs/jbd2/revoke.c +++ b/fs/jbd2/revoke.c @@ -570,7 +570,6 @@ static void write_one_revoke_record(transaction_t *transaction, int csum_size = 0; struct buffer_head *descriptor; int sz, offset; - journal_header_t *header; /* If we are already aborting, this all becomes a noop. We still need to go round the loop in @@ -600,13 +599,10 @@ static void write_one_revoke_record(transaction_t *transaction, } if (!descriptor) { - descriptor = jbd2_journal_get_descriptor_buffer(journal); + descriptor = jbd2_journal_get_descriptor_buffer(transaction, + JBD2_REVOKE_BLOCK); if (!descriptor) return; - header = (journal_header_t *)descriptor->b_data; - header->h_magic = cpu_to_be32(JBD2_MAGIC_NUMBER); - header->h_blocktype = cpu_to_be32(JBD2_REVOKE_BLOCK); - header->h_sequence = cpu_to_be32(transaction->t_tid); /* Record it so that we can wait for IO completion later */ BUFFER_TRACE(descriptor, "file in log_bufs"); diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 5716d9554e77..3649cb8d3a41 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -1137,7 +1137,7 @@ static inline void jbd2_unfile_log_bh(struct buffer_head *bh) } /* Log buffer allocation */ -struct buffer_head *jbd2_journal_get_descriptor_buffer(journal_t *journal); +struct buffer_head *jbd2_journal_get_descriptor_buffer(transaction_t *, int); int jbd2_journal_next_log_block(journal_t *, unsigned long long *); int jbd2_journal_get_log_tail(journal_t *journal, tid_t *tid, unsigned long *block); -- cgit v1.2.3 From 1101cd4d13ba2f42c5da4c1b9081f35a73b5df31 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 22 Feb 2016 23:19:09 -0500 Subject: jbd2: unify revoke and tag block checksum handling Revoke and tag descriptor blocks are just different kinds of descriptor blocks and thus have checksum in the same place. Unify computation and checking of checksums for these. Reviewed-by: Darrick J. 
Wong Signed-off-by: Jan Kara Signed-off-by: Theodore Ts'o --- fs/jbd2/commit.c | 18 +----------------- fs/jbd2/journal.c | 15 +++++++++++++++ fs/jbd2/recovery.c | 31 +++++-------------------------- fs/jbd2/revoke.c | 19 ++----------------- include/linux/jbd2.h | 8 ++------ 5 files changed, 25 insertions(+), 66 deletions(-) (limited to 'include/linux') diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index cf221f3d955a..ae832cd44dd8 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -317,22 +317,6 @@ static void write_tag_block(journal_t *j, journal_block_tag_t *tag, tag->t_blocknr_high = cpu_to_be32((block >> 31) >> 1); } -static void jbd2_descr_block_csum_set(journal_t *j, - struct buffer_head *bh) -{ - struct jbd2_journal_block_tail *tail; - __u32 csum; - - if (!jbd2_journal_has_csum_v2or3(j)) - return; - - tail = (struct jbd2_journal_block_tail *)(bh->b_data + j->j_blocksize - - sizeof(struct jbd2_journal_block_tail)); - tail->t_checksum = 0; - csum = jbd2_chksum(j, j->j_csum_seed, bh->b_data, j->j_blocksize); - tail->t_checksum = cpu_to_be32(csum); -} - static void jbd2_block_tag_csum_set(journal_t *j, journal_block_tag_t *tag, struct buffer_head *bh, __u32 sequence) { @@ -714,7 +698,7 @@ void jbd2_journal_commit_transaction(journal_t *journal) tag->t_flags |= cpu_to_be16(JBD2_FLAG_LAST_TAG); - jbd2_descr_block_csum_set(journal, descriptor); + jbd2_descriptor_block_csum_set(journal, descriptor); start_journal_io: for (i = 0; i < bufs; i++) { struct buffer_head *bh = wbuf[i]; diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 28d05bd9a588..defa962a8e15 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -834,6 +834,21 @@ jbd2_journal_get_descriptor_buffer(transaction_t *transaction, int type) return bh; } +void jbd2_descriptor_block_csum_set(journal_t *j, struct buffer_head *bh) +{ + struct jbd2_journal_block_tail *tail; + __u32 csum; + + if (!jbd2_journal_has_csum_v2or3(j)) + return; + + tail = (struct jbd2_journal_block_tail *)(bh->b_data + j->j_blocksize - + sizeof(struct jbd2_journal_block_tail)); + tail->t_checksum = 0; + csum = jbd2_chksum(j, j->j_csum_seed, bh->b_data, j->j_blocksize); + tail->t_checksum = cpu_to_be32(csum); +} + /* * Return tid of the oldest transaction in the journal and block in the journal * where the transaction starts. 
diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c index 7f277e49fe88..08a456b96e4e 100644 --- a/fs/jbd2/recovery.c +++ b/fs/jbd2/recovery.c @@ -174,8 +174,7 @@ static int jread(struct buffer_head **bhp, journal_t *journal, return 0; } -static int jbd2_descr_block_csum_verify(journal_t *j, - void *buf) +static int jbd2_descriptor_block_csum_verify(journal_t *j, void *buf) { struct jbd2_journal_block_tail *tail; __be32 provided; @@ -522,8 +521,8 @@ static int do_one_pass(journal_t *journal, descr_csum_size = sizeof(struct jbd2_journal_block_tail); if (descr_csum_size > 0 && - !jbd2_descr_block_csum_verify(journal, - bh->b_data)) { + !jbd2_descriptor_block_csum_verify(journal, + bh->b_data)) { printk(KERN_ERR "JBD2: Invalid checksum " "recovering block %lu in log\n", next_log_block); @@ -811,26 +810,6 @@ static int do_one_pass(journal_t *journal, return err; } -static int jbd2_revoke_block_csum_verify(journal_t *j, - void *buf) -{ - struct jbd2_journal_revoke_tail *tail; - __be32 provided; - __u32 calculated; - - if (!jbd2_journal_has_csum_v2or3(j)) - return 1; - - tail = (struct jbd2_journal_revoke_tail *)(buf + j->j_blocksize - - sizeof(struct jbd2_journal_revoke_tail)); - provided = tail->r_checksum; - tail->r_checksum = 0; - calculated = jbd2_chksum(j, j->j_csum_seed, buf, j->j_blocksize); - tail->r_checksum = provided; - - return provided == cpu_to_be32(calculated); -} - /* Scan a revoke record, marking all blocks mentioned as revoked. */ static int scan_revoke_records(journal_t *journal, struct buffer_head *bh, @@ -846,11 +825,11 @@ static int scan_revoke_records(journal_t *journal, struct buffer_head *bh, offset = sizeof(jbd2_journal_revoke_header_t); rcount = be32_to_cpu(header->r_count); - if (!jbd2_revoke_block_csum_verify(journal, header)) + if (!jbd2_descriptor_block_csum_verify(journal, header)) return -EFSBADCRC; if (jbd2_journal_has_csum_v2or3(journal)) - csum_size = sizeof(struct jbd2_journal_revoke_tail); + csum_size = sizeof(struct jbd2_journal_block_tail); if (rcount > journal->j_blocksize - csum_size) return -EINVAL; max = rcount; diff --git a/fs/jbd2/revoke.c b/fs/jbd2/revoke.c index d1ebb1d41d17..91171dc352cb 100644 --- a/fs/jbd2/revoke.c +++ b/fs/jbd2/revoke.c @@ -583,7 +583,7 @@ static void write_one_revoke_record(transaction_t *transaction, /* Do we need to leave space at the end for a checksum? */ if (jbd2_journal_has_csum_v2or3(journal)) - csum_size = sizeof(struct jbd2_journal_revoke_tail); + csum_size = sizeof(struct jbd2_journal_block_tail); if (jbd2_has_feature_64bit(journal)) sz = 8; @@ -623,21 +623,6 @@ static void write_one_revoke_record(transaction_t *transaction, *offsetp = offset; } -static void jbd2_revoke_csum_set(journal_t *j, struct buffer_head *bh) -{ - struct jbd2_journal_revoke_tail *tail; - __u32 csum; - - if (!jbd2_journal_has_csum_v2or3(j)) - return; - - tail = (struct jbd2_journal_revoke_tail *)(bh->b_data + j->j_blocksize - - sizeof(struct jbd2_journal_revoke_tail)); - tail->r_checksum = 0; - csum = jbd2_chksum(j, j->j_csum_seed, bh->b_data, j->j_blocksize); - tail->r_checksum = cpu_to_be32(csum); -} - /* * Flush a revoke descriptor out to the journal. 
If we are aborting, * this is a noop; otherwise we are generating a buffer which needs to @@ -658,7 +643,7 @@ static void flush_descriptor(journal_t *journal, header = (jbd2_journal_revoke_header_t *)descriptor->b_data; header->r_count = cpu_to_be32(offset); - jbd2_revoke_csum_set(journal, descriptor); + jbd2_descriptor_block_csum_set(journal, descriptor); set_buffer_jwrite(descriptor); BUFFER_TRACE(descriptor, "write"); diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 3649cb8d3a41..fd1083c46c61 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -200,7 +200,7 @@ typedef struct journal_block_tag_s __be32 t_blocknr_high; /* most-significant high 32bits. */ } journal_block_tag_t; -/* Tail of descriptor block, for checksumming */ +/* Tail of descriptor or revoke block, for checksumming */ struct jbd2_journal_block_tail { __be32 t_checksum; /* crc32c(uuid+descr_block) */ }; @@ -215,11 +215,6 @@ typedef struct jbd2_journal_revoke_header_s __be32 r_count; /* Count of bytes used in the block */ } jbd2_journal_revoke_header_t; -/* Tail of revoke block, for checksumming */ -struct jbd2_journal_revoke_tail { - __be32 r_checksum; /* crc32c(uuid+revoke_block) */ -}; - /* Definitions for the journal tag flags word: */ #define JBD2_FLAG_ESCAPE 1 /* on-disk block is escaped */ #define JBD2_FLAG_SAME_UUID 2 /* block has same uuid as previous */ @@ -1138,6 +1133,7 @@ static inline void jbd2_unfile_log_bh(struct buffer_head *bh) /* Log buffer allocation */ struct buffer_head *jbd2_journal_get_descriptor_buffer(transaction_t *, int); +void jbd2_descriptor_block_csum_set(journal_t *, struct buffer_head *); int jbd2_journal_next_log_block(journal_t *, unsigned long long *); int jbd2_journal_get_log_tail(journal_t *journal, tid_t *tid, unsigned long *block); -- cgit v1.2.3 From dfc08a12e49a64f97d8b474da1d7745230cec5eb Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sun, 14 Feb 2016 18:50:40 +0800 Subject: f2fs: introduce f2fs_journal struct to wrap journal info Introduce a new structure f2fs_journal to wrap journal info in struct f2fs_summary_block for readability. 
struct f2fs_journal { union { __le16 n_nats; __le16 n_sits; }; union { struct nat_journal nat_j; struct sit_journal sit_j; struct f2fs_extra_info info; }; } __packed; struct f2fs_summary_block { struct f2fs_summary entries[ENTRIES_IN_SUM]; struct f2fs_journal journal; struct summary_footer footer; } __packed; Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 2 +- fs/f2fs/f2fs.h | 39 +++++++++++++++++------------------ fs/f2fs/node.c | 42 ++++++++++++++++++++------------------ fs/f2fs/segment.c | 54 ++++++++++++++++++++++++------------------------- fs/f2fs/super.c | 2 +- include/linux/f2fs_fs.h | 10 ++++++--- 6 files changed, 77 insertions(+), 72 deletions(-) (limited to 'include/linux') diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 536bec99bd64..3cdcdaf1d0fe 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -1051,7 +1051,7 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) if (sb->s_bdev->bd_part) kbytes_written += BD_PART_WRITTEN(sbi); - seg_i->sum_blk->info.kbytes_written = cpu_to_le64(kbytes_written); + seg_i->sum_blk->journal.info.kbytes_written = cpu_to_le64(kbytes_written); if (__remain_node_summaries(cpc->reason)) { write_node_summaries(sbi, start_blk); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index f9c294eee0f1..fc302ea9b1fb 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -183,37 +183,37 @@ struct fsync_inode_entry { block_t last_inode; /* block address locating the last inode */ }; -#define nats_in_cursum(sum) (le16_to_cpu(sum->n_nats)) -#define sits_in_cursum(sum) (le16_to_cpu(sum->n_sits)) +#define nats_in_cursum(jnl) (le16_to_cpu(jnl->n_nats)) +#define sits_in_cursum(jnl) (le16_to_cpu(jnl->n_sits)) -#define nat_in_journal(sum, i) (sum->nat_j.entries[i].ne) -#define nid_in_journal(sum, i) (sum->nat_j.entries[i].nid) -#define sit_in_journal(sum, i) (sum->sit_j.entries[i].se) -#define segno_in_journal(sum, i) (sum->sit_j.entries[i].segno) +#define nat_in_journal(jnl, i) (jnl->nat_j.entries[i].ne) +#define nid_in_journal(jnl, i) (jnl->nat_j.entries[i].nid) +#define sit_in_journal(jnl, i) (jnl->sit_j.entries[i].se) +#define segno_in_journal(jnl, i) (jnl->sit_j.entries[i].segno) -#define MAX_NAT_JENTRIES(sum) (NAT_JOURNAL_ENTRIES - nats_in_cursum(sum)) -#define MAX_SIT_JENTRIES(sum) (SIT_JOURNAL_ENTRIES - sits_in_cursum(sum)) +#define MAX_NAT_JENTRIES(jnl) (NAT_JOURNAL_ENTRIES - nats_in_cursum(jnl)) +#define MAX_SIT_JENTRIES(jnl) (SIT_JOURNAL_ENTRIES - sits_in_cursum(jnl)) -static inline int update_nats_in_cursum(struct f2fs_summary_block *rs, int i) +static inline int update_nats_in_cursum(struct f2fs_journal *journal, int i) { - int before = nats_in_cursum(rs); - rs->n_nats = cpu_to_le16(before + i); + int before = nats_in_cursum(journal); + journal->n_nats = cpu_to_le16(before + i); return before; } -static inline int update_sits_in_cursum(struct f2fs_summary_block *rs, int i) +static inline int update_sits_in_cursum(struct f2fs_journal *journal, int i) { - int before = sits_in_cursum(rs); - rs->n_sits = cpu_to_le16(before + i); + int before = sits_in_cursum(journal); + journal->n_sits = cpu_to_le16(before + i); return before; } -static inline bool __has_cursum_space(struct f2fs_summary_block *sum, int size, - int type) +static inline bool __has_cursum_space(struct f2fs_journal *journal, + int size, int type) { if (type == NAT_JOURNAL) - return size <= MAX_NAT_JENTRIES(sum); - return size <= MAX_SIT_JENTRIES(sum); + return size <= MAX_NAT_JENTRIES(journal); + return size <= 
MAX_SIT_JENTRIES(journal); } /* @@ -1862,8 +1862,7 @@ void f2fs_wait_on_page_writeback(struct page *, enum page_type, bool); void f2fs_wait_on_encrypted_page_writeback(struct f2fs_sb_info *, block_t); void write_data_summaries(struct f2fs_sb_info *, block_t); void write_node_summaries(struct f2fs_sb_info *, block_t); -int lookup_journal_in_cursum(struct f2fs_summary_block *, - int, unsigned int, int); +int lookup_journal_in_cursum(struct f2fs_journal *, int, unsigned int, int); void flush_sit_entries(struct f2fs_sb_info *, struct cp_control *); int build_segment_manager(struct f2fs_sb_info *); void destroy_segment_manager(struct f2fs_sb_info *); diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 150907ffa7aa..8230e35fc979 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -354,7 +354,7 @@ void get_node_info(struct f2fs_sb_info *sbi, nid_t nid, struct node_info *ni) { struct f2fs_nm_info *nm_i = NM_I(sbi); struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA); - struct f2fs_summary_block *sum = curseg->sum_blk; + struct f2fs_journal *journal = &curseg->sum_blk->journal; nid_t start_nid = START_NID(nid); struct f2fs_nat_block *nat_blk; struct page *page = NULL; @@ -382,9 +382,9 @@ void get_node_info(struct f2fs_sb_info *sbi, nid_t nid, struct node_info *ni) /* Check current segment summary */ mutex_lock(&curseg->curseg_mutex); - i = lookup_journal_in_cursum(sum, NAT_JOURNAL, nid, 0); + i = lookup_journal_in_cursum(journal, NAT_JOURNAL, nid, 0); if (i >= 0) { - ne = nat_in_journal(sum, i); + ne = nat_in_journal(journal, i); node_info_from_raw_nat(ni, &ne); } mutex_unlock(&curseg->curseg_mutex); @@ -1613,7 +1613,7 @@ static void build_free_nids(struct f2fs_sb_info *sbi) { struct f2fs_nm_info *nm_i = NM_I(sbi); struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA); - struct f2fs_summary_block *sum = curseg->sum_blk; + struct f2fs_journal *journal = &curseg->sum_blk->journal; int i = 0; nid_t nid = nm_i->next_scan_nid; @@ -1646,9 +1646,11 @@ static void build_free_nids(struct f2fs_sb_info *sbi) /* find free nids from current sum_pages */ mutex_lock(&curseg->curseg_mutex); - for (i = 0; i < nats_in_cursum(sum); i++) { - block_t addr = le32_to_cpu(nat_in_journal(sum, i).block_addr); - nid = le32_to_cpu(nid_in_journal(sum, i)); + for (i = 0; i < nats_in_cursum(journal); i++) { + block_t addr; + + addr = le32_to_cpu(nat_in_journal(journal, i).block_addr); + nid = le32_to_cpu(nid_in_journal(journal, i)); if (addr == NULL_ADDR) add_free_nid(sbi, nid, true); else @@ -1918,16 +1920,16 @@ static void remove_nats_in_journal(struct f2fs_sb_info *sbi) { struct f2fs_nm_info *nm_i = NM_I(sbi); struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA); - struct f2fs_summary_block *sum = curseg->sum_blk; + struct f2fs_journal *journal = &curseg->sum_blk->journal; int i; mutex_lock(&curseg->curseg_mutex); - for (i = 0; i < nats_in_cursum(sum); i++) { + for (i = 0; i < nats_in_cursum(journal); i++) { struct nat_entry *ne; struct f2fs_nat_entry raw_ne; - nid_t nid = le32_to_cpu(nid_in_journal(sum, i)); + nid_t nid = le32_to_cpu(nid_in_journal(journal, i)); - raw_ne = nat_in_journal(sum, i); + raw_ne = nat_in_journal(journal, i); ne = __lookup_nat_cache(nm_i, nid); if (!ne) { @@ -1936,7 +1938,7 @@ static void remove_nats_in_journal(struct f2fs_sb_info *sbi) } __set_nat_cache_dirty(nm_i, ne); } - update_nats_in_cursum(sum, -i); + update_nats_in_cursum(journal, -i); mutex_unlock(&curseg->curseg_mutex); } @@ -1962,7 +1964,7 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi, struct 
nat_entry_set *set) { struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA); - struct f2fs_summary_block *sum = curseg->sum_blk; + struct f2fs_journal *journal = &curseg->sum_blk->journal; nid_t start_nid = set->set * NAT_ENTRY_PER_BLOCK; bool to_journal = true; struct f2fs_nat_block *nat_blk; @@ -1974,7 +1976,7 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi, * #1, flush nat entries to journal in current hot data summary block. * #2, flush nat entries to nat page. */ - if (!__has_cursum_space(sum, set->entry_cnt, NAT_JOURNAL)) + if (!__has_cursum_space(journal, set->entry_cnt, NAT_JOURNAL)) to_journal = false; if (to_journal) { @@ -1995,11 +1997,11 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi, continue; if (to_journal) { - offset = lookup_journal_in_cursum(sum, + offset = lookup_journal_in_cursum(journal, NAT_JOURNAL, nid, 1); f2fs_bug_on(sbi, offset < 0); - raw_ne = &nat_in_journal(sum, offset); - nid_in_journal(sum, offset) = cpu_to_le32(nid); + raw_ne = &nat_in_journal(journal, offset); + nid_in_journal(journal, offset) = cpu_to_le32(nid); } else { raw_ne = &nat_blk->entries[nid - start_nid]; } @@ -2028,7 +2030,7 @@ void flush_nat_entries(struct f2fs_sb_info *sbi) { struct f2fs_nm_info *nm_i = NM_I(sbi); struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA); - struct f2fs_summary_block *sum = curseg->sum_blk; + struct f2fs_journal *journal = &curseg->sum_blk->journal; struct nat_entry_set *setvec[SETVEC_SIZE]; struct nat_entry_set *set, *tmp; unsigned int found; @@ -2045,7 +2047,7 @@ void flush_nat_entries(struct f2fs_sb_info *sbi) * entries, remove all entries from journal and merge them * into nat entry set. */ - if (!__has_cursum_space(sum, nm_i->dirty_nat_cnt, NAT_JOURNAL)) + if (!__has_cursum_space(journal, nm_i->dirty_nat_cnt, NAT_JOURNAL)) remove_nats_in_journal(sbi); while ((found = __gang_lookup_nat_set(nm_i, @@ -2054,7 +2056,7 @@ void flush_nat_entries(struct f2fs_sb_info *sbi) set_idx = setvec[found - 1]->set + 1; for (idx = 0; idx < found; idx++) __adjust_nat_entry_set(setvec[idx], &sets, - MAX_NAT_JENTRIES(sum)); + MAX_NAT_JENTRIES(journal)); } /* flush dirty nats in nat entry set */ diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index ad5da895260a..5902a67c5a1c 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1539,11 +1539,11 @@ static int read_compacted_summaries(struct f2fs_sb_info *sbi) /* Step 1: restore nat cache */ seg_i = CURSEG_I(sbi, CURSEG_HOT_DATA); - memcpy(&seg_i->sum_blk->n_nats, kaddr, SUM_JOURNAL_SIZE); + memcpy(&seg_i->sum_blk->journal.n_nats, kaddr, SUM_JOURNAL_SIZE); /* Step 2: restore sit cache */ seg_i = CURSEG_I(sbi, CURSEG_COLD_DATA); - memcpy(&seg_i->sum_blk->n_sits, kaddr + SUM_JOURNAL_SIZE, + memcpy(&seg_i->sum_blk->journal.n_sits, kaddr + SUM_JOURNAL_SIZE, SUM_JOURNAL_SIZE); offset = 2 * SUM_JOURNAL_SIZE; @@ -1695,12 +1695,12 @@ static void write_compacted_summaries(struct f2fs_sb_info *sbi, block_t blkaddr) /* Step 1: write nat cache */ seg_i = CURSEG_I(sbi, CURSEG_HOT_DATA); - memcpy(kaddr, &seg_i->sum_blk->n_nats, SUM_JOURNAL_SIZE); + memcpy(kaddr, &seg_i->sum_blk->journal.n_nats, SUM_JOURNAL_SIZE); written_size += SUM_JOURNAL_SIZE; /* Step 2: write sit cache */ seg_i = CURSEG_I(sbi, CURSEG_COLD_DATA); - memcpy(kaddr + written_size, &seg_i->sum_blk->n_sits, + memcpy(kaddr + written_size, &seg_i->sum_blk->journal.n_sits, SUM_JOURNAL_SIZE); written_size += SUM_JOURNAL_SIZE; @@ -1768,24 +1768,24 @@ void write_node_summaries(struct f2fs_sb_info *sbi, block_t start_blk) 
write_normal_summaries(sbi, start_blk, CURSEG_HOT_NODE); } -int lookup_journal_in_cursum(struct f2fs_summary_block *sum, int type, +int lookup_journal_in_cursum(struct f2fs_journal *journal, int type, unsigned int val, int alloc) { int i; if (type == NAT_JOURNAL) { - for (i = 0; i < nats_in_cursum(sum); i++) { - if (le32_to_cpu(nid_in_journal(sum, i)) == val) + for (i = 0; i < nats_in_cursum(journal); i++) { + if (le32_to_cpu(nid_in_journal(journal, i)) == val) return i; } - if (alloc && __has_cursum_space(sum, 1, NAT_JOURNAL)) - return update_nats_in_cursum(sum, 1); + if (alloc && __has_cursum_space(journal, 1, NAT_JOURNAL)) + return update_nats_in_cursum(journal, 1); } else if (type == SIT_JOURNAL) { - for (i = 0; i < sits_in_cursum(sum); i++) - if (le32_to_cpu(segno_in_journal(sum, i)) == val) + for (i = 0; i < sits_in_cursum(journal); i++) + if (le32_to_cpu(segno_in_journal(journal, i)) == val) return i; - if (alloc && __has_cursum_space(sum, 1, SIT_JOURNAL)) - return update_sits_in_cursum(sum, 1); + if (alloc && __has_cursum_space(journal, 1, SIT_JOURNAL)) + return update_sits_in_cursum(journal, 1); } return -1; } @@ -1889,20 +1889,20 @@ static void add_sits_in_set(struct f2fs_sb_info *sbi) static void remove_sits_in_journal(struct f2fs_sb_info *sbi) { struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA); - struct f2fs_summary_block *sum = curseg->sum_blk; + struct f2fs_journal *journal = &curseg->sum_blk->journal; int i; - for (i = sits_in_cursum(sum) - 1; i >= 0; i--) { + for (i = 0; i < sits_in_cursum(journal); i++) { unsigned int segno; bool dirtied; - segno = le32_to_cpu(segno_in_journal(sum, i)); + segno = le32_to_cpu(segno_in_journal(journal, i)); dirtied = __mark_sit_entry_dirty(sbi, segno); if (!dirtied) add_sit_entry(segno, &SM_I(sbi)->sit_entry_set); } - update_sits_in_cursum(sum, -sits_in_cursum(sum)); + update_sits_in_cursum(journal, -i); } /* @@ -1914,7 +1914,7 @@ void flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc) struct sit_info *sit_i = SIT_I(sbi); unsigned long *bitmap = sit_i->dirty_sentries_bitmap; struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA); - struct f2fs_summary_block *sum = curseg->sum_blk; + struct f2fs_journal *journal = &curseg->sum_blk->journal; struct sit_entry_set *ses, *tmp; struct list_head *head = &SM_I(sbi)->sit_entry_set; bool to_journal = true; @@ -1937,7 +1937,7 @@ void flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc) * entries, remove all entries from journal and add and account * them in sit entry set. 
*/ - if (!__has_cursum_space(sum, sit_i->dirty_sentries, SIT_JOURNAL)) + if (!__has_cursum_space(journal, sit_i->dirty_sentries, SIT_JOURNAL)) remove_sits_in_journal(sbi); /* @@ -1954,7 +1954,7 @@ void flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc) unsigned int segno = start_segno; if (to_journal && - !__has_cursum_space(sum, ses->entry_cnt, SIT_JOURNAL)) + !__has_cursum_space(journal, ses->entry_cnt, SIT_JOURNAL)) to_journal = false; if (!to_journal) { @@ -1975,13 +1975,13 @@ void flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc) } if (to_journal) { - offset = lookup_journal_in_cursum(sum, + offset = lookup_journal_in_cursum(journal, SIT_JOURNAL, segno, 1); f2fs_bug_on(sbi, offset < 0); - segno_in_journal(sum, offset) = + segno_in_journal(journal, offset) = cpu_to_le32(segno); seg_info_to_raw_sit(se, - &sit_in_journal(sum, offset)); + &sit_in_journal(journal, offset)); } else { sit_offset = SIT_ENTRY_OFFSET(sit_i, segno); seg_info_to_raw_sit(se, @@ -2150,7 +2150,7 @@ static void build_sit_entries(struct f2fs_sb_info *sbi) { struct sit_info *sit_i = SIT_I(sbi); struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA); - struct f2fs_summary_block *sum = curseg->sum_blk; + struct f2fs_journal *journal = &curseg->sum_blk->journal; int sit_blk_cnt = SIT_BLK_CNT(sbi); unsigned int i, start, end; unsigned int readed, start_blk = 0; @@ -2169,10 +2169,10 @@ static void build_sit_entries(struct f2fs_sb_info *sbi) struct page *page; mutex_lock(&curseg->curseg_mutex); - for (i = 0; i < sits_in_cursum(sum); i++) { - if (le32_to_cpu(segno_in_journal(sum, i)) + for (i = 0; i < sits_in_cursum(journal); i++) { + if (le32_to_cpu(segno_in_journal(journal, i)) == start) { - sit = sit_in_journal(sum, i); + sit = sit_in_journal(journal, i); mutex_unlock(&curseg->curseg_mutex); goto got_it; } diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index fa4bbe0eab7a..ecee9ef74266 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1414,7 +1414,7 @@ try_onemore: seg_i = CURSEG_I(sbi, CURSEG_HOT_NODE); if (__exist_node_summaries(sbi)) sbi->kbytes_written = - le64_to_cpu(seg_i->sum_blk->info.kbytes_written); + le64_to_cpu(seg_i->sum_blk->journal.info.kbytes_written); build_gc_manager(sbi); diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index f43e6a01a023..9eb215a155e0 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -398,9 +398,7 @@ struct f2fs_extra_info { __u8 reserved[EXTRA_INFO_RESERVED]; } __packed; -/* 4KB-sized summary block structure */ -struct f2fs_summary_block { - struct f2fs_summary entries[ENTRIES_IN_SUM]; +struct f2fs_journal { union { __le16 n_nats; __le16 n_sits; @@ -411,6 +409,12 @@ struct f2fs_summary_block { struct sit_journal sit_j; struct f2fs_extra_info info; }; +} __packed; + +/* 4KB-sized summary block structure */ +struct f2fs_summary_block { + struct f2fs_summary entries[ENTRIES_IN_SUM]; + struct f2fs_journal journal; struct summary_footer footer; } __packed; -- cgit v1.2.3 From 0cf3292cde22f8843ae5d1eeb8466d8121243c1a Mon Sep 17 00:00:00 2001 From: Laxman Dewangan Date: Mon, 15 Feb 2016 16:32:09 +0530 Subject: gpio: Add devm_ apis for gpiochip_add_data and gpiochip_remove Add device managed APIs devm_gpiochip_add_data() and devm_gpiochip_remove() for the APIs gpiochip_add_data() and gpiochip_remove(). This helps in reducing code in error path and sometimes removal of .remove callback for driver unbind. 
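As an illustration, a probe path using the new API can rely on devres and often drop both its error unwinding and the .remove callback. A minimal sketch, assuming a hypothetical "foo" platform driver that embeds its struct gpio_chip in driver data (the driver name and ops setup are illustrative, not part of this patch):

#include <linux/gpio/driver.h>
#include <linux/platform_device.h>
#include <linux/slab.h>

struct foo_gpio {
	struct gpio_chip chip;
};

static int foo_gpio_probe(struct platform_device *pdev)
{
	struct foo_gpio *foo;

	foo = devm_kzalloc(&pdev->dev, sizeof(*foo), GFP_KERNEL);
	if (!foo)
		return -ENOMEM;

	foo->chip.label = "foo-gpio";
	foo->chip.parent = &pdev->dev;
	/* ... fill in ops, base and ngpio here ... */

	/* chip is unregistered automatically when the device is unbound */
	return devm_gpiochip_add_data(&pdev->dev, &foo->chip, foo);
}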
Signed-off-by: Laxman Dewangan --- drivers/gpio/gpiolib.c | 74 +++++++++++++++++++++++++++++++++++++++++++++ include/linux/gpio/driver.h | 4 +++ 2 files changed, 78 insertions(+) (limited to 'include/linux') diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index 872774a404f1..e31d0a1e6f7c 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -723,6 +723,80 @@ void gpiochip_remove(struct gpio_chip *chip) } EXPORT_SYMBOL_GPL(gpiochip_remove); +static void devm_gpio_chip_release(struct device *dev, void *res) +{ + struct gpio_chip *chip = *(struct gpio_chip **)res; + + gpiochip_remove(chip); +} + +static int devm_gpio_chip_match(struct device *dev, void *res, void *data) + +{ + struct gpio_chip **r = res; + + if (!r || !*r) { + WARN_ON(!r || !*r); + return 0; + } + + return *r == data; } + +/** + * devm_gpiochip_add_data() - Resource managed gpiochip_add_data() + * @dev: the device the gpio_chip belongs to. + * @chip: the chip to register, with chip->base initialized + * Context: potentially before irqs will work + * + * Returns a negative errno if the chip can't be registered, such as + * because the chip->base is invalid or already associated with a + * different chip. Otherwise it returns zero as a success code. + * + * The gpio chip will automatically be released when the device is unbound. + */ +int devm_gpiochip_add_data(struct device *dev, struct gpio_chip *chip, + void *data) +{ + struct gpio_chip **ptr; + int ret; + + ptr = devres_alloc(devm_gpio_chip_release, sizeof(*ptr), + GFP_KERNEL); + if (!ptr) + return -ENOMEM; + + ret = gpiochip_add_data(chip, data); + if (ret < 0) { + devres_free(ptr); + return ret; + } + + *ptr = chip; + devres_add(dev, ptr); + + return 0; +} +EXPORT_SYMBOL_GPL(devm_gpiochip_add_data); + +/** + * devm_gpiochip_remove() - Resource managed gpiochip_remove() + * @dev: device for which the resource was allocated + * @chip: the chip to remove + * + * A gpio_chip with any GPIOs still requested may not be removed.
+ */ +void devm_gpiochip_remove(struct device *dev, struct gpio_chip *chip) +{ + int ret; + + ret = devres_release(dev, devm_gpio_chip_release, + devm_gpio_chip_match, chip); + WARN_ON(ret); +} +EXPORT_SYMBOL_GPL(devm_gpiochip_remove); + /** * gpiochip_find() - iterator for locating a specific gpio_chip * @data: data to pass to match function diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index 639607658ed8..bee976f82788 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -192,6 +192,10 @@ static inline int gpiochip_add(struct gpio_chip *chip) return gpiochip_add_data(chip, NULL); } extern void gpiochip_remove(struct gpio_chip *chip); +extern int devm_gpiochip_add_data(struct device *dev, struct gpio_chip *chip, + void *data); +extern void devm_gpiochip_remove(struct device *dev, struct gpio_chip *chip); + extern struct gpio_chip *gpiochip_find(void *data, int (*match)(struct gpio_chip *chip, void *data)); -- cgit v1.2.3 From b38e42e962dbc2fbc3839ce70750881db7c9277e Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 23 Feb 2016 10:00:50 -0500 Subject: cgroup: convert cgroup_subsys flag fields to bool bitfields Signed-off-by: Tejun Heo Cc: Li Zefan Cc: Ingo Molnar Cc: Peter Zijlstra --- include/linux/cgroup-defs.h | 6 +++--- kernel/cpuset.c | 2 +- kernel/sched/core.c | 2 +- kernel/sched/cpuacct.c | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 0abf6aa86c81..8fc3f0452289 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -444,7 +444,7 @@ struct cgroup_subsys { void (*free)(struct task_struct *task); void (*bind)(struct cgroup_subsys_state *root_css); - int early_init; + bool early_init:1; /* * If %false, this subsystem is properly hierarchical - @@ -458,8 +458,8 @@ struct cgroup_subsys { * cases. Eventually, all subsystems will be made properly * hierarchical and this will go away.
*/ - bool broken_hierarchy; - bool warned_broken_hierarchy; + bool broken_hierarchy:1; + bool warned_broken_hierarchy:1; /* the following two fields are initialized automatically during boot */ int id; diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 41989ab4db57..90899837ea78 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -2089,7 +2089,7 @@ struct cgroup_subsys cpuset_cgrp_subsys = { .attach = cpuset_attach, .bind = cpuset_bind, .legacy_cftypes = files, - .early_init = 1, + .early_init = true, }; /** diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 44253adb3c36..0f5abc6e4ff3 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -8706,7 +8706,7 @@ struct cgroup_subsys cpu_cgrp_subsys = { .can_attach = cpu_cgroup_can_attach, .attach = cpu_cgroup_attach, .legacy_cftypes = cpu_files, - .early_init = 1, + .early_init = true, }; #endif /* CONFIG_CGROUP_SCHED */ diff --git a/kernel/sched/cpuacct.c b/kernel/sched/cpuacct.c index dd7cbb55bbf2..2ddaebf7469a 100644 --- a/kernel/sched/cpuacct.c +++ b/kernel/sched/cpuacct.c @@ -279,5 +279,5 @@ struct cgroup_subsys cpuacct_cgrp_subsys = { .css_alloc = cpuacct_css_alloc, .css_free = cpuacct_css_free, .legacy_cftypes = files, - .early_init = 1, + .early_init = true, }; -- cgit v1.2.3 From d6dde63e90cf0d34ae61e885a5eb254c59eb3358 Mon Sep 17 00:00:00 2001 From: Andreas Irestål Date: Tue, 16 Feb 2016 13:56:42 +0100 Subject: ASoC: adau17x1: Correct typos in file headers Signed-off-by: Andreas Irestål Signed-off-by: Mark Brown --- include/linux/platform_data/adau17x1.h | 2 +- sound/soc/codecs/adau1761-i2c.c | 2 +- sound/soc/codecs/adau1761-spi.c | 2 +- sound/soc/codecs/adau1761.c | 2 +- sound/soc/codecs/adau1781.c | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/platform_data/adau17x1.h b/include/linux/platform_data/adau17x1.h index a81766cae230..9db1b905df24 100644 --- a/include/linux/platform_data/adau17x1.h +++ b/include/linux/platform_data/adau17x1.h @@ -1,5 +1,5 @@ /* - * Driver for ADAU1761/ADAU1461/ADAU1761/ADAU1961/ADAU1781/ADAU1781 codecs + * Driver for ADAU1361/ADAU1461/ADAU1761/ADAU1961/ADAU1381/ADAU1781 codecs * * Copyright 2011-2014 Analog Devices Inc. * Author: Lars-Peter Clausen diff --git a/sound/soc/codecs/adau1761-i2c.c b/sound/soc/codecs/adau1761-i2c.c index 348ccb17d3cc..a1b12671c307 100644 --- a/sound/soc/codecs/adau1761-i2c.c +++ b/sound/soc/codecs/adau1761-i2c.c @@ -1,5 +1,5 @@ /* - * Driver for ADAU1761/ADAU1461/ADAU1761/ADAU1961 codec + * Driver for ADAU1361/ADAU1461/ADAU1761/ADAU1961 codec * * Copyright 2014 Analog Devices Inc. * Author: Lars-Peter Clausen diff --git a/sound/soc/codecs/adau1761-spi.c b/sound/soc/codecs/adau1761-spi.c index 8bc1fbd25fcc..188334928c23 100644 --- a/sound/soc/codecs/adau1761-spi.c +++ b/sound/soc/codecs/adau1761-spi.c @@ -1,5 +1,5 @@ /* - * Driver for ADAU1761/ADAU1461/ADAU1761/ADAU1961 codec + * Driver for ADAU1361/ADAU1461/ADAU1761/ADAU1961 codec * * Copyright 2014 Analog Devices Inc. * Author: Lars-Peter Clausen diff --git a/sound/soc/codecs/adau1761.c b/sound/soc/codecs/adau1761.c index e7136b1956a3..b95d29dbd13d 100644 --- a/sound/soc/codecs/adau1761.c +++ b/sound/soc/codecs/adau1761.c @@ -1,5 +1,5 @@ /* - * Driver for ADAU1761/ADAU1461/ADAU1761/ADAU1961 codec + * Driver for ADAU1361/ADAU1461/ADAU1761/ADAU1961 codec * * Copyright 2011-2013 Analog Devices Inc.
* Author: Lars-Peter Clausen diff --git a/sound/soc/codecs/adau1781.c b/sound/soc/codecs/adau1781.c index fde9068550a6..bc1bb56dae63 100644 --- a/sound/soc/codecs/adau1781.c +++ b/sound/soc/codecs/adau1781.c @@ -1,5 +1,5 @@ /* - * Driver for ADAU1781/ADAU1781 codec + * Driver for ADAU1381/ADAU1781 codec * * Copyright 2011-2013 Analog Devices Inc. * Author: Lars-Peter Clausen -- cgit v1.2.3 From 506bcfa8abebdbcebdc17b03e96e38dc0b8ce765 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Sun, 13 Dec 2015 15:41:05 +0200 Subject: mac80211: limit the A-MSDU Tx based on peer's capabilities In VHT, the specification allows to limit the number of MSDUs in an A-MSDU in the Extended Capabilities IE. There is also a limitation on the byte size in the VHT IE. In HT, the only limitation is on the byte size. Parse the capabilities from the peer and make them available to the driver. In HT, there is another limitation when a BA agreement is active: the byte size can't be greater than 4095. This is not enforced here. Signed-off-by: Emmanuel Grumbach Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 19 +++++++++++++++++++ include/net/mac80211.h | 14 ++++++++++++++ net/mac80211/cfg.c | 29 +++++++++++++++++++++++++++++ net/mac80211/ht.c | 5 +++++ net/mac80211/vht.c | 17 +++++++++++++++++ 5 files changed, 84 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index d9ddb89533a7..3b1f6cef9513 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -163,6 +163,14 @@ static inline u16 ieee80211_sn_sub(u16 sn1, u16 sn2) /* 30 byte 4 addr hdr, 2 byte QoS, 2304 byte MSDU, 12 byte crypt, 4 byte FCS */ #define IEEE80211_MAX_FRAME_LEN 2352 +/* Maximal size of an A-MSDU */ +#define IEEE80211_MAX_MPDU_LEN_HT_3839 3839 +#define IEEE80211_MAX_MPDU_LEN_HT_7935 7935 + +#define IEEE80211_MAX_MPDU_LEN_VHT_3895 3895 +#define IEEE80211_MAX_MPDU_LEN_VHT_7991 7991 +#define IEEE80211_MAX_MPDU_LEN_VHT_11454 11454 + #define IEEE80211_MAX_SSID_LEN 32 #define IEEE80211_MAX_MESH_ID_LEN 32 @@ -1505,6 +1513,7 @@ struct ieee80211_vht_operation { #define IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_3895 0x00000000 #define IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_7991 0x00000001 #define IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_11454 0x00000002 +#define IEEE80211_VHT_CAP_MAX_MPDU_MASK 0x00000003 #define IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160MHZ 0x00000004 #define IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160_80PLUS80MHZ 0x00000008 #define IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_MASK 0x0000000C @@ -2086,6 +2095,16 @@ enum ieee80211_tdls_actioncode { #define WLAN_EXT_CAPA8_TDLS_WIDE_BW_ENABLED BIT(5) #define WLAN_EXT_CAPA8_OPMODE_NOTIF BIT(6) +/* Defines the maximal number of MSDUs in an A-MSDU. */ +#define WLAN_EXT_CAPA8_MAX_MSDU_IN_AMSDU_LSB BIT(7) +#define WLAN_EXT_CAPA9_MAX_MSDU_IN_AMSDU_MSB BIT(0) + +/* + * Fine Timing Measurement Initiator - bit 71 of @WLAN_EID_EXT_CAPABILITY + * information element + */ +#define WLAN_EXT_CAPA9_FTM_INITIATOR BIT(7) + /* TDLS specific payload type in the LLC/SNAP header */ #define WLAN_TDLS_SNAP_RFTYPE 0x2 diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 5910085af9e6..df5698ed8052 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1714,6 +1714,18 @@ struct ieee80211_sta_rates { * @tdls_initiator: indicates the STA is an initiator of the TDLS link. Only * valid if the STA is a TDLS peer in the first place. * @mfp: indicates whether the STA uses management frame protection or not. 
+ * @max_amsdu_subframes: indicates the maximal number of MSDUs in a single + * A-MSDU. Taken from the Extended Capabilities element. 0 means + * unlimited. + * @max_amsdu_len: indicates the maximal length of an A-MSDU in bytes. This + * field is always valid for packets with a VHT preamble. For packets + * with an HT preamble, additional limits apply: + * + If the skb is transmitted as part of a BA agreement, the + * A-MSDU maximal size is min(max_amsdu_len, 4095) bytes. + * + If the skb is not part of a BA agreement, the A-MSDU maximal + * size is min(max_amsdu_len, 7935) bytes. + * Both additional HT limits must be enforced by the low level driver. + * This is defined by the spec (IEEE 802.11-2012 section 8.3.2.2 NOTE 2). * @txq: per-TID data TX queues (if driver uses the TXQ abstraction) */ struct ieee80211_sta { @@ -1732,6 +1744,8 @@ struct ieee80211_sta { bool tdls; bool tdls_initiator; bool mfp; + u8 max_amsdu_subframes; + u16 max_amsdu_len; struct ieee80211_txq *txq[IEEE80211_NUM_TIDS]; diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 166a29fe6c35..66d22de93c8d 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1131,6 +1131,34 @@ static int sta_apply_parameters(struct ieee80211_local *local, sta->sta.max_sp = params->max_sp; } + /* The sender might not have sent the last bit, consider it to be 0 */ + if (params->ext_capab_len >= 8) { + u8 val = (params->ext_capab[7] & + WLAN_EXT_CAPA8_MAX_MSDU_IN_AMSDU_LSB) >> 7; + + /* we did get all the bits, take the MSB as well */ + if (params->ext_capab_len >= 9) { + u8 val_msb = params->ext_capab[8] & + WLAN_EXT_CAPA9_MAX_MSDU_IN_AMSDU_MSB; + val_msb <<= 1; + val |= val_msb; + } + + switch (val) { + case 1: + sta->sta.max_amsdu_subframes = 32; + break; + case 2: + sta->sta.max_amsdu_subframes = 16; + break; + case 3: + sta->sta.max_amsdu_subframes = 8; + break; + default: + sta->sta.max_amsdu_subframes = 0; + } + } + /* * cfg80211 validates this (1-2007) and allows setting the AID * only when creating a new station entry @@ -1160,6 +1188,7 @@ static int sta_apply_parameters(struct ieee80211_local *local, ieee80211_ht_cap_ie_to_sta_ht_cap(sdata, sband, params->ht_capa, sta); + /* VHT can override some HT caps such as the A-MSDU max length */ if (params->vht_capa) ieee80211_vht_cap_ie_to_sta_vht_cap(sdata, sband, params->vht_capa, sta); diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c index 7a76ce639d58..f4a528773563 100644 --- a/net/mac80211/ht.c +++ b/net/mac80211/ht.c @@ -230,6 +230,11 @@ bool ieee80211_ht_cap_ie_to_sta_ht_cap(struct ieee80211_sub_if_data *sdata, /* set Rx highest rate */ ht_cap.mcs.rx_highest = ht_cap_ie->mcs.rx_highest; + if (ht_cap.cap & IEEE80211_HT_CAP_MAX_AMSDU) + sta->sta.max_amsdu_len = IEEE80211_MAX_MPDU_LEN_HT_7935; + else + sta->sta.max_amsdu_len = IEEE80211_MAX_MPDU_LEN_HT_3839; + apply: changed = memcmp(&sta->sta.ht_cap, &ht_cap, sizeof(ht_cap)); diff --git a/net/mac80211/vht.c b/net/mac80211/vht.c index 050de08bf82e..204cf9ad3019 100644 --- a/net/mac80211/vht.c +++ b/net/mac80211/vht.c @@ -281,6 +281,23 @@ ieee80211_vht_cap_ie_to_sta_vht_cap(struct ieee80211_sub_if_data *sdata, } sta->sta.bandwidth = ieee80211_sta_cur_vht_bw(sta); + + /* If HT IE reported 3839 bytes only, stay with that size.
*/ + if (sta->sta.max_amsdu_len == IEEE80211_MAX_MPDU_LEN_HT_3839) + return; + + switch (vht_cap->cap & IEEE80211_VHT_CAP_MAX_MPDU_MASK) { + case IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_11454: + sta->sta.max_amsdu_len = IEEE80211_MAX_MPDU_LEN_VHT_11454; + break; + case IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_7991: + sta->sta.max_amsdu_len = IEEE80211_MAX_MPDU_LEN_VHT_7991; + break; + case IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_3895: + default: + sta->sta.max_amsdu_len = IEEE80211_MAX_MPDU_LEN_VHT_3895; + break; + } } enum ieee80211_sta_rx_bandwidth ieee80211_sta_cap_rx_bw(struct sta_info *sta) -- cgit v1.2.3 From dd21dfc645d5dce0657af78761b3fa11a3a95398 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 20 Jan 2016 10:39:23 +0100 Subject: rfkill: disentangle polling pause and suspend When suspended while polling is paused, polling will erroneously resume at resume time. Fix this by tracking pause and suspend in separate state variable and adding the necessary checks. Clarify the documentation on this as well. Signed-off-by: Johannes Berg --- include/linux/rfkill.h | 3 ++- net/rfkill/core.c | 17 +++++++++++++++-- 2 files changed, 17 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rfkill.h b/include/linux/rfkill.h index d9010789b4e8..7af625f6d226 100644 --- a/include/linux/rfkill.h +++ b/include/linux/rfkill.h @@ -104,7 +104,8 @@ int __must_check rfkill_register(struct rfkill *rfkill); * * Pause polling -- say transmitter is off for other reasons. * NOTE: not necessary for suspend/resume -- in that case the - * core stops polling anyway + * core stops polling anyway (but will also correctly handle + * the case of polling having been paused before suspend.) */ void rfkill_pause_polling(struct rfkill *rfkill); diff --git a/net/rfkill/core.c b/net/rfkill/core.c index f53bf3b6558b..166439995f34 100644 --- a/net/rfkill/core.c +++ b/net/rfkill/core.c @@ -57,6 +57,8 @@ struct rfkill { bool registered; bool persistent; + bool polling_paused; + bool suspended; const struct rfkill_ops *ops; void *data; @@ -786,6 +788,7 @@ void rfkill_pause_polling(struct rfkill *rfkill) if (!rfkill->ops->poll) return; + rfkill->polling_paused = true; cancel_delayed_work_sync(&rfkill->poll_work); } EXPORT_SYMBOL(rfkill_pause_polling); @@ -797,6 +800,11 @@ void rfkill_resume_polling(struct rfkill *rfkill) if (!rfkill->ops->poll) return; + rfkill->polling_paused = false; + + if (rfkill->suspended) + return; + queue_delayed_work(system_power_efficient_wq, &rfkill->poll_work, 0); } @@ -807,7 +815,8 @@ static int rfkill_suspend(struct device *dev) { struct rfkill *rfkill = to_rfkill(dev); - rfkill_pause_polling(rfkill); + rfkill->suspended = true; + cancel_delayed_work_sync(&rfkill->poll_work); return 0; } @@ -817,12 +826,16 @@ static int rfkill_resume(struct device *dev) struct rfkill *rfkill = to_rfkill(dev); bool cur; + rfkill->suspended = false; + if (!rfkill->persistent) { cur = !!(rfkill->state & RFKILL_BLOCK_SW); rfkill_set_block(rfkill, cur); } - rfkill_resume_polling(rfkill); + if (rfkill->ops->poll && !rfkill->polling_paused) + queue_delayed_work(system_power_efficient_wq, + &rfkill->poll_work, 0); return 0; } -- cgit v1.2.3 From 648b50dd6abf8e6e5b589bb8e6873a4596389dbe Mon Sep 17 00:00:00 2001 From: Heikki Krogerus Date: Mon, 25 Jan 2016 12:03:46 +0300 Subject: net: rfkill: add rfkill_find_type function Helper for finding the type based on name. Useful if the type needs to be determined based on device property. 
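For example, a driver that carries the rfkill type as a string in a device property can resolve it in one step. A hedged sketch, assuming a hypothetical "rfkill-type" string property (the property name is illustrative; device_property_read_string() is the generic device property API):

#include <linux/property.h>
#include <linux/rfkill.h>

static enum rfkill_type foo_rfkill_type(struct device *dev)
{
	const char *name = NULL;

	/* on a missing property, name stays NULL and we get RFKILL_TYPE_ALL */
	device_property_read_string(dev, "rfkill-type", &name);
	return rfkill_find_type(name);
}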
Signed-off-by: Heikki Krogerus [modify rfkill_types array and BUILD_BUG_ON to not cause errors] Signed-off-by: Johannes Berg --- include/linux/rfkill.h | 15 +++++++++++++ net/rfkill/core.c | 58 ++++++++++++++++++++++++------------------------ 2 files changed, 45 insertions(+), 28 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rfkill.h b/include/linux/rfkill.h index 7af625f6d226..e6a0031d1b1f 100644 --- a/include/linux/rfkill.h +++ b/include/linux/rfkill.h @@ -213,6 +213,15 @@ void rfkill_set_states(struct rfkill *rfkill, bool sw, bool hw); * @rfkill: rfkill struct to query */ bool rfkill_blocked(struct rfkill *rfkill); + +/** + * rfkill_find_type - Helper for finding rfkill type by name + * @name: the name of the type + * + * Returns enum rfkill_type that corresponds to the name. + */ +enum rfkill_type rfkill_find_type(const char *name); + #else /* !RFKILL */ static inline struct rfkill * __must_check rfkill_alloc(const char *name, @@ -269,6 +278,12 @@ static inline bool rfkill_blocked(struct rfkill *rfkill) { return false; } + +static inline enum rfkill_type rfkill_find_type(const char *name) +{ + return RFKILL_TYPE_ALL; +} + #endif /* RFKILL || RFKILL_MODULE */ diff --git a/net/rfkill/core.c b/net/rfkill/core.c index a805831d5d9b..2a23479a49f2 100644 --- a/net/rfkill/core.c +++ b/net/rfkill/core.c @@ -572,6 +572,34 @@ void rfkill_set_states(struct rfkill *rfkill, bool sw, bool hw) } EXPORT_SYMBOL(rfkill_set_states); +static const char * const rfkill_types[] = { + NULL, /* RFKILL_TYPE_ALL */ + "wlan", + "bluetooth", + "ultrawideband", + "wimax", + "wwan", + "gps", + "fm", + "nfc", +}; + +enum rfkill_type rfkill_find_type(const char *name) +{ + int i; + + BUILD_BUG_ON(ARRAY_SIZE(rfkill_types) != NUM_RFKILL_TYPES); + + if (!name) + return RFKILL_TYPE_ALL; + + for (i = 1; i < NUM_RFKILL_TYPES; i++) + if (!strcmp(name, rfkill_types[i])) + return i; + return RFKILL_TYPE_ALL; +} +EXPORT_SYMBOL(rfkill_find_type); + static ssize_t name_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -581,38 +609,12 @@ static ssize_t name_show(struct device *dev, struct device_attribute *attr, } static DEVICE_ATTR_RO(name); -static const char *rfkill_get_type_str(enum rfkill_type type) -{ - BUILD_BUG_ON(NUM_RFKILL_TYPES != RFKILL_TYPE_NFC + 1); - - switch (type) { - case RFKILL_TYPE_WLAN: - return "wlan"; - case RFKILL_TYPE_BLUETOOTH: - return "bluetooth"; - case RFKILL_TYPE_UWB: - return "ultrawideband"; - case RFKILL_TYPE_WIMAX: - return "wimax"; - case RFKILL_TYPE_WWAN: - return "wwan"; - case RFKILL_TYPE_GPS: - return "gps"; - case RFKILL_TYPE_FM: - return "fm"; - case RFKILL_TYPE_NFC: - return "nfc"; - default: - BUG(); - } -} - static ssize_t type_show(struct device *dev, struct device_attribute *attr, char *buf) { struct rfkill *rfkill = to_rfkill(dev); - return sprintf(buf, "%s\n", rfkill_get_type_str(rfkill->type)); + return sprintf(buf, "%s\n", rfkill_types[rfkill->type]); } static DEVICE_ATTR_RO(type); @@ -750,7 +752,7 @@ static int rfkill_dev_uevent(struct device *dev, struct kobj_uevent_env *env) if (error) return error; error = add_uevent_var(env, "RFKILL_TYPE=%s", - rfkill_get_type_str(rfkill->type)); + rfkill_types[rfkill->type]); if (error) return error; spin_lock_irqsave(&rfkill->lock, flags); -- cgit v1.2.3 From fb2e6b7b7b02ab35a9d5355a69097a6f60c69d38 Mon Sep 17 00:00:00 2001 From: Heikki Krogerus Date: Mon, 25 Jan 2016 12:03:49 +0300 Subject: net: rfkill: gpio: remove rfkill_gpio_platform_data No more users for it.
Signed-off-by: Heikki Krogerus Signed-off-by: Johannes Berg --- include/linux/rfkill-gpio.h | 37 ------------------------------------- net/rfkill/Kconfig | 3 +-- net/rfkill/rfkill-gpio.c | 8 -------- 3 files changed, 1 insertion(+), 47 deletions(-) delete mode 100644 include/linux/rfkill-gpio.h (limited to 'include/linux') diff --git a/include/linux/rfkill-gpio.h b/include/linux/rfkill-gpio.h deleted file mode 100644 index 20bcb55498cd..000000000000 --- a/include/linux/rfkill-gpio.h +++ /dev/null @@ -1,37 +0,0 @@ -/* - * Copyright (c) 2011, NVIDIA Corporation. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - */ - - -#ifndef __RFKILL_GPIO_H -#define __RFKILL_GPIO_H - -#include -#include - -/** - * struct rfkill_gpio_platform_data - platform data for rfkill gpio device. - * for unused gpio's, the expected value is -1. - * @name: name for the gpio rf kill instance - */ - -struct rfkill_gpio_platform_data { - char *name; - enum rfkill_type type; -}; - -#endif /* __RFKILL_GPIO_H */ diff --git a/net/rfkill/Kconfig b/net/rfkill/Kconfig index 598d374f6a35..868f1ad0415a 100644 --- a/net/rfkill/Kconfig +++ b/net/rfkill/Kconfig @@ -41,5 +41,4 @@ config RFKILL_GPIO default n help If you say yes here you get support of a generic gpio RFKILL - driver. The platform should fill in the appropriate fields in the - rfkill_gpio_platform_data structure and pass that to the driver. + driver. diff --git a/net/rfkill/rfkill-gpio.c b/net/rfkill/rfkill-gpio.c index 1a9c0316aad1..76c01cbd56e3 100644 --- a/net/rfkill/rfkill-gpio.c +++ b/net/rfkill/rfkill-gpio.c @@ -27,8 +27,6 @@ #include #include -#include - struct rfkill_gpio_data { const char *name; enum rfkill_type type; @@ -89,7 +87,6 @@ static int rfkill_gpio_acpi_probe(struct device *dev, static int rfkill_gpio_probe(struct platform_device *pdev) { - struct rfkill_gpio_platform_data *pdata = pdev->dev.platform_data; struct rfkill_gpio_data *rfkill; struct gpio_desc *gpio; const char *type_name; @@ -111,11 +108,6 @@ static int rfkill_gpio_probe(struct platform_device *pdev) ret = rfkill_gpio_acpi_probe(&pdev->dev, rfkill); if (ret) return ret; - } else if (pdata) { - rfkill->name = pdata->name; - rfkill->type = pdata->type; - } else { - return -ENODEV; } rfkill->clk = devm_clk_get(&pdev->dev, NULL); -- cgit v1.2.3 From e4fbf4767440472f9d23b0f25a2b905e1c63b6a8 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Sun, 10 Jan 2016 11:29:07 +0100 Subject: efi: stub: implement efi_get_random_bytes() based on EFI_RNG_PROTOCOL This exposes the firmware's implementation of EFI_RNG_PROTOCOL via a new function efi_get_random_bytes(). 
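
A minimal caller sketch (an assumption, not part of the patch): stub code that needs entropy can hand its output buffer straight to the new helper; demo_get_seed is a hypothetical name used only for illustration.

#include <linux/efi.h>
#include <asm/efi.h>

#include "efistub.h"

/* Hypothetical caller: fetch 8 random bytes from the firmware RNG.
 * If EFI_RNG_PROTOCOL is absent, locate_protocol fails inside
 * efi_get_random_bytes() and the EFI status is passed through. */
static efi_status_t demo_get_seed(efi_system_table_t *sys_table, u64 *seed)
{
	return efi_get_random_bytes(sys_table, sizeof(*seed), (u8 *)seed);
}
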
Reviewed-by: Matt Fleming Signed-off-by: Ard Biesheuvel Signed-off-by: Catalin Marinas --- drivers/firmware/efi/libstub/Makefile | 2 +- drivers/firmware/efi/libstub/efistub.h | 3 +++ drivers/firmware/efi/libstub/random.c | 35 ++++++++++++++++++++++++++++++++++ include/linux/efi.h | 6 +++++- 4 files changed, 44 insertions(+), 2 deletions(-) create mode 100644 drivers/firmware/efi/libstub/random.c (limited to 'include/linux') diff --git a/drivers/firmware/efi/libstub/Makefile b/drivers/firmware/efi/libstub/Makefile index aaf9c0bab42e..ad077944aa0e 100644 --- a/drivers/firmware/efi/libstub/Makefile +++ b/drivers/firmware/efi/libstub/Makefile @@ -36,7 +36,7 @@ lib-$(CONFIG_EFI_ARMSTUB) += arm-stub.o fdt.o string.o \ $(patsubst %.c,lib-%.o,$(arm-deps)) lib-$(CONFIG_ARM) += arm32-stub.o -lib-$(CONFIG_ARM64) += arm64-stub.o +lib-$(CONFIG_ARM64) += arm64-stub.o random.o CFLAGS_arm64-stub.o := -DTEXT_OFFSET=$(TEXT_OFFSET) # diff --git a/drivers/firmware/efi/libstub/efistub.h b/drivers/firmware/efi/libstub/efistub.h index 6b6548fda089..206b7252b9d1 100644 --- a/drivers/firmware/efi/libstub/efistub.h +++ b/drivers/firmware/efi/libstub/efistub.h @@ -43,4 +43,7 @@ void efi_get_virtmap(efi_memory_desc_t *memory_map, unsigned long map_size, unsigned long desc_size, efi_memory_desc_t *runtime_map, int *count); +efi_status_t efi_get_random_bytes(efi_system_table_t *sys_table, + unsigned long size, u8 *out); + #endif diff --git a/drivers/firmware/efi/libstub/random.c b/drivers/firmware/efi/libstub/random.c new file mode 100644 index 000000000000..97941ee5954f --- /dev/null +++ b/drivers/firmware/efi/libstub/random.c @@ -0,0 +1,35 @@ +/* + * Copyright (C) 2016 Linaro Ltd; + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ * + */ + +#include +#include + +#include "efistub.h" + +struct efi_rng_protocol { + efi_status_t (*get_info)(struct efi_rng_protocol *, + unsigned long *, efi_guid_t *); + efi_status_t (*get_rng)(struct efi_rng_protocol *, + efi_guid_t *, unsigned long, u8 *out); +}; + +efi_status_t efi_get_random_bytes(efi_system_table_t *sys_table_arg, + unsigned long size, u8 *out) +{ + efi_guid_t rng_proto = EFI_RNG_PROTOCOL_GUID; + efi_status_t status; + struct efi_rng_protocol *rng; + + status = efi_call_early(locate_protocol, &rng_proto, NULL, + (void **)&rng); + if (status != EFI_SUCCESS) + return status; + + return rng->get_rng(rng, NULL, size, out); +} diff --git a/include/linux/efi.h b/include/linux/efi.h index 569b5a866bb1..e747eb08b2be 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -299,7 +299,7 @@ typedef struct { void *open_protocol_information; void *protocols_per_handle; void *locate_handle_buffer; - void *locate_protocol; + efi_status_t (*locate_protocol)(efi_guid_t *, void *, void **); void *install_multiple_protocol_interfaces; void *uninstall_multiple_protocol_interfaces; void *calculate_crc32; @@ -599,6 +599,10 @@ void efi_native_runtime_setup(void); #define EFI_PROPERTIES_TABLE_GUID \ EFI_GUID( 0x880aaca3, 0x4adc, 0x4a04, 0x90, 0x79, 0xb7, 0x47, 0x34, 0x08, 0x25, 0xe5 ) +#define EFI_RNG_PROTOCOL_GUID \ + EFI_GUID(0x3152bca5, 0xeade, 0x433d, \ + 0x86, 0x2e, 0xc0, 0x1c, 0xdc, 0x29, 0x1f, 0x44) + typedef struct { efi_guid_t guid; u64 table; -- cgit v1.2.3 From ada68c31ba9c02d7aabdd87db979fe670b499d54 Mon Sep 17 00:00:00 2001 From: Achiad Shochat Date: Mon, 22 Feb 2016 18:17:23 +0200 Subject: net/mlx5: Introduce a new header file for physical port functions All the device physical port access functions are implemented in the port.c file. We just extract the exposure of these functions from driver.h into a dedicated header file called port.h. Signed-off-by: Achiad Shochat Signed-off-by: Saeed Mahameed Signed-off-by: David S. 
Miller --- drivers/infiniband/hw/mlx5/main.c | 1 + drivers/net/ethernet/mellanox/mlx5/core/en.h | 1 + drivers/net/ethernet/mellanox/mlx5/core/port.c | 1 + include/linux/mlx5/driver.h | 31 ------------ include/linux/mlx5/port.h | 69 ++++++++++++++++++++++++++ 5 files changed, 72 insertions(+), 31 deletions(-) create mode 100644 include/linux/mlx5/port.h (limited to 'include/linux') diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 03c418ccbc98..e1cea4415704 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -42,6 +42,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index aac071a7e830..15f6cdb842d5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -38,6 +38,7 @@ #include #include #include +#include #include #include #include "wq.h" diff --git a/drivers/net/ethernet/mellanox/mlx5/core/port.c b/drivers/net/ethernet/mellanox/mlx5/core/port.c index a87e773e93f3..1e863216ac4a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/port.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/port.c @@ -32,6 +32,7 @@ #include #include +#include #include #include "mlx5_core.h" diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 1e3006dcf35d..02adc67720ce 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -794,37 +794,6 @@ int mlx5_core_access_reg(struct mlx5_core_dev *dev, void *data_in, int size_in, void *data_out, int size_out, u16 reg_num, int arg, int write); -int mlx5_set_port_caps(struct mlx5_core_dev *dev, u8 port_num, u32 caps); -int mlx5_query_port_ptys(struct mlx5_core_dev *dev, u32 *ptys, - int ptys_size, int proto_mask, u8 local_port); -int mlx5_query_port_proto_cap(struct mlx5_core_dev *dev, - u32 *proto_cap, int proto_mask); -int mlx5_query_port_proto_admin(struct mlx5_core_dev *dev, - u32 *proto_admin, int proto_mask); -int mlx5_query_port_link_width_oper(struct mlx5_core_dev *dev, - u8 *link_width_oper, u8 local_port); -int mlx5_query_port_proto_oper(struct mlx5_core_dev *dev, - u8 *proto_oper, int proto_mask, - u8 local_port); -int mlx5_set_port_proto(struct mlx5_core_dev *dev, u32 proto_admin, - int proto_mask); -int mlx5_set_port_admin_status(struct mlx5_core_dev *dev, - enum mlx5_port_status status); -int mlx5_query_port_admin_status(struct mlx5_core_dev *dev, - enum mlx5_port_status *status); - -int mlx5_set_port_mtu(struct mlx5_core_dev *dev, int mtu, u8 port); -void mlx5_query_port_max_mtu(struct mlx5_core_dev *dev, int *max_mtu, u8 port); -void mlx5_query_port_oper_mtu(struct mlx5_core_dev *dev, int *oper_mtu, - u8 port); - -int mlx5_query_port_vl_hw_cap(struct mlx5_core_dev *dev, - u8 *vl_hw_cap, u8 local_port); - -int mlx5_set_port_pause(struct mlx5_core_dev *dev, u32 rx_pause, u32 tx_pause); -int mlx5_query_port_pause(struct mlx5_core_dev *dev, - u32 *rx_pause, u32 *tx_pause); - int mlx5_debug_eq_add(struct mlx5_core_dev *dev, struct mlx5_eq *eq); void mlx5_debug_eq_remove(struct mlx5_core_dev *dev, struct mlx5_eq *eq); int mlx5_core_eq_query(struct mlx5_core_dev *dev, struct mlx5_eq *eq, diff --git a/include/linux/mlx5/port.h b/include/linux/mlx5/port.h new file mode 100644 index 000000000000..7accd4a65da5 --- /dev/null +++ b/include/linux/mlx5/port.h @@ -0,0 +1,69 @@ +/* + * Copyright (c) 2016, Mellanox Technologies. All rights reserved. 
+ * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __MLX5_PORT_H__ +#define __MLX5_PORT_H__ + +#include + +int mlx5_set_port_caps(struct mlx5_core_dev *dev, u8 port_num, u32 caps); +int mlx5_query_port_ptys(struct mlx5_core_dev *dev, u32 *ptys, + int ptys_size, int proto_mask, u8 local_port); +int mlx5_query_port_proto_cap(struct mlx5_core_dev *dev, + u32 *proto_cap, int proto_mask); +int mlx5_query_port_proto_admin(struct mlx5_core_dev *dev, + u32 *proto_admin, int proto_mask); +int mlx5_query_port_link_width_oper(struct mlx5_core_dev *dev, + u8 *link_width_oper, u8 local_port); +int mlx5_query_port_proto_oper(struct mlx5_core_dev *dev, + u8 *proto_oper, int proto_mask, + u8 local_port); +int mlx5_set_port_proto(struct mlx5_core_dev *dev, u32 proto_admin, + int proto_mask); +int mlx5_set_port_admin_status(struct mlx5_core_dev *dev, + enum mlx5_port_status status); +int mlx5_query_port_admin_status(struct mlx5_core_dev *dev, + enum mlx5_port_status *status); + +int mlx5_set_port_mtu(struct mlx5_core_dev *dev, int mtu, u8 port); +void mlx5_query_port_max_mtu(struct mlx5_core_dev *dev, int *max_mtu, u8 port); +void mlx5_query_port_oper_mtu(struct mlx5_core_dev *dev, int *oper_mtu, + u8 port); + +int mlx5_query_port_vl_hw_cap(struct mlx5_core_dev *dev, + u8 *vl_hw_cap, u8 local_port); + +int mlx5_set_port_pause(struct mlx5_core_dev *dev, u32 rx_pause, u32 tx_pause); +int mlx5_query_port_pause(struct mlx5_core_dev *dev, + u32 *rx_pause, u32 *tx_pause); + +#endif /* __MLX5_PORT_H__ */ -- cgit v1.2.3 From ad909eb064219a64fd10e9c7d9f39a3042760025 Mon Sep 17 00:00:00 2001 From: Achiad Shochat Date: Mon, 22 Feb 2016 18:17:24 +0200 Subject: net/mlx5: Introduce physical port PFC access functions Add access functions to set and query a physical port PFC (Priority Flow Control) parameters. Signed-off-by: Achiad Shochat Signed-off-by: Saeed Mahameed Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlx5/core/port.c | 41 ++++++++++++++++++++++++++ include/linux/mlx5/port.h | 4 +++ 2 files changed, 45 insertions(+) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/port.c b/drivers/net/ethernet/mellanox/mlx5/core/port.c index 1e863216ac4a..dae70500b6a9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/port.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/port.c @@ -364,3 +364,44 @@ int mlx5_query_port_pause(struct mlx5_core_dev *dev, return 0; } EXPORT_SYMBOL_GPL(mlx5_query_port_pause); + +int mlx5_set_port_pfc(struct mlx5_core_dev *dev, u8 pfc_en_tx, u8 pfc_en_rx) +{ + u32 in[MLX5_ST_SZ_DW(pfcc_reg)]; + u32 out[MLX5_ST_SZ_DW(pfcc_reg)]; + + memset(in, 0, sizeof(in)); + MLX5_SET(pfcc_reg, in, local_port, 1); + MLX5_SET(pfcc_reg, in, pfctx, pfc_en_tx); + MLX5_SET(pfcc_reg, in, pfcrx, pfc_en_rx); + MLX5_SET_TO_ONES(pfcc_reg, in, prio_mask_tx); + MLX5_SET_TO_ONES(pfcc_reg, in, prio_mask_rx); + + return mlx5_core_access_reg(dev, in, sizeof(in), out, + sizeof(out), MLX5_REG_PFCC, 0, 1); +} +EXPORT_SYMBOL_GPL(mlx5_set_port_pfc); + +int mlx5_query_port_pfc(struct mlx5_core_dev *dev, u8 *pfc_en_tx, u8 *pfc_en_rx) +{ + u32 in[MLX5_ST_SZ_DW(pfcc_reg)]; + u32 out[MLX5_ST_SZ_DW(pfcc_reg)]; + int err; + + memset(in, 0, sizeof(in)); + MLX5_SET(pfcc_reg, in, local_port, 1); + + err = mlx5_core_access_reg(dev, in, sizeof(in), out, + sizeof(out), MLX5_REG_PFCC, 0, 0); + if (err) + return err; + + if (pfc_en_tx) + *pfc_en_tx = MLX5_GET(pfcc_reg, out, pfctx); + + if (pfc_en_rx) + *pfc_en_rx = MLX5_GET(pfcc_reg, out, pfcrx); + + return 0; +} +EXPORT_SYMBOL_GPL(mlx5_query_port_pfc); diff --git a/include/linux/mlx5/port.h b/include/linux/mlx5/port.h index 7accd4a65da5..4b3644caa936 100644 --- a/include/linux/mlx5/port.h +++ b/include/linux/mlx5/port.h @@ -66,4 +66,8 @@ int mlx5_set_port_pause(struct mlx5_core_dev *dev, u32 rx_pause, u32 tx_pause); int mlx5_query_port_pause(struct mlx5_core_dev *dev, u32 *rx_pause, u32 *tx_pause); +int mlx5_set_port_pfc(struct mlx5_core_dev *dev, u8 pfc_en_tx, u8 pfc_en_rx); +int mlx5_query_port_pfc(struct mlx5_core_dev *dev, u8 *pfc_en_tx, + u8 *pfc_en_rx); + #endif /* __MLX5_PORT_H__ */ -- cgit v1.2.3 From 4f3961eeafe0aca8f6b0933899ef0d91f561352d Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Mon, 22 Feb 2016 18:17:25 +0200 Subject: net/mlx5: Introduce physical port TC/prio access functions Add access functions to set and query a physical port TC groups and prio parameters. Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/port.c | 76 ++++++++++++++++++++++++++ include/linux/mlx5/driver.h | 2 + include/linux/mlx5/mlx5_ifc.h | 49 ++++++++++++++++- include/linux/mlx5/port.h | 6 ++ 4 files changed, 132 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/port.c b/drivers/net/ethernet/mellanox/mlx5/core/port.c index dae70500b6a9..569100d3f57b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/port.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/port.c @@ -405,3 +405,79 @@ int mlx5_query_port_pfc(struct mlx5_core_dev *dev, u8 *pfc_en_tx, u8 *pfc_en_rx) return 0; } EXPORT_SYMBOL_GPL(mlx5_query_port_pfc); + +int mlx5_max_tc(struct mlx5_core_dev *mdev) +{ + u8 num_tc = MLX5_CAP_GEN(mdev, max_tc) ? 
: 8; + + return num_tc - 1; +} + +int mlx5_set_port_prio_tc(struct mlx5_core_dev *mdev, u8 *prio_tc) +{ + u32 in[MLX5_ST_SZ_DW(qtct_reg)]; + u32 out[MLX5_ST_SZ_DW(qtct_reg)]; + int err; + int i; + + memset(in, 0, sizeof(in)); + for (i = 0; i < 8; i++) { + if (prio_tc[i] > mlx5_max_tc(mdev)) + return -EINVAL; + + MLX5_SET(qtct_reg, in, prio, i); + MLX5_SET(qtct_reg, in, tclass, prio_tc[i]); + + err = mlx5_core_access_reg(mdev, in, sizeof(in), out, + sizeof(out), MLX5_REG_QTCT, 0, 1); + if (err) + return err; + } + + return 0; +} +EXPORT_SYMBOL_GPL(mlx5_set_port_prio_tc); + +static int mlx5_set_port_qetcr_reg(struct mlx5_core_dev *mdev, u32 *in, + int inlen) +{ + u32 out[MLX5_ST_SZ_DW(qtct_reg)]; + + if (!MLX5_CAP_GEN(mdev, ets)) + return -ENOTSUPP; + + return mlx5_core_access_reg(mdev, in, inlen, out, sizeof(out), + MLX5_REG_QETCR, 0, 1); +} + +int mlx5_set_port_tc_group(struct mlx5_core_dev *mdev, u8 *tc_group) +{ + u32 in[MLX5_ST_SZ_DW(qetc_reg)]; + int i; + + memset(in, 0, sizeof(in)); + + for (i = 0; i <= mlx5_max_tc(mdev); i++) { + MLX5_SET(qetc_reg, in, tc_configuration[i].g, 1); + MLX5_SET(qetc_reg, in, tc_configuration[i].group, tc_group[i]); + } + + return mlx5_set_port_qetcr_reg(mdev, in, sizeof(in)); +} +EXPORT_SYMBOL_GPL(mlx5_set_port_tc_group); + +int mlx5_set_port_tc_bw_alloc(struct mlx5_core_dev *mdev, u8 *tc_bw) +{ + u32 in[MLX5_ST_SZ_DW(qetc_reg)]; + int i; + + memset(in, 0, sizeof(in)); + + for (i = 0; i <= mlx5_max_tc(mdev); i++) { + MLX5_SET(qetc_reg, in, tc_configuration[i].b, 1); + MLX5_SET(qetc_reg, in, tc_configuration[i].bw_allocation, tc_bw[i]); + } + + return mlx5_set_port_qetcr_reg(mdev, in, sizeof(in)); +} +EXPORT_SYMBOL_GPL(mlx5_set_port_tc_bw_alloc); diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 02adc67720ce..a815da92d4eb 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -99,6 +99,8 @@ enum { }; enum { + MLX5_REG_QETCR = 0x4005, + MLX5_REG_QTCT = 0x400a, MLX5_REG_PCAP = 0x5001, MLX5_REG_PMTU = 0x5003, MLX5_REG_PTYS = 0x5004, diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 51f1e540fc2b..ec957e059de8 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -729,7 +729,9 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 reserved_at_1bf[0x3]; u8 log_max_msg[0x5]; - u8 reserved_at_1c7[0x18]; + u8 reserved_at_1c7[0x4]; + u8 max_tc[0x4]; + u8 reserved_at_1cf[0x10]; u8 stat_rate_support[0x10]; u8 reserved_at_1ef[0xc]; @@ -7061,4 +7063,49 @@ struct mlx5_ifc_modify_flow_table_in_bits { u8 reserved_at_100[0x100]; }; +struct mlx5_ifc_ets_tcn_config_reg_bits { + u8 g[0x1]; + u8 b[0x1]; + u8 r[0x1]; + u8 reserved_at_3[0x9]; + u8 group[0x4]; + u8 reserved_at_10[0x9]; + u8 bw_allocation[0x7]; + + u8 reserved_at_20[0xc]; + u8 max_bw_units[0x4]; + u8 reserved_at_30[0x8]; + u8 max_bw_value[0x8]; +}; + +struct mlx5_ifc_ets_global_config_reg_bits { + u8 reserved_at_0[0x2]; + u8 r[0x1]; + u8 reserved_at_3[0x1d]; + + u8 reserved_at_20[0xc]; + u8 max_bw_units[0x4]; + u8 reserved_at_30[0x8]; + u8 max_bw_value[0x8]; +}; + +struct mlx5_ifc_qetc_reg_bits { + u8 reserved_at_0[0x8]; + u8 port_number[0x8]; + u8 reserved_at_10[0x30]; + + struct mlx5_ifc_ets_tcn_config_reg_bits tc_configuration[0x8]; + struct mlx5_ifc_ets_global_config_reg_bits global_configuration; +}; + +struct mlx5_ifc_qtct_reg_bits { + u8 reserved_at_0[0x8]; + u8 port_number[0x8]; + u8 reserved_at_10[0xd]; + u8 prio[0x3]; + + u8 reserved_at_20[0x1d]; + u8 tclass[0x3]; +}; + #endif /* MLX5_IFC_H */ diff --git 
a/include/linux/mlx5/port.h b/include/linux/mlx5/port.h index 4b3644caa936..0c67e699d017 100644 --- a/include/linux/mlx5/port.h +++ b/include/linux/mlx5/port.h @@ -70,4 +70,10 @@ int mlx5_set_port_pfc(struct mlx5_core_dev *dev, u8 pfc_en_tx, u8 pfc_en_rx); int mlx5_query_port_pfc(struct mlx5_core_dev *dev, u8 *pfc_en_tx, u8 *pfc_en_rx); +int mlx5_max_tc(struct mlx5_core_dev *mdev); + +int mlx5_set_port_prio_tc(struct mlx5_core_dev *mdev, u8 *prio_tc); +int mlx5_set_port_tc_group(struct mlx5_core_dev *mdev, u8 *tc_group); +int mlx5_set_port_tc_bw_alloc(struct mlx5_core_dev *mdev, u8 *tc_bw); + #endif /* __MLX5_PORT_H__ */ -- cgit v1.2.3 From d8880795dabf2381ed1e98348f6d9c7ea6fab950 Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Mon, 22 Feb 2016 18:17:28 +0200 Subject: net/mlx5e: Implement DCBNL IEEE max rate Add support for DCBNL IEEE get/set max rate. Signed-off-by: Tariq Toukan Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c | 73 ++++++++++++++++++++++ drivers/net/ethernet/mellanox/mlx5/core/port.c | 65 +++++++++++++++++++ include/linux/mlx5/device.h | 6 ++ include/linux/mlx5/port.h | 6 ++ 4 files changed, 150 insertions(+) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c index 39d8069ba9e3..3036f279a8fd 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c @@ -35,6 +35,9 @@ #define MLX5E_MAX_PRIORITY 8 +#define MLX5E_100MB (100000) +#define MLX5E_1GB (1000000) + static int mlx5e_dcbnl_ieee_getets(struct net_device *netdev, struct ieee_ets *ets) { @@ -219,9 +222,79 @@ static u8 mlx5e_dcbnl_setdcbx(struct net_device *dev, u8 mode) return 0; } +static int mlx5e_dcbnl_ieee_getmaxrate(struct net_device *netdev, + struct ieee_maxrate *maxrate) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5_core_dev *mdev = priv->mdev; + u8 max_bw_value[IEEE_8021QAZ_MAX_TCS]; + u8 max_bw_unit[IEEE_8021QAZ_MAX_TCS]; + int err; + int i; + + err = mlx5_query_port_ets_rate_limit(mdev, max_bw_value, max_bw_unit); + if (err) + return err; + + memset(maxrate->tc_maxrate, 0, sizeof(maxrate->tc_maxrate)); + + for (i = 0; i <= mlx5_max_tc(mdev); i++) { + switch (max_bw_unit[i]) { + case MLX5_100_MBPS_UNIT: + maxrate->tc_maxrate[i] = max_bw_value[i] * MLX5E_100MB; + break; + case MLX5_GBPS_UNIT: + maxrate->tc_maxrate[i] = max_bw_value[i] * MLX5E_1GB; + break; + case MLX5_BW_NO_LIMIT: + break; + default: + WARN(true, "non-supported BW unit"); + break; + } + } + + return 0; +} + +static int mlx5e_dcbnl_ieee_setmaxrate(struct net_device *netdev, + struct ieee_maxrate *maxrate) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5_core_dev *mdev = priv->mdev; + u8 max_bw_value[IEEE_8021QAZ_MAX_TCS]; + u8 max_bw_unit[IEEE_8021QAZ_MAX_TCS]; + __u64 upper_limit_mbps = roundup(255 * MLX5E_100MB, MLX5E_1GB); + int i; + + memset(max_bw_value, 0, sizeof(max_bw_value)); + memset(max_bw_unit, 0, sizeof(max_bw_unit)); + + for (i = 0; i <= mlx5_max_tc(mdev); i++) { + if (!maxrate->tc_maxrate[i]) { + max_bw_unit[i] = MLX5_BW_NO_LIMIT; + continue; + } + if (maxrate->tc_maxrate[i] < upper_limit_mbps) { + max_bw_value[i] = div_u64(maxrate->tc_maxrate[i], + MLX5E_100MB); + max_bw_value[i] = max_bw_value[i] ? 
max_bw_value[i] : 1; + max_bw_unit[i] = MLX5_100_MBPS_UNIT; + } else { + max_bw_value[i] = div_u64(maxrate->tc_maxrate[i], + MLX5E_1GB); + max_bw_unit[i] = MLX5_GBPS_UNIT; + } + } + + return mlx5_modify_port_ets_rate_limit(mdev, max_bw_value, max_bw_unit); +} + const struct dcbnl_rtnl_ops mlx5e_dcbnl_ops = { .ieee_getets = mlx5e_dcbnl_ieee_getets, .ieee_setets = mlx5e_dcbnl_ieee_setets, + .ieee_getmaxrate = mlx5e_dcbnl_ieee_getmaxrate, + .ieee_setmaxrate = mlx5e_dcbnl_ieee_setmaxrate, .ieee_getpfc = mlx5e_dcbnl_ieee_getpfc, .ieee_setpfc = mlx5e_dcbnl_ieee_setpfc, .getdcbx = mlx5e_dcbnl_getdcbx, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/port.c b/drivers/net/ethernet/mellanox/mlx5/core/port.c index 569100d3f57b..d97605ef3efd 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/port.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/port.c @@ -450,6 +450,19 @@ static int mlx5_set_port_qetcr_reg(struct mlx5_core_dev *mdev, u32 *in, MLX5_REG_QETCR, 0, 1); } +static int mlx5_query_port_qetcr_reg(struct mlx5_core_dev *mdev, u32 *out, + int outlen) +{ + u32 in[MLX5_ST_SZ_DW(qtct_reg)]; + + if (!MLX5_CAP_GEN(mdev, ets)) + return -ENOTSUPP; + + memset(in, 0, sizeof(in)); + return mlx5_core_access_reg(mdev, in, sizeof(in), out, outlen, + MLX5_REG_QETCR, 0, 0); +} + int mlx5_set_port_tc_group(struct mlx5_core_dev *mdev, u8 *tc_group) { u32 in[MLX5_ST_SZ_DW(qetc_reg)]; @@ -481,3 +494,55 @@ int mlx5_set_port_tc_bw_alloc(struct mlx5_core_dev *mdev, u8 *tc_bw) return mlx5_set_port_qetcr_reg(mdev, in, sizeof(in)); } EXPORT_SYMBOL_GPL(mlx5_set_port_tc_bw_alloc); + +int mlx5_modify_port_ets_rate_limit(struct mlx5_core_dev *mdev, + u8 *max_bw_value, + u8 *max_bw_units) +{ + u32 in[MLX5_ST_SZ_DW(qetc_reg)]; + void *ets_tcn_conf; + int i; + + memset(in, 0, sizeof(in)); + + MLX5_SET(qetc_reg, in, port_number, 1); + + for (i = 0; i <= mlx5_max_tc(mdev); i++) { + ets_tcn_conf = MLX5_ADDR_OF(qetc_reg, in, tc_configuration[i]); + + MLX5_SET(ets_tcn_config_reg, ets_tcn_conf, r, 1); + MLX5_SET(ets_tcn_config_reg, ets_tcn_conf, max_bw_units, + max_bw_units[i]); + MLX5_SET(ets_tcn_config_reg, ets_tcn_conf, max_bw_value, + max_bw_value[i]); + } + + return mlx5_set_port_qetcr_reg(mdev, in, sizeof(in)); +} +EXPORT_SYMBOL_GPL(mlx5_modify_port_ets_rate_limit); + +int mlx5_query_port_ets_rate_limit(struct mlx5_core_dev *mdev, + u8 *max_bw_value, + u8 *max_bw_units) +{ + u32 out[MLX5_ST_SZ_DW(qetc_reg)]; + void *ets_tcn_conf; + int err; + int i; + + err = mlx5_query_port_qetcr_reg(mdev, out, sizeof(out)); + if (err) + return err; + + for (i = 0; i <= mlx5_max_tc(mdev); i++) { + ets_tcn_conf = MLX5_ADDR_OF(qetc_reg, out, tc_configuration[i]); + + max_bw_value[i] = MLX5_GET(ets_tcn_config_reg, ets_tcn_conf, + max_bw_value); + max_bw_units[i] = MLX5_GET(ets_tcn_config_reg, ets_tcn_conf, + max_bw_units); + } + + return 0; +} +EXPORT_SYMBOL_GPL(mlx5_query_port_ets_rate_limit); diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 987764afa65c..bfc1ab0552d3 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -350,6 +350,12 @@ enum { MLX5_SET_PORT_PKEY_TABLE = 20, }; +enum { + MLX5_BW_NO_LIMIT = 0, + MLX5_100_MBPS_UNIT = 3, + MLX5_GBPS_UNIT = 4, +}; + enum { MLX5_MAX_PAGE_SHIFT = 31 }; diff --git a/include/linux/mlx5/port.h b/include/linux/mlx5/port.h index 0c67e699d017..595c7b2d9bfa 100644 --- a/include/linux/mlx5/port.h +++ b/include/linux/mlx5/port.h @@ -75,5 +75,11 @@ int mlx5_max_tc(struct mlx5_core_dev *mdev); int mlx5_set_port_prio_tc(struct mlx5_core_dev *mdev, u8 
*prio_tc); int mlx5_set_port_tc_group(struct mlx5_core_dev *mdev, u8 *tc_group); int mlx5_set_port_tc_bw_alloc(struct mlx5_core_dev *mdev, u8 *tc_bw); +int mlx5_modify_port_ets_rate_limit(struct mlx5_core_dev *mdev, + u8 *max_bw_value, + u8 *max_bw_unit); +int mlx5_query_port_ets_rate_limit(struct mlx5_core_dev *mdev, + u8 *max_bw_value, + u8 *max_bw_unit); #endif /* __MLX5_PORT_H__ */ -- cgit v1.2.3 From 928cfe8745a62e60c1e8e06676a74724e7786024 Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Mon, 22 Feb 2016 18:17:29 +0200 Subject: net/mlx5e: Wake On LAN support Implement set/get WOL by ethtool and added the needed device commands and structures to mlx5_ifc. Signed-off-by: Tariq Toukan Signed-off-by: Rana Shahout Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 6 + .../net/ethernet/mellanox/mlx5/core/en_ethtool.c | 125 +++++++++++++++++++++ drivers/net/ethernet/mellanox/mlx5/core/port.c | 38 +++++++ include/linux/mlx5/device.h | 11 ++ include/linux/mlx5/mlx5_ifc.h | 62 +++++++++- include/linux/mlx5/port.h | 2 + 6 files changed, 243 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index 037fc4cdf5af..9ce87c624450 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -560,6 +560,12 @@ const char *mlx5_command_str(int command) case MLX5_CMD_OP_ACCESS_REG: return "MLX5_CMD_OP_ACCESS_REG"; + case MLX5_CMD_OP_SET_WOL_ROL: + return "SET_WOL_ROL"; + + case MLX5_CMD_OP_QUERY_WOL_ROL: + return "QUERY_WOL_ROL"; + default: return "unknown command opcode"; } } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index 65624ac65b4c..e9760f895744 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -884,6 +884,129 @@ static int mlx5e_get_ts_info(struct net_device *dev, return 0; } +static __u32 mlx5e_get_wol_supported(struct mlx5_core_dev *mdev) +{ + __u32 ret = 0; + + if (MLX5_CAP_GEN(mdev, wol_g)) + ret |= WAKE_MAGIC; + + if (MLX5_CAP_GEN(mdev, wol_s)) + ret |= WAKE_MAGICSECURE; + + if (MLX5_CAP_GEN(mdev, wol_a)) + ret |= WAKE_ARP; + + if (MLX5_CAP_GEN(mdev, wol_b)) + ret |= WAKE_BCAST; + + if (MLX5_CAP_GEN(mdev, wol_m)) + ret |= WAKE_MCAST; + + if (MLX5_CAP_GEN(mdev, wol_u)) + ret |= WAKE_UCAST; + + if (MLX5_CAP_GEN(mdev, wol_p)) + ret |= WAKE_PHY; + + return ret; +} + +static __u32 mlx5e_refomrat_wol_mode_mlx5_to_linux(u8 mode) +{ + __u32 ret = 0; + + if (mode & MLX5_WOL_MAGIC) + ret |= WAKE_MAGIC; + + if (mode & MLX5_WOL_SECURED_MAGIC) + ret |= WAKE_MAGICSECURE; + + if (mode & MLX5_WOL_ARP) + ret |= WAKE_ARP; + + if (mode & MLX5_WOL_BROADCAST) + ret |= WAKE_BCAST; + + if (mode & MLX5_WOL_MULTICAST) + ret |= WAKE_MCAST; + + if (mode & MLX5_WOL_UNICAST) + ret |= WAKE_UCAST; + + if (mode & MLX5_WOL_PHY_ACTIVITY) + ret |= WAKE_PHY; + + return ret; +} + +static u8 mlx5e_refomrat_wol_mode_linux_to_mlx5(__u32 mode) +{ + u8 ret = 0; + + if (mode & WAKE_MAGIC) + ret |= MLX5_WOL_MAGIC; + + if (mode & WAKE_MAGICSECURE) + ret |= MLX5_WOL_SECURED_MAGIC; + + if (mode & WAKE_ARP) + ret |= MLX5_WOL_ARP; + + if (mode & WAKE_BCAST) + ret |= MLX5_WOL_BROADCAST; + + if (mode & WAKE_MCAST) + ret |= MLX5_WOL_MULTICAST; + + if (mode & WAKE_UCAST) + ret |= MLX5_WOL_UNICAST; + + if (mode & WAKE_PHY) + ret |= MLX5_WOL_PHY_ACTIVITY; + + return 
ret; +} + +static void mlx5e_get_wol(struct net_device *netdev, + struct ethtool_wolinfo *wol) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5_core_dev *mdev = priv->mdev; + u8 mlx5_wol_mode; + int err; + + memset(wol, 0, sizeof(*wol)); + + wol->supported = mlx5e_get_wol_supported(mdev); + if (!wol->supported) + return; + + err = mlx5_query_port_wol(mdev, &mlx5_wol_mode); + if (err) + return; + + wol->wolopts = mlx5e_refomrat_wol_mode_mlx5_to_linux(mlx5_wol_mode); +} + +static int mlx5e_set_wol(struct net_device *netdev, struct ethtool_wolinfo *wol) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5_core_dev *mdev = priv->mdev; + __u32 wol_supported = mlx5e_get_wol_supported(mdev); + u32 mlx5_wol_mode; + + if (!wol_supported) + return -ENOTSUPP; + + if (wol->wolopts & ~wol_supported) + return -EINVAL; + + mlx5_wol_mode = mlx5e_refomrat_wol_mode_linux_to_mlx5(wol->wolopts); + + return mlx5_set_port_wol(mdev, mlx5_wol_mode); +} + const struct ethtool_ops mlx5e_ethtool_ops = { .get_drvinfo = mlx5e_get_drvinfo, .get_link = ethtool_op_get_link, @@ -908,4 +1031,6 @@ const struct ethtool_ops mlx5e_ethtool_ops = { .get_pauseparam = mlx5e_get_pauseparam, .set_pauseparam = mlx5e_set_pauseparam, .get_ts_info = mlx5e_get_ts_info, + .get_wol = mlx5e_get_wol, + .set_wol = mlx5e_set_wol, }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/port.c b/drivers/net/ethernet/mellanox/mlx5/core/port.c index d97605ef3efd..e1f2e1059cfd 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/port.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/port.c @@ -546,3 +546,41 @@ int mlx5_query_port_ets_rate_limit(struct mlx5_core_dev *mdev, return 0; } EXPORT_SYMBOL_GPL(mlx5_query_port_ets_rate_limit); + +int mlx5_set_port_wol(struct mlx5_core_dev *mdev, u8 wol_mode) +{ + u32 in[MLX5_ST_SZ_DW(set_wol_rol_in)]; + u32 out[MLX5_ST_SZ_DW(set_wol_rol_out)]; + + memset(in, 0, sizeof(in)); + memset(out, 0, sizeof(out)); + + MLX5_SET(set_wol_rol_in, in, opcode, MLX5_CMD_OP_SET_WOL_ROL); + MLX5_SET(set_wol_rol_in, in, wol_mode_valid, 1); + MLX5_SET(set_wol_rol_in, in, wol_mode, wol_mode); + + return mlx5_cmd_exec_check_status(mdev, in, sizeof(in), + out, sizeof(out)); +} +EXPORT_SYMBOL_GPL(mlx5_set_port_wol); + +int mlx5_query_port_wol(struct mlx5_core_dev *mdev, u8 *wol_mode) +{ + u32 in[MLX5_ST_SZ_DW(query_wol_rol_in)]; + u32 out[MLX5_ST_SZ_DW(query_wol_rol_out)]; + int err; + + memset(in, 0, sizeof(in)); + memset(out, 0, sizeof(out)); + + MLX5_SET(query_wol_rol_in, in, opcode, MLX5_CMD_OP_QUERY_WOL_ROL); + + err = mlx5_cmd_exec_check_status(mdev, in, sizeof(in), + out, sizeof(out)); + + if (!err) + *wol_mode = MLX5_GET(query_wol_rol_out, out, wol_mode); + + return err; +} +EXPORT_SYMBOL_GPL(mlx5_query_port_wol); diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index bfc1ab0552d3..68a56bc37df2 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -1183,6 +1183,17 @@ enum { MLX5_RQC_RQ_TYPE_MEMORY_RQ_RPM = 0x1, }; +enum mlx5_wol_mode { + MLX5_WOL_DISABLE = 0, + MLX5_WOL_SECURED_MAGIC = 1 << 1, + MLX5_WOL_MAGIC = 1 << 2, + MLX5_WOL_ARP = 1 << 3, + MLX5_WOL_BROADCAST = 1 << 4, + MLX5_WOL_MULTICAST = 1 << 5, + MLX5_WOL_UNICAST = 1 << 6, + MLX5_WOL_PHY_ACTIVITY = 1 << 7, +}; + /* MLX5 DEV CAPs */ /* TODO: EAT.ME */ diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index ec957e059de8..03ffe9530365 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -166,6 +166,8 @@ enum { MLX5_CMD_OP_SET_L2_TABLE_ENTRY 
= 0x829, MLX5_CMD_OP_QUERY_L2_TABLE_ENTRY = 0x82a, MLX5_CMD_OP_DELETE_L2_TABLE_ENTRY = 0x82b, + MLX5_CMD_OP_SET_WOL_ROL = 0x830, + MLX5_CMD_OP_QUERY_WOL_ROL = 0x831, MLX5_CMD_OP_CREATE_TIR = 0x900, MLX5_CMD_OP_MODIFY_TIR = 0x901, MLX5_CMD_OP_DESTROY_TIR = 0x902, @@ -731,7 +733,17 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 log_max_msg[0x5]; u8 reserved_at_1c7[0x4]; u8 max_tc[0x4]; - u8 reserved_at_1cf[0x10]; + u8 reserved_at_1cf[0x6]; + u8 rol_s[0x1]; + u8 rol_g[0x1]; + u8 reserved_at_1d7[0x1]; + u8 wol_s[0x1]; + u8 wol_g[0x1]; + u8 wol_a[0x1]; + u8 wol_b[0x1]; + u8 wol_m[0x1]; + u8 wol_u[0x1]; + u8 wol_p[0x1]; u8 stat_rate_support[0x10]; u8 reserved_at_1ef[0xc]; @@ -6873,6 +6885,54 @@ struct mlx5_ifc_mtt_bits { u8 rd_en[0x1]; }; +struct mlx5_ifc_query_wol_rol_out_bits { + u8 status[0x8]; + u8 reserved_at_8[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_at_40[0x10]; + u8 rol_mode[0x8]; + u8 wol_mode[0x8]; + + u8 reserved_at_60[0x20]; +}; + +struct mlx5_ifc_query_wol_rol_in_bits { + u8 opcode[0x10]; + u8 reserved_at_10[0x10]; + + u8 reserved_at_20[0x10]; + u8 op_mod[0x10]; + + u8 reserved_at_40[0x40]; +}; + +struct mlx5_ifc_set_wol_rol_out_bits { + u8 status[0x8]; + u8 reserved_at_8[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_at_40[0x40]; +}; + +struct mlx5_ifc_set_wol_rol_in_bits { + u8 opcode[0x10]; + u8 reserved_at_10[0x10]; + + u8 reserved_at_20[0x10]; + u8 op_mod[0x10]; + + u8 rol_mode_valid[0x1]; + u8 wol_mode_valid[0x1]; + u8 reserved_at_42[0xe]; + u8 rol_mode[0x8]; + u8 wol_mode[0x8]; + + u8 reserved_at_60[0x20]; +}; + enum { MLX5_INITIAL_SEG_NIC_INTERFACE_FULL_DRIVER = 0x0, MLX5_INITIAL_SEG_NIC_INTERFACE_DISABLED = 0x1, diff --git a/include/linux/mlx5/port.h b/include/linux/mlx5/port.h index 595c7b2d9bfa..a1d145abd4eb 100644 --- a/include/linux/mlx5/port.h +++ b/include/linux/mlx5/port.h @@ -81,5 +81,7 @@ int mlx5_modify_port_ets_rate_limit(struct mlx5_core_dev *mdev, int mlx5_query_port_ets_rate_limit(struct mlx5_core_dev *mdev, u8 *max_bw_value, u8 *max_bw_unit); +int mlx5_set_port_wol(struct mlx5_core_dev *mdev, u8 wol_mode); +int mlx5_query_port_wol(struct mlx5_core_dev *mdev, u8 *wol_mode); #endif /* __MLX5_PORT_H__ */ -- cgit v1.2.3 From 2262d6ef517f9f0d427015cb9a790b86a2c95fed Mon Sep 17 00:00:00 2001 From: Pankaj Dubey Date: Fri, 18 Dec 2015 09:02:11 +0530 Subject: ARM: EXYNOS: Move pmu specific headers under "linux/soc/samsung" Move the Exynos PMU specific header files into "include/linux/soc/samsung" and update the affected files under "mach-exynos" to use the new location of these header files.
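
A minimal sketch of the result (illustrative only; demo_prepare_suspend is a hypothetical caller): users simply switch to the new include path, while the API itself is unchanged by this move.

#include <linux/soc/samsung/exynos-pmu.h>

/* Hypothetical caller: configure the PMU for sleep through the
 * relocated header; exynos_sys_powerdown_conf() and SYS_SLEEP now
 * come from include/linux/soc/samsung/exynos-pmu.h. */
static void demo_prepare_suspend(void)
{
	exynos_sys_powerdown_conf(SYS_SLEEP);
}
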
Signed-off-by: Amit Daniel Kachhap [tested on Peach-Pi (Exynos5880)] Signed-off-by: Pankaj Dubey Reviewed-by: Krzysztof Kozlowski [for testing on Trats2 (Exynos4412) and Odroid XU3 (Exynos5422)] Tested-by: Krzysztof Kozlowski Signed-off-by: Krzysztof Kozlowski --- arch/arm/mach-exynos/exynos-pmu.h | 24 - arch/arm/mach-exynos/exynos.c | 2 +- arch/arm/mach-exynos/mcpm-exynos.c | 2 +- arch/arm/mach-exynos/platsmp.c | 2 +- arch/arm/mach-exynos/pm.c | 4 +- arch/arm/mach-exynos/pmu.c | 6 +- arch/arm/mach-exynos/regs-pmu.h | 693 ---------------------------- arch/arm/mach-exynos/suspend.c | 4 +- include/linux/soc/samsung/exynos-pmu.h | 24 + include/linux/soc/samsung/exynos-regs-pmu.h | 693 ++++++++++++++++++++++++++++ 10 files changed, 727 insertions(+), 727 deletions(-) delete mode 100644 arch/arm/mach-exynos/exynos-pmu.h delete mode 100644 arch/arm/mach-exynos/regs-pmu.h create mode 100644 include/linux/soc/samsung/exynos-pmu.h create mode 100644 include/linux/soc/samsung/exynos-regs-pmu.h (limited to 'include/linux') diff --git a/arch/arm/mach-exynos/exynos-pmu.h b/arch/arm/mach-exynos/exynos-pmu.h deleted file mode 100644 index a2ab0d52b230..000000000000 --- a/arch/arm/mach-exynos/exynos-pmu.h +++ /dev/null @@ -1,24 +0,0 @@ -/* - * Copyright (c) 2014 Samsung Electronics Co., Ltd. - * http://www.samsung.com - * - * Header for EXYNOS PMU Driver support - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#ifndef __EXYNOS_PMU_H -#define __EXYNOS_PMU_H - -enum sys_powerdown { - SYS_AFTR, - SYS_LPA, - SYS_SLEEP, - NUM_SYS_POWERDOWN, -}; - -extern void exynos_sys_powerdown_conf(enum sys_powerdown mode); - -#endif /* __EXYNOS_PMU_H */ diff --git a/arch/arm/mach-exynos/exynos.c b/arch/arm/mach-exynos/exynos.c index a43923356f47..e46a3e4843ed 100644 --- a/arch/arm/mach-exynos/exynos.c +++ b/arch/arm/mach-exynos/exynos.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include @@ -31,7 +32,6 @@ #include "common.h" #include "mfc.h" -#include "regs-pmu.h" void __iomem *pmu_base_addr; diff --git a/arch/arm/mach-exynos/mcpm-exynos.c b/arch/arm/mach-exynos/mcpm-exynos.c index 56978199c479..f086bf615b29 100644 --- a/arch/arm/mach-exynos/mcpm-exynos.c +++ b/arch/arm/mach-exynos/mcpm-exynos.c @@ -16,13 +16,13 @@ #include #include #include +#include #include #include #include #include -#include "regs-pmu.h" #include "common.h" #define EXYNOS5420_CPUS_PER_CLUSTER 4 diff --git a/arch/arm/mach-exynos/platsmp.c b/arch/arm/mach-exynos/platsmp.c index 5bd9559786ba..da46c639f621 100644 --- a/arch/arm/mach-exynos/platsmp.c +++ b/arch/arm/mach-exynos/platsmp.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include @@ -30,7 +31,6 @@ #include #include "common.h" -#include "regs-pmu.h" extern void exynos4_secondary_startup(void); diff --git a/arch/arm/mach-exynos/pm.c b/arch/arm/mach-exynos/pm.c index 9c1506b499bc..b9b9186f8781 100644 --- a/arch/arm/mach-exynos/pm.c +++ b/arch/arm/mach-exynos/pm.c @@ -18,6 +18,8 @@ #include #include #include +#include +#include #include #include @@ -29,8 +31,6 @@ #include #include "common.h" -#include "exynos-pmu.h" -#include "regs-pmu.h" static inline void __iomem *exynos_boot_vector_addr(void) { diff --git a/arch/arm/mach-exynos/pmu.c b/arch/arm/mach-exynos/pmu.c index dbf9fe98d479..2ff19564876c 100644 --- a/arch/arm/mach-exynos/pmu.c +++ b/arch/arm/mach-exynos/pmu.c @@ -15,10 +15,10 @@ #include #include 
-#include +#include +#include -#include "exynos-pmu.h" -#include "regs-pmu.h" +#include #define PMU_TABLE_END (-1U) diff --git a/arch/arm/mach-exynos/regs-pmu.h b/arch/arm/mach-exynos/regs-pmu.h deleted file mode 100644 index 5e4f4c23b06a..000000000000 --- a/arch/arm/mach-exynos/regs-pmu.h +++ /dev/null @@ -1,693 +0,0 @@ -/* - * Copyright (c) 2010-2012 Samsung Electronics Co., Ltd. - * http://www.samsung.com - * - * EXYNOS - Power management unit definition - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. -*/ - -#ifndef __ASM_ARCH_REGS_PMU_H -#define __ASM_ARCH_REGS_PMU_H __FILE__ - -#define S5P_CENTRAL_SEQ_CONFIGURATION 0x0200 - -#define S5P_CENTRAL_LOWPWR_CFG (1 << 16) - -#define S5P_CENTRAL_SEQ_OPTION 0x0208 - -#define S5P_USE_STANDBY_WFI0 (1 << 16) -#define S5P_USE_STANDBY_WFI1 (1 << 17) -#define S5P_USE_STANDBY_WFI2 (1 << 19) -#define S5P_USE_STANDBY_WFI3 (1 << 20) -#define S5P_USE_STANDBY_WFE0 (1 << 24) -#define S5P_USE_STANDBY_WFE1 (1 << 25) -#define S5P_USE_STANDBY_WFE2 (1 << 27) -#define S5P_USE_STANDBY_WFE3 (1 << 28) - -#define S5P_USE_STANDBY_WFI_ALL \ - (S5P_USE_STANDBY_WFI0 | S5P_USE_STANDBY_WFI1 | \ - S5P_USE_STANDBY_WFI2 | S5P_USE_STANDBY_WFI3 | \ - S5P_USE_STANDBY_WFE0 | S5P_USE_STANDBY_WFE1 | \ - S5P_USE_STANDBY_WFE2 | S5P_USE_STANDBY_WFE3) - -#define S5P_USE_DELAYED_RESET_ASSERTION BIT(12) - -#define EXYNOS_CORE_PO_RESET(n) ((1 << 4) << n) -#define EXYNOS_WAKEUP_FROM_LOWPWR (1 << 28) -#define EXYNOS_SWRESET 0x0400 -#define EXYNOS5440_SWRESET 0x00C4 - -#define S5P_WAKEUP_STAT 0x0600 -#define S5P_EINT_WAKEUP_MASK 0x0604 -#define S5P_WAKEUP_MASK 0x0608 -#define S5P_WAKEUP_MASK2 0x0614 - -#define S5P_INFORM0 0x0800 -#define S5P_INFORM1 0x0804 -#define S5P_INFORM5 0x0814 -#define S5P_INFORM6 0x0818 -#define S5P_INFORM7 0x081C -#define S5P_PMU_SPARE2 0x0908 -#define S5P_PMU_SPARE3 0x090C - -#define EXYNOS_IROM_DATA2 0x0988 -#define S5P_ARM_CORE0_LOWPWR 0x1000 -#define S5P_DIS_IRQ_CORE0 0x1004 -#define S5P_DIS_IRQ_CENTRAL0 0x1008 -#define S5P_ARM_CORE1_LOWPWR 0x1010 -#define S5P_DIS_IRQ_CORE1 0x1014 -#define S5P_DIS_IRQ_CENTRAL1 0x1018 -#define S5P_ARM_COMMON_LOWPWR 0x1080 -#define S5P_L2_0_LOWPWR 0x10C0 -#define S5P_L2_1_LOWPWR 0x10C4 -#define S5P_CMU_ACLKSTOP_LOWPWR 0x1100 -#define S5P_CMU_SCLKSTOP_LOWPWR 0x1104 -#define S5P_CMU_RESET_LOWPWR 0x110C -#define S5P_APLL_SYSCLK_LOWPWR 0x1120 -#define S5P_MPLL_SYSCLK_LOWPWR 0x1124 -#define S5P_VPLL_SYSCLK_LOWPWR 0x1128 -#define S5P_EPLL_SYSCLK_LOWPWR 0x112C -#define S5P_CMU_CLKSTOP_GPS_ALIVE_LOWPWR 0x1138 -#define S5P_CMU_RESET_GPSALIVE_LOWPWR 0x113C -#define S5P_CMU_CLKSTOP_CAM_LOWPWR 0x1140 -#define S5P_CMU_CLKSTOP_TV_LOWPWR 0x1144 -#define S5P_CMU_CLKSTOP_MFC_LOWPWR 0x1148 -#define S5P_CMU_CLKSTOP_G3D_LOWPWR 0x114C -#define S5P_CMU_CLKSTOP_LCD0_LOWPWR 0x1150 -#define S5P_CMU_CLKSTOP_MAUDIO_LOWPWR 0x1158 -#define S5P_CMU_CLKSTOP_GPS_LOWPWR 0x115C -#define S5P_CMU_RESET_CAM_LOWPWR 0x1160 -#define S5P_CMU_RESET_TV_LOWPWR 0x1164 -#define S5P_CMU_RESET_MFC_LOWPWR 0x1168 -#define S5P_CMU_RESET_G3D_LOWPWR 0x116C -#define S5P_CMU_RESET_LCD0_LOWPWR 0x1170 -#define S5P_CMU_RESET_MAUDIO_LOWPWR 0x1178 -#define S5P_CMU_RESET_GPS_LOWPWR 0x117C -#define S5P_TOP_BUS_LOWPWR 0x1180 -#define S5P_TOP_RETENTION_LOWPWR 0x1184 -#define S5P_TOP_PWR_LOWPWR 0x1188 -#define S5P_LOGIC_RESET_LOWPWR 0x11A0 -#define S5P_ONENAND_MEM_LOWPWR 0x11C0 -#define S5P_G2D_ACP_MEM_LOWPWR 0x11C8 -#define S5P_USBOTG_MEM_LOWPWR 
0x11CC -#define S5P_HSMMC_MEM_LOWPWR 0x11D0 -#define S5P_CSSYS_MEM_LOWPWR 0x11D4 -#define S5P_SECSS_MEM_LOWPWR 0x11D8 -#define S5P_PAD_RETENTION_DRAM_LOWPWR 0x1200 -#define S5P_PAD_RETENTION_MAUDIO_LOWPWR 0x1204 -#define S5P_PAD_RETENTION_GPIO_LOWPWR 0x1220 -#define S5P_PAD_RETENTION_UART_LOWPWR 0x1224 -#define S5P_PAD_RETENTION_MMCA_LOWPWR 0x1228 -#define S5P_PAD_RETENTION_MMCB_LOWPWR 0x122C -#define S5P_PAD_RETENTION_EBIA_LOWPWR 0x1230 -#define S5P_PAD_RETENTION_EBIB_LOWPWR 0x1234 -#define S5P_PAD_RETENTION_ISOLATION_LOWPWR 0x1240 -#define S5P_PAD_RETENTION_ALV_SEL_LOWPWR 0x1260 -#define S5P_XUSBXTI_LOWPWR 0x1280 -#define S5P_XXTI_LOWPWR 0x1284 -#define S5P_EXT_REGULATOR_LOWPWR 0x12C0 -#define S5P_GPIO_MODE_LOWPWR 0x1300 -#define S5P_GPIO_MODE_MAUDIO_LOWPWR 0x1340 -#define S5P_CAM_LOWPWR 0x1380 -#define S5P_TV_LOWPWR 0x1384 -#define S5P_MFC_LOWPWR 0x1388 -#define S5P_G3D_LOWPWR 0x138C -#define S5P_LCD0_LOWPWR 0x1390 -#define S5P_MAUDIO_LOWPWR 0x1398 -#define S5P_GPS_LOWPWR 0x139C -#define S5P_GPS_ALIVE_LOWPWR 0x13A0 - -#define EXYNOS_ARM_CORE0_CONFIGURATION 0x2000 -#define EXYNOS_ARM_CORE_CONFIGURATION(_nr) \ - (EXYNOS_ARM_CORE0_CONFIGURATION + (0x80 * (_nr))) -#define EXYNOS_ARM_CORE_STATUS(_nr) \ - (EXYNOS_ARM_CORE_CONFIGURATION(_nr) + 0x4) -#define EXYNOS_ARM_CORE_OPTION(_nr) \ - (EXYNOS_ARM_CORE_CONFIGURATION(_nr) + 0x8) - -#define EXYNOS_ARM_COMMON_CONFIGURATION 0x2500 -#define EXYNOS_COMMON_CONFIGURATION(_nr) \ - (EXYNOS_ARM_COMMON_CONFIGURATION + (0x80 * (_nr))) -#define EXYNOS_COMMON_STATUS(_nr) \ - (EXYNOS_COMMON_CONFIGURATION(_nr) + 0x4) -#define EXYNOS_COMMON_OPTION(_nr) \ - (EXYNOS_COMMON_CONFIGURATION(_nr) + 0x8) - -#define EXYNOS_CORE_LOCAL_PWR_EN 0x3 - -#define EXYNOS_ARM_COMMON_STATUS 0x2504 -#define EXYNOS_COMMON_OPTION(_nr) \ - (EXYNOS_COMMON_CONFIGURATION(_nr) + 0x8) - -#define EXYNOS_ARM_L2_CONFIGURATION 0x2600 -#define EXYNOS_L2_CONFIGURATION(_nr) \ - (EXYNOS_ARM_L2_CONFIGURATION + ((_nr) * 0x80)) -#define EXYNOS_L2_STATUS(_nr) \ - (EXYNOS_L2_CONFIGURATION(_nr) + 0x4) -#define EXYNOS_L2_OPTION(_nr) \ - (EXYNOS_L2_CONFIGURATION(_nr) + 0x8) -#define EXYNOS_L2_COMMON_PWR_EN 0x3 - -#define EXYNOS_ARM_CORE_X_STATUS_OFFSET 0x4 - -#define EXYNOS5_APLL_SYSCLK_CONFIGURATION 0x2A00 -#define EXYNOS5_APLL_SYSCLK_STATUS 0x2A04 - -#define EXYNOS5_ARM_L2_OPTION 0x2608 -#define EXYNOS5_USE_RETENTION BIT(4) - -#define EXYNOS5_L2RSTDISABLE_VALUE BIT(3) - -#define S5P_PAD_RET_MAUDIO_OPTION 0x3028 -#define S5P_PAD_RET_MMC2_OPTION 0x30c8 -#define S5P_PAD_RET_GPIO_OPTION 0x3108 -#define S5P_PAD_RET_UART_OPTION 0x3128 -#define S5P_PAD_RET_MMCA_OPTION 0x3148 -#define S5P_PAD_RET_MMCB_OPTION 0x3168 -#define S5P_PAD_RET_EBIA_OPTION 0x3188 -#define S5P_PAD_RET_EBIB_OPTION 0x31A8 -#define S5P_PAD_RET_SPI_OPTION 0x31c8 - -#define S5P_PS_HOLD_CONTROL 0x330C -#define S5P_PS_HOLD_EN (1 << 31) -#define S5P_PS_HOLD_OUTPUT_HIGH (3 << 8) - -#define S5P_CAM_OPTION 0x3C08 -#define S5P_MFC_OPTION 0x3C48 -#define S5P_G3D_OPTION 0x3C68 -#define S5P_LCD0_OPTION 0x3C88 -#define S5P_LCD1_OPTION 0x3CA8 -#define S5P_ISP_OPTION S5P_LCD1_OPTION - -#define S5P_CORE_LOCAL_PWR_EN 0x3 -#define S5P_CORE_WAKEUP_FROM_LOCAL_CFG (0x3 << 8) -#define S5P_CORE_AUTOWAKEUP_EN (1 << 31) - -/* Only for EXYNOS4210 */ -#define S5P_CMU_CLKSTOP_LCD1_LOWPWR 0x1154 -#define S5P_CMU_RESET_LCD1_LOWPWR 0x1174 -#define S5P_MODIMIF_MEM_LOWPWR 0x11C4 -#define S5P_PCIE_MEM_LOWPWR 0x11E0 -#define S5P_SATA_MEM_LOWPWR 0x11E4 -#define S5P_LCD1_LOWPWR 0x1394 - -/* Only for EXYNOS4x12 */ -#define S5P_ISP_ARM_LOWPWR 0x1050 -#define 
S5P_DIS_IRQ_ISP_ARM_LOCAL_LOWPWR 0x1054 -#define S5P_DIS_IRQ_ISP_ARM_CENTRAL_LOWPWR 0x1058 -#define S5P_CMU_ACLKSTOP_COREBLK_LOWPWR 0x1110 -#define S5P_CMU_SCLKSTOP_COREBLK_LOWPWR 0x1114 -#define S5P_CMU_RESET_COREBLK_LOWPWR 0x111C -#define S5P_MPLLUSER_SYSCLK_LOWPWR 0x1130 -#define S5P_CMU_CLKSTOP_ISP_LOWPWR 0x1154 -#define S5P_CMU_RESET_ISP_LOWPWR 0x1174 -#define S5P_TOP_BUS_COREBLK_LOWPWR 0x1190 -#define S5P_TOP_RETENTION_COREBLK_LOWPWR 0x1194 -#define S5P_TOP_PWR_COREBLK_LOWPWR 0x1198 -#define S5P_OSCCLK_GATE_LOWPWR 0x11A4 -#define S5P_LOGIC_RESET_COREBLK_LOWPWR 0x11B0 -#define S5P_OSCCLK_GATE_COREBLK_LOWPWR 0x11B4 -#define S5P_HSI_MEM_LOWPWR 0x11C4 -#define S5P_ROTATOR_MEM_LOWPWR 0x11DC -#define S5P_PAD_RETENTION_GPIO_COREBLK_LOWPWR 0x123C -#define S5P_PAD_ISOLATION_COREBLK_LOWPWR 0x1250 -#define S5P_GPIO_MODE_COREBLK_LOWPWR 0x1320 -#define S5P_TOP_ASB_RESET_LOWPWR 0x1344 -#define S5P_TOP_ASB_ISOLATION_LOWPWR 0x1348 -#define S5P_ISP_LOWPWR 0x1394 -#define S5P_DRAM_FREQ_DOWN_LOWPWR 0x13B0 -#define S5P_DDRPHY_DLLOFF_LOWPWR 0x13B4 -#define S5P_CMU_SYSCLK_ISP_LOWPWR 0x13B8 -#define S5P_CMU_SYSCLK_GPS_LOWPWR 0x13BC -#define S5P_LPDDR_PHY_DLL_LOCK_LOWPWR 0x13C0 - -#define S5P_ARM_L2_0_OPTION 0x2608 -#define S5P_ARM_L2_1_OPTION 0x2628 -#define S5P_ONENAND_MEM_OPTION 0x2E08 -#define S5P_HSI_MEM_OPTION 0x2E28 -#define S5P_G2D_ACP_MEM_OPTION 0x2E48 -#define S5P_USBOTG_MEM_OPTION 0x2E68 -#define S5P_HSMMC_MEM_OPTION 0x2E88 -#define S5P_CSSYS_MEM_OPTION 0x2EA8 -#define S5P_SECSS_MEM_OPTION 0x2EC8 -#define S5P_ROTATOR_MEM_OPTION 0x2F48 - -/* Only for EXYNOS4412 */ -#define S5P_ARM_CORE2_LOWPWR 0x1020 -#define S5P_DIS_IRQ_CORE2 0x1024 -#define S5P_DIS_IRQ_CENTRAL2 0x1028 -#define S5P_ARM_CORE3_LOWPWR 0x1030 -#define S5P_DIS_IRQ_CORE3 0x1034 -#define S5P_DIS_IRQ_CENTRAL3 0x1038 - -/* Only for EXYNOS3XXX */ -#define EXYNOS3_ARM_CORE0_SYS_PWR_REG 0x1000 -#define EXYNOS3_DIS_IRQ_ARM_CORE0_LOCAL_SYS_PWR_REG 0x1004 -#define EXYNOS3_DIS_IRQ_ARM_CORE0_CENTRAL_SYS_PWR_REG 0x1008 -#define EXYNOS3_ARM_CORE1_SYS_PWR_REG 0x1010 -#define EXYNOS3_DIS_IRQ_ARM_CORE1_LOCAL_SYS_PWR_REG 0x1014 -#define EXYNOS3_DIS_IRQ_ARM_CORE1_CENTRAL_SYS_PWR_REG 0x1018 -#define EXYNOS3_ISP_ARM_SYS_PWR_REG 0x1050 -#define EXYNOS3_DIS_IRQ_ISP_ARM_LOCAL_SYS_PWR_REG 0x1054 -#define EXYNOS3_DIS_IRQ_ISP_ARM_CENTRAL_SYS_PWR_REG 0x1058 -#define EXYNOS3_ARM_COMMON_SYS_PWR_REG 0x1080 -#define EXYNOS3_ARM_L2_SYS_PWR_REG 0x10C0 -#define EXYNOS3_CMU_ACLKSTOP_SYS_PWR_REG 0x1100 -#define EXYNOS3_CMU_SCLKSTOP_SYS_PWR_REG 0x1104 -#define EXYNOS3_CMU_RESET_SYS_PWR_REG 0x110C -#define EXYNOS3_CMU_ACLKSTOP_COREBLK_SYS_PWR_REG 0x1110 -#define EXYNOS3_CMU_SCLKSTOP_COREBLK_SYS_PWR_REG 0x1114 -#define EXYNOS3_CMU_RESET_COREBLK_SYS_PWR_REG 0x111C -#define EXYNOS3_APLL_SYSCLK_SYS_PWR_REG 0x1120 -#define EXYNOS3_MPLL_SYSCLK_SYS_PWR_REG 0x1124 -#define EXYNOS3_VPLL_SYSCLK_SYS_PWR_REG 0x1128 -#define EXYNOS3_EPLL_SYSCLK_SYS_PWR_REG 0x112C -#define EXYNOS3_MPLLUSER_SYSCLK_SYS_PWR_REG 0x1130 -#define EXYNOS3_BPLLUSER_SYSCLK_SYS_PWR_REG 0x1134 -#define EXYNOS3_EPLLUSER_SYSCLK_SYS_PWR_REG 0x1138 -#define EXYNOS3_CMU_CLKSTOP_CAM_SYS_PWR_REG 0x1140 -#define EXYNOS3_CMU_CLKSTOP_MFC_SYS_PWR_REG 0x1148 -#define EXYNOS3_CMU_CLKSTOP_G3D_SYS_PWR_REG 0x114C -#define EXYNOS3_CMU_CLKSTOP_LCD0_SYS_PWR_REG 0x1150 -#define EXYNOS3_CMU_CLKSTOP_ISP_SYS_PWR_REG 0x1154 -#define EXYNOS3_CMU_CLKSTOP_MAUDIO_SYS_PWR_REG 0x1158 -#define EXYNOS3_CMU_RESET_CAM_SYS_PWR_REG 0x1160 -#define EXYNOS3_CMU_RESET_MFC_SYS_PWR_REG 0x1168 -#define EXYNOS3_CMU_RESET_G3D_SYS_PWR_REG 0x116C -#define 
EXYNOS3_CMU_RESET_LCD0_SYS_PWR_REG 0x1170 -#define EXYNOS3_CMU_RESET_ISP_SYS_PWR_REG 0x1174 -#define EXYNOS3_CMU_RESET_MAUDIO_SYS_PWR_REG 0x1178 -#define EXYNOS3_TOP_BUS_SYS_PWR_REG 0x1180 -#define EXYNOS3_TOP_RETENTION_SYS_PWR_REG 0x1184 -#define EXYNOS3_TOP_PWR_SYS_PWR_REG 0x1188 -#define EXYNOS3_TOP_BUS_COREBLK_SYS_PWR_REG 0x1190 -#define EXYNOS3_TOP_RETENTION_COREBLK_SYS_PWR_REG 0x1194 -#define EXYNOS3_TOP_PWR_COREBLK_SYS_PWR_REG 0x1198 -#define EXYNOS3_LOGIC_RESET_SYS_PWR_REG 0x11A0 -#define EXYNOS3_OSCCLK_GATE_SYS_PWR_REG 0x11A4 -#define EXYNOS3_LOGIC_RESET_COREBLK_SYS_PWR_REG 0x11B0 -#define EXYNOS3_OSCCLK_GATE_COREBLK_SYS_PWR_REG 0x11B4 -#define EXYNOS3_PAD_RETENTION_DRAM_SYS_PWR_REG 0x1200 -#define EXYNOS3_PAD_RETENTION_MAUDIO_SYS_PWR_REG 0x1204 -#define EXYNOS3_PAD_RETENTION_SPI_SYS_PWR_REG 0x1208 -#define EXYNOS3_PAD_RETENTION_MMC2_SYS_PWR_REG 0x1218 -#define EXYNOS3_PAD_RETENTION_GPIO_SYS_PWR_REG 0x1220 -#define EXYNOS3_PAD_RETENTION_UART_SYS_PWR_REG 0x1224 -#define EXYNOS3_PAD_RETENTION_MMC0_SYS_PWR_REG 0x1228 -#define EXYNOS3_PAD_RETENTION_MMC1_SYS_PWR_REG 0x122C -#define EXYNOS3_PAD_RETENTION_EBIA_SYS_PWR_REG 0x1230 -#define EXYNOS3_PAD_RETENTION_EBIB_SYS_PWR_REG 0x1234 -#define EXYNOS3_PAD_RETENTION_JTAG_SYS_PWR_REG 0x1238 -#define EXYNOS3_PAD_ISOLATION_SYS_PWR_REG 0x1240 -#define EXYNOS3_PAD_ALV_SEL_SYS_PWR_REG 0x1260 -#define EXYNOS3_XUSBXTI_SYS_PWR_REG 0x1280 -#define EXYNOS3_XXTI_SYS_PWR_REG 0x1284 -#define EXYNOS3_EXT_REGULATOR_SYS_PWR_REG 0x12C0 -#define EXYNOS3_EXT_REGULATOR_COREBLK_SYS_PWR_REG 0x12C4 -#define EXYNOS3_GPIO_MODE_SYS_PWR_REG 0x1300 -#define EXYNOS3_GPIO_MODE_MAUDIO_SYS_PWR_REG 0x1340 -#define EXYNOS3_TOP_ASB_RESET_SYS_PWR_REG 0x1344 -#define EXYNOS3_TOP_ASB_ISOLATION_SYS_PWR_REG 0x1348 -#define EXYNOS3_TOP_ASB_RESET_COREBLK_SYS_PWR_REG 0x1350 -#define EXYNOS3_TOP_ASB_ISOLATION_COREBLK_SYS_PWR_REG 0x1354 -#define EXYNOS3_CAM_SYS_PWR_REG 0x1380 -#define EXYNOS3_MFC_SYS_PWR_REG 0x1388 -#define EXYNOS3_G3D_SYS_PWR_REG 0x138C -#define EXYNOS3_LCD0_SYS_PWR_REG 0x1390 -#define EXYNOS3_ISP_SYS_PWR_REG 0x1394 -#define EXYNOS3_MAUDIO_SYS_PWR_REG 0x1398 -#define EXYNOS3_DRAM_FREQ_DOWN_SYS_PWR_REG 0x13B0 -#define EXYNOS3_DDRPHY_DLLOFF_SYS_PWR_REG 0x13B4 -#define EXYNOS3_CMU_SYSCLK_ISP_SYS_PWR_REG 0x13B8 -#define EXYNOS3_LPDDR_PHY_DLL_LOCK_SYS_PWR_REG 0x13C0 -#define EXYNOS3_BPLL_SYSCLK_SYS_PWR_REG 0x13C4 -#define EXYNOS3_UPLL_SYSCLK_SYS_PWR_REG 0x13C8 - -#define EXYNOS3_ARM_CORE0_OPTION 0x2008 -#define EXYNOS3_ARM_CORE_OPTION(_nr) \ - (EXYNOS3_ARM_CORE0_OPTION + ((_nr) * 0x80)) - -#define EXYNOS3_ARM_COMMON_OPTION 0x2408 -#define EXYNOS3_ARM_L2_OPTION 0x2608 -#define EXYNOS3_TOP_PWR_OPTION 0x2C48 -#define EXYNOS3_CORE_TOP_PWR_OPTION 0x2CA8 -#define EXYNOS3_XUSBXTI_DURATION 0x341C -#define EXYNOS3_XXTI_DURATION 0x343C -#define EXYNOS3_EXT_REGULATOR_DURATION 0x361C -#define EXYNOS3_EXT_REGULATOR_COREBLK_DURATION 0x363C -#define XUSBXTI_DURATION 0x00000BB8 -#define XXTI_DURATION XUSBXTI_DURATION -#define EXT_REGULATOR_DURATION 0x00001D4C -#define EXT_REGULATOR_COREBLK_DURATION EXT_REGULATOR_DURATION - -/* for XXX_OPTION */ -#define EXYNOS3_OPTION_USE_SC_COUNTER (1 << 0) -#define EXYNOS3_OPTION_USE_SC_FEEDBACK (1 << 1) -#define EXYNOS3_OPTION_SKIP_DEACTIVATE_ACEACP_IN_PWDN (1 << 7) - -/* For EXYNOS5 */ - -#define EXYNOS5_AUTO_WDTRESET_DISABLE 0x0408 -#define EXYNOS5_MASK_WDTRESET_REQUEST 0x040C - -#define EXYNOS5_USE_RETENTION BIT(4) -#define EXYNOS5_SYS_WDTRESET (1 << 20) - -#define EXYNOS5_ARM_CORE0_SYS_PWR_REG 0x1000 -#define 
EXYNOS5_DIS_IRQ_ARM_CORE0_LOCAL_SYS_PWR_REG 0x1004 -#define EXYNOS5_DIS_IRQ_ARM_CORE0_CENTRAL_SYS_PWR_REG 0x1008 -#define EXYNOS5_ARM_CORE1_SYS_PWR_REG 0x1010 -#define EXYNOS5_DIS_IRQ_ARM_CORE1_LOCAL_SYS_PWR_REG 0x1014 -#define EXYNOS5_DIS_IRQ_ARM_CORE1_CENTRAL_SYS_PWR_REG 0x1018 -#define EXYNOS5_FSYS_ARM_SYS_PWR_REG 0x1040 -#define EXYNOS5_DIS_IRQ_FSYS_ARM_CENTRAL_SYS_PWR_REG 0x1048 -#define EXYNOS5_ISP_ARM_SYS_PWR_REG 0x1050 -#define EXYNOS5_DIS_IRQ_ISP_ARM_LOCAL_SYS_PWR_REG 0x1054 -#define EXYNOS5_DIS_IRQ_ISP_ARM_CENTRAL_SYS_PWR_REG 0x1058 -#define EXYNOS5_ARM_COMMON_SYS_PWR_REG 0x1080 -#define EXYNOS5_ARM_L2_SYS_PWR_REG 0x10C0 -#define EXYNOS5_CMU_ACLKSTOP_SYS_PWR_REG 0x1100 -#define EXYNOS5_CMU_SCLKSTOP_SYS_PWR_REG 0x1104 -#define EXYNOS5_CMU_RESET_SYS_PWR_REG 0x110C -#define EXYNOS5_CMU_ACLKSTOP_SYSMEM_SYS_PWR_REG 0x1120 -#define EXYNOS5_CMU_SCLKSTOP_SYSMEM_SYS_PWR_REG 0x1124 -#define EXYNOS5_CMU_RESET_SYSMEM_SYS_PWR_REG 0x112C -#define EXYNOS5_DRAM_FREQ_DOWN_SYS_PWR_REG 0x1130 -#define EXYNOS5_DDRPHY_DLLOFF_SYS_PWR_REG 0x1134 -#define EXYNOS5_DDRPHY_DLLLOCK_SYS_PWR_REG 0x1138 -#define EXYNOS5_APLL_SYSCLK_SYS_PWR_REG 0x1140 -#define EXYNOS5_MPLL_SYSCLK_SYS_PWR_REG 0x1144 -#define EXYNOS5_VPLL_SYSCLK_SYS_PWR_REG 0x1148 -#define EXYNOS5_EPLL_SYSCLK_SYS_PWR_REG 0x114C -#define EXYNOS5_BPLL_SYSCLK_SYS_PWR_REG 0x1150 -#define EXYNOS5_CPLL_SYSCLK_SYS_PWR_REG 0x1154 -#define EXYNOS5_MPLLUSER_SYSCLK_SYS_PWR_REG 0x1164 -#define EXYNOS5_BPLLUSER_SYSCLK_SYS_PWR_REG 0x1170 -#define EXYNOS5_TOP_BUS_SYS_PWR_REG 0x1180 -#define EXYNOS5_TOP_RETENTION_SYS_PWR_REG 0x1184 -#define EXYNOS5_TOP_PWR_SYS_PWR_REG 0x1188 -#define EXYNOS5_TOP_BUS_SYSMEM_SYS_PWR_REG 0x1190 -#define EXYNOS5_TOP_RETENTION_SYSMEM_SYS_PWR_REG 0x1194 -#define EXYNOS5_TOP_PWR_SYSMEM_SYS_PWR_REG 0x1198 -#define EXYNOS5_LOGIC_RESET_SYS_PWR_REG 0x11A0 -#define EXYNOS5_OSCCLK_GATE_SYS_PWR_REG 0x11A4 -#define EXYNOS5_LOGIC_RESET_SYSMEM_SYS_PWR_REG 0x11B0 -#define EXYNOS5_OSCCLK_GATE_SYSMEM_SYS_PWR_REG 0x11B4 -#define EXYNOS5_USBOTG_MEM_SYS_PWR_REG 0x11C0 -#define EXYNOS5_G2D_MEM_SYS_PWR_REG 0x11C8 -#define EXYNOS5_USBDRD_MEM_SYS_PWR_REG 0x11CC -#define EXYNOS5_SDMMC_MEM_SYS_PWR_REG 0x11D0 -#define EXYNOS5_CSSYS_MEM_SYS_PWR_REG 0x11D4 -#define EXYNOS5_SECSS_MEM_SYS_PWR_REG 0x11D8 -#define EXYNOS5_ROTATOR_MEM_SYS_PWR_REG 0x11DC -#define EXYNOS5_INTRAM_MEM_SYS_PWR_REG 0x11E0 -#define EXYNOS5_INTROM_MEM_SYS_PWR_REG 0x11E4 -#define EXYNOS5_JPEG_MEM_SYS_PWR_REG 0x11E8 -#define EXYNOS5_HSI_MEM_SYS_PWR_REG 0x11EC -#define EXYNOS5_MCUIOP_MEM_SYS_PWR_REG 0x11F4 -#define EXYNOS5_SATA_MEM_SYS_PWR_REG 0x11FC -#define EXYNOS5_PAD_RETENTION_DRAM_SYS_PWR_REG 0x1200 -#define EXYNOS5_PAD_RETENTION_MAU_SYS_PWR_REG 0x1204 -#define EXYNOS5_PAD_RETENTION_EFNAND_SYS_PWR_REG 0x1208 -#define EXYNOS5_PAD_RETENTION_GPIO_SYS_PWR_REG 0x1220 -#define EXYNOS5_PAD_RETENTION_UART_SYS_PWR_REG 0x1224 -#define EXYNOS5_PAD_RETENTION_MMCA_SYS_PWR_REG 0x1228 -#define EXYNOS5_PAD_RETENTION_MMCB_SYS_PWR_REG 0x122C -#define EXYNOS5_PAD_RETENTION_EBIA_SYS_PWR_REG 0x1230 -#define EXYNOS5_PAD_RETENTION_EBIB_SYS_PWR_REG 0x1234 -#define EXYNOS5_PAD_RETENTION_SPI_SYS_PWR_REG 0x1238 -#define EXYNOS5_PAD_RETENTION_GPIO_SYSMEM_SYS_PWR_REG 0x123C -#define EXYNOS5_PAD_ISOLATION_SYS_PWR_REG 0x1240 -#define EXYNOS5_PAD_ISOLATION_SYSMEM_SYS_PWR_REG 0x1250 -#define EXYNOS5_PAD_ALV_SEL_SYS_PWR_REG 0x1260 -#define EXYNOS5_XUSBXTI_SYS_PWR_REG 0x1280 -#define EXYNOS5_XXTI_SYS_PWR_REG 0x1284 -#define EXYNOS5_EXT_REGULATOR_SYS_PWR_REG 0x12C0 -#define EXYNOS5_GPIO_MODE_SYS_PWR_REG 0x1300 -#define 
EXYNOS5_GPIO_MODE_SYSMEM_SYS_PWR_REG 0x1320 -#define EXYNOS5_GPIO_MODE_MAU_SYS_PWR_REG 0x1340 -#define EXYNOS5_TOP_ASB_RESET_SYS_PWR_REG 0x1344 -#define EXYNOS5_TOP_ASB_ISOLATION_SYS_PWR_REG 0x1348 -#define EXYNOS5_GSCL_SYS_PWR_REG 0x1400 -#define EXYNOS5_ISP_SYS_PWR_REG 0x1404 -#define EXYNOS5_MFC_SYS_PWR_REG 0x1408 -#define EXYNOS5_G3D_SYS_PWR_REG 0x140C -#define EXYNOS5_DISP1_SYS_PWR_REG 0x1414 -#define EXYNOS5_MAU_SYS_PWR_REG 0x1418 -#define EXYNOS5_CMU_CLKSTOP_GSCL_SYS_PWR_REG 0x1480 -#define EXYNOS5_CMU_CLKSTOP_ISP_SYS_PWR_REG 0x1484 -#define EXYNOS5_CMU_CLKSTOP_MFC_SYS_PWR_REG 0x1488 -#define EXYNOS5_CMU_CLKSTOP_G3D_SYS_PWR_REG 0x148C -#define EXYNOS5_CMU_CLKSTOP_DISP1_SYS_PWR_REG 0x1494 -#define EXYNOS5_CMU_CLKSTOP_MAU_SYS_PWR_REG 0x1498 -#define EXYNOS5_CMU_SYSCLK_GSCL_SYS_PWR_REG 0x14C0 -#define EXYNOS5_CMU_SYSCLK_ISP_SYS_PWR_REG 0x14C4 -#define EXYNOS5_CMU_SYSCLK_MFC_SYS_PWR_REG 0x14C8 -#define EXYNOS5_CMU_SYSCLK_G3D_SYS_PWR_REG 0x14CC -#define EXYNOS5_CMU_SYSCLK_DISP1_SYS_PWR_REG 0x14D4 -#define EXYNOS5_CMU_SYSCLK_MAU_SYS_PWR_REG 0x14D8 -#define EXYNOS5_CMU_RESET_GSCL_SYS_PWR_REG 0x1580 -#define EXYNOS5_CMU_RESET_ISP_SYS_PWR_REG 0x1584 -#define EXYNOS5_CMU_RESET_MFC_SYS_PWR_REG 0x1588 -#define EXYNOS5_CMU_RESET_G3D_SYS_PWR_REG 0x158C -#define EXYNOS5_CMU_RESET_DISP1_SYS_PWR_REG 0x1594 -#define EXYNOS5_CMU_RESET_MAU_SYS_PWR_REG 0x1598 - -#define EXYNOS5_ARM_CORE0_OPTION 0x2008 -#define EXYNOS5_ARM_CORE1_OPTION 0x2088 -#define EXYNOS5_FSYS_ARM_OPTION 0x2208 -#define EXYNOS5_ISP_ARM_OPTION 0x2288 -#define EXYNOS5_ARM_COMMON_OPTION 0x2408 -#define EXYNOS5_ARM_L2_OPTION 0x2608 -#define EXYNOS5_TOP_PWR_OPTION 0x2C48 -#define EXYNOS5_TOP_PWR_SYSMEM_OPTION 0x2CC8 -#define EXYNOS5_JPEG_MEM_OPTION 0x2F48 -#define EXYNOS5_GSCL_OPTION 0x4008 -#define EXYNOS5_ISP_OPTION 0x4028 -#define EXYNOS5_MFC_OPTION 0x4048 -#define EXYNOS5_G3D_OPTION 0x4068 -#define EXYNOS5_DISP1_OPTION 0x40A8 -#define EXYNOS5_MAU_OPTION 0x40C8 - -#define EXYNOS5_USE_SC_FEEDBACK (1 << 1) -#define EXYNOS5_USE_SC_COUNTER (1 << 0) - -#define EXYNOS5_SKIP_DEACTIVATE_ACEACP_IN_PWDN (1 << 7) - -#define EXYNOS5_OPTION_USE_STANDBYWFE (1 << 24) -#define EXYNOS5_OPTION_USE_STANDBYWFI (1 << 16) - -#define EXYNOS5_OPTION_USE_RETENTION (1 << 4) - -#define EXYNOS5420_SWRESET_KFC_SEL 0x3 - -/* Only for EXYNOS5420 */ -#define EXYNOS5420_ISP_ARM_OPTION 0x2488 -#define EXYNOS5420_L2RSTDISABLE_VALUE BIT(3) - -#define EXYNOS5420_LPI_MASK 0x0004 -#define EXYNOS5420_LPI_MASK1 0x0008 -#define EXYNOS5420_UFS BIT(8) -#define EXYNOS5420_ATB_KFC BIT(13) -#define EXYNOS5420_ATB_ISP_ARM BIT(19) -#define EXYNOS5420_EMULATION BIT(31) -#define ATB_ISP_ARM BIT(12) -#define ATB_KFC BIT(13) -#define ATB_NOC BIT(14) - -#define EXYNOS5420_ARM_INTR_SPREAD_ENABLE 0x0100 -#define EXYNOS5420_ARM_INTR_SPREAD_USE_STANDBYWFI 0x0104 -#define EXYNOS5420_UP_SCHEDULER 0x0120 -#define SPREAD_ENABLE 0xF -#define SPREAD_USE_STANDWFI 0xF - -#define EXYNOS5420_KFC_CORE_RESET0 BIT(8) -#define EXYNOS5420_KFC_ETM_RESET0 BIT(20) - -#define EXYNOS5420_KFC_CORE_RESET(_nr) \ - ((EXYNOS5420_KFC_CORE_RESET0 | EXYNOS5420_KFC_ETM_RESET0) << (_nr)) - -#define EXYNOS5420_BB_CON1 0x0784 -#define EXYNOS5420_BB_SEL_EN BIT(31) -#define EXYNOS5420_BB_PMOS_EN BIT(7) -#define EXYNOS5420_BB_1300X 0XF - -#define EXYNOS5420_ARM_CORE2_SYS_PWR_REG 0x1020 -#define EXYNOS5420_DIS_IRQ_ARM_CORE2_LOCAL_SYS_PWR_REG 0x1024 -#define EXYNOS5420_DIS_IRQ_ARM_CORE2_CENTRAL_SYS_PWR_REG 0x1028 -#define EXYNOS5420_ARM_CORE3_SYS_PWR_REG 0x1030 -#define EXYNOS5420_DIS_IRQ_ARM_CORE3_LOCAL_SYS_PWR_REG 0x1034 
-#define EXYNOS5420_DIS_IRQ_ARM_CORE3_CENTRAL_SYS_PWR_REG 0x1038 -#define EXYNOS5420_KFC_CORE0_SYS_PWR_REG 0x1040 -#define EXYNOS5420_DIS_IRQ_KFC_CORE0_LOCAL_SYS_PWR_REG 0x1044 -#define EXYNOS5420_DIS_IRQ_KFC_CORE0_CENTRAL_SYS_PWR_REG 0x1048 -#define EXYNOS5420_KFC_CORE1_SYS_PWR_REG 0x1050 -#define EXYNOS5420_DIS_IRQ_KFC_CORE1_LOCAL_SYS_PWR_REG 0x1054 -#define EXYNOS5420_DIS_IRQ_KFC_CORE1_CENTRAL_SYS_PWR_REG 0x1058 -#define EXYNOS5420_KFC_CORE2_SYS_PWR_REG 0x1060 -#define EXYNOS5420_DIS_IRQ_KFC_CORE2_LOCAL_SYS_PWR_REG 0x1064 -#define EXYNOS5420_DIS_IRQ_KFC_CORE2_CENTRAL_SYS_PWR_REG 0x1068 -#define EXYNOS5420_KFC_CORE3_SYS_PWR_REG 0x1070 -#define EXYNOS5420_DIS_IRQ_KFC_CORE3_LOCAL_SYS_PWR_REG 0x1074 -#define EXYNOS5420_DIS_IRQ_KFC_CORE3_CENTRAL_SYS_PWR_REG 0x1078 -#define EXYNOS5420_ISP_ARM_SYS_PWR_REG 0x1090 -#define EXYNOS5420_DIS_IRQ_ISP_ARM_LOCAL_SYS_PWR_REG 0x1094 -#define EXYNOS5420_DIS_IRQ_ISP_ARM_CENTRAL_SYS_PWR_REG 0x1098 -#define EXYNOS5420_ARM_COMMON_SYS_PWR_REG 0x10A0 -#define EXYNOS5420_KFC_COMMON_SYS_PWR_REG 0x10B0 -#define EXYNOS5420_KFC_L2_SYS_PWR_REG 0x10D0 -#define EXYNOS5420_DPLL_SYSCLK_SYS_PWR_REG 0x1158 -#define EXYNOS5420_IPLL_SYSCLK_SYS_PWR_REG 0x115C -#define EXYNOS5420_KPLL_SYSCLK_SYS_PWR_REG 0x1160 -#define EXYNOS5420_RPLL_SYSCLK_SYS_PWR_REG 0x1174 -#define EXYNOS5420_SPLL_SYSCLK_SYS_PWR_REG 0x1178 -#define EXYNOS5420_INTRAM_MEM_SYS_PWR_REG 0x11B8 -#define EXYNOS5420_INTROM_MEM_SYS_PWR_REG 0x11BC -#define EXYNOS5420_ONENANDXL_MEM_SYS_PWR 0x11C0 -#define EXYNOS5420_USBDEV_MEM_SYS_PWR 0x11CC -#define EXYNOS5420_USBDEV1_MEM_SYS_PWR 0x11D0 -#define EXYNOS5420_SDMMC_MEM_SYS_PWR 0x11D4 -#define EXYNOS5420_CSSYS_MEM_SYS_PWR 0x11D8 -#define EXYNOS5420_SECSS_MEM_SYS_PWR 0x11DC -#define EXYNOS5420_ROTATOR_MEM_SYS_PWR 0x11E0 -#define EXYNOS5420_INTRAM_MEM_SYS_PWR 0x11E4 -#define EXYNOS5420_INTROM_MEM_SYS_PWR 0x11E8 -#define EXYNOS5420_PAD_RETENTION_JTAG_SYS_PWR_REG 0x1208 -#define EXYNOS5420_PAD_RETENTION_DRAM_SYS_PWR_REG 0x1210 -#define EXYNOS5420_PAD_RETENTION_UART_SYS_PWR_REG 0x1214 -#define EXYNOS5420_PAD_RETENTION_MMC0_SYS_PWR_REG 0x1218 -#define EXYNOS5420_PAD_RETENTION_MMC1_SYS_PWR_REG 0x121C -#define EXYNOS5420_PAD_RETENTION_MMC2_SYS_PWR_REG 0x1220 -#define EXYNOS5420_PAD_RETENTION_HSI_SYS_PWR_REG 0x1224 -#define EXYNOS5420_PAD_RETENTION_EBIA_SYS_PWR_REG 0x1228 -#define EXYNOS5420_PAD_RETENTION_EBIB_SYS_PWR_REG 0x122C -#define EXYNOS5420_PAD_RETENTION_SPI_SYS_PWR_REG 0x1230 -#define EXYNOS5420_PAD_RETENTION_DRAM_COREBLK_SYS_PWR_REG 0x1234 -#define EXYNOS5420_DISP1_SYS_PWR_REG 0x1410 -#define EXYNOS5420_MAU_SYS_PWR_REG 0x1414 -#define EXYNOS5420_G2D_SYS_PWR_REG 0x1418 -#define EXYNOS5420_MSC_SYS_PWR_REG 0x141C -#define EXYNOS5420_FSYS_SYS_PWR_REG 0x1420 -#define EXYNOS5420_FSYS2_SYS_PWR_REG 0x1424 -#define EXYNOS5420_PSGEN_SYS_PWR_REG 0x1428 -#define EXYNOS5420_PERIC_SYS_PWR_REG 0x142C -#define EXYNOS5420_WCORE_SYS_PWR_REG 0x1430 -#define EXYNOS5420_CMU_CLKSTOP_DISP1_SYS_PWR_REG 0x1490 -#define EXYNOS5420_CMU_CLKSTOP_MAU_SYS_PWR_REG 0x1494 -#define EXYNOS5420_CMU_CLKSTOP_G2D_SYS_PWR_REG 0x1498 -#define EXYNOS5420_CMU_CLKSTOP_MSC_SYS_PWR_REG 0x149C -#define EXYNOS5420_CMU_CLKSTOP_FSYS_SYS_PWR_REG 0x14A0 -#define EXYNOS5420_CMU_CLKSTOP_FSYS2_SYS_PWR_REG 0x14A4 -#define EXYNOS5420_CMU_CLKSTOP_PSGEN_SYS_PWR_REG 0x14A8 -#define EXYNOS5420_CMU_CLKSTOP_PERIC_SYS_PWR_REG 0x14AC -#define EXYNOS5420_CMU_CLKSTOP_WCORE_SYS_PWR_REG 0x14B0 -#define EXYNOS5420_CMU_SYSCLK_TOPPWR_SYS_PWR_REG 0x14BC -#define EXYNOS5420_CMU_SYSCLK_DISP1_SYS_PWR_REG 0x14D0 -#define 
EXYNOS5420_CMU_SYSCLK_MAU_SYS_PWR_REG 0x14D4 -#define EXYNOS5420_CMU_SYSCLK_G2D_SYS_PWR_REG 0x14D8 -#define EXYNOS5420_CMU_SYSCLK_MSC_SYS_PWR_REG 0x14DC -#define EXYNOS5420_CMU_SYSCLK_FSYS_SYS_PWR_REG 0x14E0 -#define EXYNOS5420_CMU_SYSCLK_FSYS2_SYS_PWR_REG 0x14E4 -#define EXYNOS5420_CMU_SYSCLK_PSGEN_SYS_PWR_REG 0x14E8 -#define EXYNOS5420_CMU_SYSCLK_PERIC_SYS_PWR_REG 0x14EC -#define EXYNOS5420_CMU_SYSCLK_WCORE_SYS_PWR_REG 0x14F0 -#define EXYNOS5420_CMU_SYSCLK_SYSMEM_TOPPWR_SYS_PWR_REG 0x14F4 -#define EXYNOS5420_CMU_RESET_FSYS2_SYS_PWR_REG 0x1570 -#define EXYNOS5420_CMU_RESET_PSGEN_SYS_PWR_REG 0x1574 -#define EXYNOS5420_CMU_RESET_PERIC_SYS_PWR_REG 0x1578 -#define EXYNOS5420_CMU_RESET_WCORE_SYS_PWR_REG 0x157C -#define EXYNOS5420_CMU_RESET_DISP1_SYS_PWR_REG 0x1590 -#define EXYNOS5420_CMU_RESET_MAU_SYS_PWR_REG 0x1594 -#define EXYNOS5420_CMU_RESET_G2D_SYS_PWR_REG 0x1598 -#define EXYNOS5420_CMU_RESET_MSC_SYS_PWR_REG 0x159C -#define EXYNOS5420_CMU_RESET_FSYS_SYS_PWR_REG 0x15A0 -#define EXYNOS5420_SFR_AXI_CGDIS1 0x15E4 -#define EXYNOS_ARM_CORE2_CONFIGURATION 0x2100 -#define EXYNOS5420_ARM_CORE2_OPTION 0x2108 -#define EXYNOS_ARM_CORE3_CONFIGURATION 0x2180 -#define EXYNOS5420_ARM_CORE3_OPTION 0x2188 -#define EXYNOS5420_ARM_COMMON_STATUS 0x2504 -#define EXYNOS5420_ARM_COMMON_OPTION 0x2508 -#define EXYNOS5420_KFC_COMMON_STATUS 0x2584 -#define EXYNOS5420_KFC_COMMON_OPTION 0x2588 -#define EXYNOS5420_LOGIC_RESET_DURATION3 0x2D1C - -#define EXYNOS5420_PAD_RET_GPIO_OPTION 0x30C8 -#define EXYNOS5420_PAD_RET_UART_OPTION 0x30E8 -#define EXYNOS5420_PAD_RET_MMCA_OPTION 0x3108 -#define EXYNOS5420_PAD_RET_MMCB_OPTION 0x3128 -#define EXYNOS5420_PAD_RET_MMCC_OPTION 0x3148 -#define EXYNOS5420_PAD_RET_HSI_OPTION 0x3168 -#define EXYNOS5420_PAD_RET_SPI_OPTION 0x31C8 -#define EXYNOS5420_PAD_RET_DRAM_COREBLK_OPTION 0x31E8 -#define EXYNOS_PAD_RET_DRAM_OPTION 0x3008 -#define EXYNOS_PAD_RET_MAUDIO_OPTION 0x3028 -#define EXYNOS_PAD_RET_JTAG_OPTION 0x3048 -#define EXYNOS_PAD_RET_GPIO_OPTION 0x3108 -#define EXYNOS_PAD_RET_UART_OPTION 0x3128 -#define EXYNOS_PAD_RET_MMCA_OPTION 0x3148 -#define EXYNOS_PAD_RET_MMCB_OPTION 0x3168 -#define EXYNOS_PAD_RET_EBIA_OPTION 0x3188 -#define EXYNOS_PAD_RET_EBIB_OPTION 0x31A8 - -#define EXYNOS_PS_HOLD_CONTROL 0x330C - -/* For SYS_PWR_REG */ -#define EXYNOS_SYS_PWR_CFG BIT(0) - -#define EXYNOS5420_MFC_CONFIGURATION 0x4060 -#define EXYNOS5420_MFC_STATUS 0x4064 -#define EXYNOS5420_MFC_OPTION 0x4068 -#define EXYNOS5420_G3D_CONFIGURATION 0x4080 -#define EXYNOS5420_G3D_STATUS 0x4084 -#define EXYNOS5420_G3D_OPTION 0x4088 -#define EXYNOS5420_DISP0_CONFIGURATION 0x40A0 -#define EXYNOS5420_DISP0_STATUS 0x40A4 -#define EXYNOS5420_DISP0_OPTION 0x40A8 -#define EXYNOS5420_DISP1_CONFIGURATION 0x40C0 -#define EXYNOS5420_DISP1_STATUS 0x40C4 -#define EXYNOS5420_DISP1_OPTION 0x40C8 -#define EXYNOS5420_MAU_CONFIGURATION 0x40E0 -#define EXYNOS5420_MAU_STATUS 0x40E4 -#define EXYNOS5420_MAU_OPTION 0x40E8 -#define EXYNOS5420_FSYS2_OPTION 0x4168 -#define EXYNOS5420_PSGEN_OPTION 0x4188 - -/* For EXYNOS_CENTRAL_SEQ_OPTION */ -#define EXYNOS5_USE_STANDBYWFI_ARM_CORE0 BIT(16) -#define EXYNOS5_USE_STANDBYWFI_ARM_CORE1 BIT(17) -#define EXYNOS5_USE_STANDBYWFE_ARM_CORE0 BIT(24) -#define EXYNOS5_USE_STANDBYWFE_ARM_CORE1 BIT(25) - -#define EXYNOS5420_ARM_USE_STANDBY_WFI0 BIT(4) -#define EXYNOS5420_ARM_USE_STANDBY_WFI1 BIT(5) -#define EXYNOS5420_ARM_USE_STANDBY_WFI2 BIT(6) -#define EXYNOS5420_ARM_USE_STANDBY_WFI3 BIT(7) -#define EXYNOS5420_KFC_USE_STANDBY_WFI0 BIT(8) -#define EXYNOS5420_KFC_USE_STANDBY_WFI1 BIT(9) -#define
EXYNOS5420_KFC_USE_STANDBY_WFI2 BIT(10) -#define EXYNOS5420_KFC_USE_STANDBY_WFI3 BIT(11) -#define EXYNOS5420_ARM_USE_STANDBY_WFE0 BIT(16) -#define EXYNOS5420_ARM_USE_STANDBY_WFE1 BIT(17) -#define EXYNOS5420_ARM_USE_STANDBY_WFE2 BIT(18) -#define EXYNOS5420_ARM_USE_STANDBY_WFE3 BIT(19) -#define EXYNOS5420_KFC_USE_STANDBY_WFE0 BIT(20) -#define EXYNOS5420_KFC_USE_STANDBY_WFE1 BIT(21) -#define EXYNOS5420_KFC_USE_STANDBY_WFE2 BIT(22) -#define EXYNOS5420_KFC_USE_STANDBY_WFE3 BIT(23) - -#define DUR_WAIT_RESET 0xF - -#define EXYNOS5420_USE_STANDBY_WFI_ALL (EXYNOS5420_ARM_USE_STANDBY_WFI0 \ - | EXYNOS5420_ARM_USE_STANDBY_WFI1 \ - | EXYNOS5420_ARM_USE_STANDBY_WFI2 \ - | EXYNOS5420_ARM_USE_STANDBY_WFI3 \ - | EXYNOS5420_KFC_USE_STANDBY_WFI0 \ - | EXYNOS5420_KFC_USE_STANDBY_WFI1 \ - | EXYNOS5420_KFC_USE_STANDBY_WFI2 \ - | EXYNOS5420_KFC_USE_STANDBY_WFI3) - -#endif /* __ASM_ARCH_REGS_PMU_H */ diff --git a/arch/arm/mach-exynos/suspend.c b/arch/arm/mach-exynos/suspend.c index c169cc3049aa..fee2b003e662 100644 --- a/arch/arm/mach-exynos/suspend.c +++ b/arch/arm/mach-exynos/suspend.c @@ -24,6 +24,8 @@ #include #include #include +#include <linux/soc/samsung/exynos-pmu.h> +#include <linux/soc/samsung/exynos-regs-pmu.h> #include #include @@ -35,8 +37,6 @@ #include #include "common.h" -#include "exynos-pmu.h" -#include "regs-pmu.h" #include "regs-srom.h" #define REG_TABLE_END (-1U) diff --git a/include/linux/soc/samsung/exynos-pmu.h b/include/linux/soc/samsung/exynos-pmu.h new file mode 100644 index 000000000000..e2e9de1acc5b --- /dev/null +++ b/include/linux/soc/samsung/exynos-pmu.h @@ -0,0 +1,24 @@ +/* + * Copyright (c) 2014 Samsung Electronics Co., Ltd. + * http://www.samsung.com + * + * Header for EXYNOS PMU Driver support + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef __LINUX_SOC_EXYNOS_PMU_H +#define __LINUX_SOC_EXYNOS_PMU_H + +enum sys_powerdown { + SYS_AFTR, + SYS_LPA, + SYS_SLEEP, + NUM_SYS_POWERDOWN, +}; + +extern void exynos_sys_powerdown_conf(enum sys_powerdown mode); + +#endif /* __LINUX_SOC_EXYNOS_PMU_H */ diff --git a/include/linux/soc/samsung/exynos-regs-pmu.h b/include/linux/soc/samsung/exynos-regs-pmu.h new file mode 100644 index 000000000000..d30186e2b609 --- /dev/null +++ b/include/linux/soc/samsung/exynos-regs-pmu.h @@ -0,0 +1,693 @@ +/* + * Copyright (c) 2010-2012 Samsung Electronics Co., Ltd. + * http://www.samsung.com + * + * EXYNOS - Power management unit definition + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation.
+*/ + +#ifndef __LINUX_SOC_EXYNOS_REGS_PMU_H +#define __LINUX_SOC_EXYNOS_REGS_PMU_H __FILE__ + +#define S5P_CENTRAL_SEQ_CONFIGURATION 0x0200 + +#define S5P_CENTRAL_LOWPWR_CFG (1 << 16) + +#define S5P_CENTRAL_SEQ_OPTION 0x0208 + +#define S5P_USE_STANDBY_WFI0 (1 << 16) +#define S5P_USE_STANDBY_WFI1 (1 << 17) +#define S5P_USE_STANDBY_WFI2 (1 << 19) +#define S5P_USE_STANDBY_WFI3 (1 << 20) +#define S5P_USE_STANDBY_WFE0 (1 << 24) +#define S5P_USE_STANDBY_WFE1 (1 << 25) +#define S5P_USE_STANDBY_WFE2 (1 << 27) +#define S5P_USE_STANDBY_WFE3 (1 << 28) + +#define S5P_USE_STANDBY_WFI_ALL \ + (S5P_USE_STANDBY_WFI0 | S5P_USE_STANDBY_WFI1 | \ + S5P_USE_STANDBY_WFI2 | S5P_USE_STANDBY_WFI3 | \ + S5P_USE_STANDBY_WFE0 | S5P_USE_STANDBY_WFE1 | \ + S5P_USE_STANDBY_WFE2 | S5P_USE_STANDBY_WFE3) + +#define S5P_USE_DELAYED_RESET_ASSERTION BIT(12) + +#define EXYNOS_CORE_PO_RESET(n) ((1 << 4) << n) +#define EXYNOS_WAKEUP_FROM_LOWPWR (1 << 28) +#define EXYNOS_SWRESET 0x0400 +#define EXYNOS5440_SWRESET 0x00C4 + +#define S5P_WAKEUP_STAT 0x0600 +#define S5P_EINT_WAKEUP_MASK 0x0604 +#define S5P_WAKEUP_MASK 0x0608 +#define S5P_WAKEUP_MASK2 0x0614 + +#define S5P_INFORM0 0x0800 +#define S5P_INFORM1 0x0804 +#define S5P_INFORM5 0x0814 +#define S5P_INFORM6 0x0818 +#define S5P_INFORM7 0x081C +#define S5P_PMU_SPARE2 0x0908 +#define S5P_PMU_SPARE3 0x090C + +#define EXYNOS_IROM_DATA2 0x0988 +#define S5P_ARM_CORE0_LOWPWR 0x1000 +#define S5P_DIS_IRQ_CORE0 0x1004 +#define S5P_DIS_IRQ_CENTRAL0 0x1008 +#define S5P_ARM_CORE1_LOWPWR 0x1010 +#define S5P_DIS_IRQ_CORE1 0x1014 +#define S5P_DIS_IRQ_CENTRAL1 0x1018 +#define S5P_ARM_COMMON_LOWPWR 0x1080 +#define S5P_L2_0_LOWPWR 0x10C0 +#define S5P_L2_1_LOWPWR 0x10C4 +#define S5P_CMU_ACLKSTOP_LOWPWR 0x1100 +#define S5P_CMU_SCLKSTOP_LOWPWR 0x1104 +#define S5P_CMU_RESET_LOWPWR 0x110C +#define S5P_APLL_SYSCLK_LOWPWR 0x1120 +#define S5P_MPLL_SYSCLK_LOWPWR 0x1124 +#define S5P_VPLL_SYSCLK_LOWPWR 0x1128 +#define S5P_EPLL_SYSCLK_LOWPWR 0x112C +#define S5P_CMU_CLKSTOP_GPS_ALIVE_LOWPWR 0x1138 +#define S5P_CMU_RESET_GPSALIVE_LOWPWR 0x113C +#define S5P_CMU_CLKSTOP_CAM_LOWPWR 0x1140 +#define S5P_CMU_CLKSTOP_TV_LOWPWR 0x1144 +#define S5P_CMU_CLKSTOP_MFC_LOWPWR 0x1148 +#define S5P_CMU_CLKSTOP_G3D_LOWPWR 0x114C +#define S5P_CMU_CLKSTOP_LCD0_LOWPWR 0x1150 +#define S5P_CMU_CLKSTOP_MAUDIO_LOWPWR 0x1158 +#define S5P_CMU_CLKSTOP_GPS_LOWPWR 0x115C +#define S5P_CMU_RESET_CAM_LOWPWR 0x1160 +#define S5P_CMU_RESET_TV_LOWPWR 0x1164 +#define S5P_CMU_RESET_MFC_LOWPWR 0x1168 +#define S5P_CMU_RESET_G3D_LOWPWR 0x116C +#define S5P_CMU_RESET_LCD0_LOWPWR 0x1170 +#define S5P_CMU_RESET_MAUDIO_LOWPWR 0x1178 +#define S5P_CMU_RESET_GPS_LOWPWR 0x117C +#define S5P_TOP_BUS_LOWPWR 0x1180 +#define S5P_TOP_RETENTION_LOWPWR 0x1184 +#define S5P_TOP_PWR_LOWPWR 0x1188 +#define S5P_LOGIC_RESET_LOWPWR 0x11A0 +#define S5P_ONENAND_MEM_LOWPWR 0x11C0 +#define S5P_G2D_ACP_MEM_LOWPWR 0x11C8 +#define S5P_USBOTG_MEM_LOWPWR 0x11CC +#define S5P_HSMMC_MEM_LOWPWR 0x11D0 +#define S5P_CSSYS_MEM_LOWPWR 0x11D4 +#define S5P_SECSS_MEM_LOWPWR 0x11D8 +#define S5P_PAD_RETENTION_DRAM_LOWPWR 0x1200 +#define S5P_PAD_RETENTION_MAUDIO_LOWPWR 0x1204 +#define S5P_PAD_RETENTION_GPIO_LOWPWR 0x1220 +#define S5P_PAD_RETENTION_UART_LOWPWR 0x1224 +#define S5P_PAD_RETENTION_MMCA_LOWPWR 0x1228 +#define S5P_PAD_RETENTION_MMCB_LOWPWR 0x122C +#define S5P_PAD_RETENTION_EBIA_LOWPWR 0x1230 +#define S5P_PAD_RETENTION_EBIB_LOWPWR 0x1234 +#define S5P_PAD_RETENTION_ISOLATION_LOWPWR 0x1240 +#define S5P_PAD_RETENTION_ALV_SEL_LOWPWR 0x1260 +#define S5P_XUSBXTI_LOWPWR 0x1280 +#define 
S5P_XXTI_LOWPWR 0x1284 +#define S5P_EXT_REGULATOR_LOWPWR 0x12C0 +#define S5P_GPIO_MODE_LOWPWR 0x1300 +#define S5P_GPIO_MODE_MAUDIO_LOWPWR 0x1340 +#define S5P_CAM_LOWPWR 0x1380 +#define S5P_TV_LOWPWR 0x1384 +#define S5P_MFC_LOWPWR 0x1388 +#define S5P_G3D_LOWPWR 0x138C +#define S5P_LCD0_LOWPWR 0x1390 +#define S5P_MAUDIO_LOWPWR 0x1398 +#define S5P_GPS_LOWPWR 0x139C +#define S5P_GPS_ALIVE_LOWPWR 0x13A0 + +#define EXYNOS_ARM_CORE0_CONFIGURATION 0x2000 +#define EXYNOS_ARM_CORE_CONFIGURATION(_nr) \ + (EXYNOS_ARM_CORE0_CONFIGURATION + (0x80 * (_nr))) +#define EXYNOS_ARM_CORE_STATUS(_nr) \ + (EXYNOS_ARM_CORE_CONFIGURATION(_nr) + 0x4) +#define EXYNOS_ARM_CORE_OPTION(_nr) \ + (EXYNOS_ARM_CORE_CONFIGURATION(_nr) + 0x8) + +#define EXYNOS_ARM_COMMON_CONFIGURATION 0x2500 +#define EXYNOS_COMMON_CONFIGURATION(_nr) \ + (EXYNOS_ARM_COMMON_CONFIGURATION + (0x80 * (_nr))) +#define EXYNOS_COMMON_STATUS(_nr) \ + (EXYNOS_COMMON_CONFIGURATION(_nr) + 0x4) +#define EXYNOS_COMMON_OPTION(_nr) \ + (EXYNOS_COMMON_CONFIGURATION(_nr) + 0x8) + +#define EXYNOS_CORE_LOCAL_PWR_EN 0x3 + +#define EXYNOS_ARM_COMMON_STATUS 0x2504 +#define EXYNOS_COMMON_OPTION(_nr) \ + (EXYNOS_COMMON_CONFIGURATION(_nr) + 0x8) + +#define EXYNOS_ARM_L2_CONFIGURATION 0x2600 +#define EXYNOS_L2_CONFIGURATION(_nr) \ + (EXYNOS_ARM_L2_CONFIGURATION + ((_nr) * 0x80)) +#define EXYNOS_L2_STATUS(_nr) \ + (EXYNOS_L2_CONFIGURATION(_nr) + 0x4) +#define EXYNOS_L2_OPTION(_nr) \ + (EXYNOS_L2_CONFIGURATION(_nr) + 0x8) +#define EXYNOS_L2_COMMON_PWR_EN 0x3 + +#define EXYNOS_ARM_CORE_X_STATUS_OFFSET 0x4 + +#define EXYNOS5_APLL_SYSCLK_CONFIGURATION 0x2A00 +#define EXYNOS5_APLL_SYSCLK_STATUS 0x2A04 + +#define EXYNOS5_ARM_L2_OPTION 0x2608 +#define EXYNOS5_USE_RETENTION BIT(4) + +#define EXYNOS5_L2RSTDISABLE_VALUE BIT(3) + +#define S5P_PAD_RET_MAUDIO_OPTION 0x3028 +#define S5P_PAD_RET_MMC2_OPTION 0x30c8 +#define S5P_PAD_RET_GPIO_OPTION 0x3108 +#define S5P_PAD_RET_UART_OPTION 0x3128 +#define S5P_PAD_RET_MMCA_OPTION 0x3148 +#define S5P_PAD_RET_MMCB_OPTION 0x3168 +#define S5P_PAD_RET_EBIA_OPTION 0x3188 +#define S5P_PAD_RET_EBIB_OPTION 0x31A8 +#define S5P_PAD_RET_SPI_OPTION 0x31c8 + +#define S5P_PS_HOLD_CONTROL 0x330C +#define S5P_PS_HOLD_EN (1 << 31) +#define S5P_PS_HOLD_OUTPUT_HIGH (3 << 8) + +#define S5P_CAM_OPTION 0x3C08 +#define S5P_MFC_OPTION 0x3C48 +#define S5P_G3D_OPTION 0x3C68 +#define S5P_LCD0_OPTION 0x3C88 +#define S5P_LCD1_OPTION 0x3CA8 +#define S5P_ISP_OPTION S5P_LCD1_OPTION + +#define S5P_CORE_LOCAL_PWR_EN 0x3 +#define S5P_CORE_WAKEUP_FROM_LOCAL_CFG (0x3 << 8) +#define S5P_CORE_AUTOWAKEUP_EN (1 << 31) + +/* Only for EXYNOS4210 */ +#define S5P_CMU_CLKSTOP_LCD1_LOWPWR 0x1154 +#define S5P_CMU_RESET_LCD1_LOWPWR 0x1174 +#define S5P_MODIMIF_MEM_LOWPWR 0x11C4 +#define S5P_PCIE_MEM_LOWPWR 0x11E0 +#define S5P_SATA_MEM_LOWPWR 0x11E4 +#define S5P_LCD1_LOWPWR 0x1394 + +/* Only for EXYNOS4x12 */ +#define S5P_ISP_ARM_LOWPWR 0x1050 +#define S5P_DIS_IRQ_ISP_ARM_LOCAL_LOWPWR 0x1054 +#define S5P_DIS_IRQ_ISP_ARM_CENTRAL_LOWPWR 0x1058 +#define S5P_CMU_ACLKSTOP_COREBLK_LOWPWR 0x1110 +#define S5P_CMU_SCLKSTOP_COREBLK_LOWPWR 0x1114 +#define S5P_CMU_RESET_COREBLK_LOWPWR 0x111C +#define S5P_MPLLUSER_SYSCLK_LOWPWR 0x1130 +#define S5P_CMU_CLKSTOP_ISP_LOWPWR 0x1154 +#define S5P_CMU_RESET_ISP_LOWPWR 0x1174 +#define S5P_TOP_BUS_COREBLK_LOWPWR 0x1190 +#define S5P_TOP_RETENTION_COREBLK_LOWPWR 0x1194 +#define S5P_TOP_PWR_COREBLK_LOWPWR 0x1198 +#define S5P_OSCCLK_GATE_LOWPWR 0x11A4 +#define S5P_LOGIC_RESET_COREBLK_LOWPWR 0x11B0 +#define S5P_OSCCLK_GATE_COREBLK_LOWPWR 0x11B4 +#define 
S5P_HSI_MEM_LOWPWR 0x11C4 +#define S5P_ROTATOR_MEM_LOWPWR 0x11DC +#define S5P_PAD_RETENTION_GPIO_COREBLK_LOWPWR 0x123C +#define S5P_PAD_ISOLATION_COREBLK_LOWPWR 0x1250 +#define S5P_GPIO_MODE_COREBLK_LOWPWR 0x1320 +#define S5P_TOP_ASB_RESET_LOWPWR 0x1344 +#define S5P_TOP_ASB_ISOLATION_LOWPWR 0x1348 +#define S5P_ISP_LOWPWR 0x1394 +#define S5P_DRAM_FREQ_DOWN_LOWPWR 0x13B0 +#define S5P_DDRPHY_DLLOFF_LOWPWR 0x13B4 +#define S5P_CMU_SYSCLK_ISP_LOWPWR 0x13B8 +#define S5P_CMU_SYSCLK_GPS_LOWPWR 0x13BC +#define S5P_LPDDR_PHY_DLL_LOCK_LOWPWR 0x13C0 + +#define S5P_ARM_L2_0_OPTION 0x2608 +#define S5P_ARM_L2_1_OPTION 0x2628 +#define S5P_ONENAND_MEM_OPTION 0x2E08 +#define S5P_HSI_MEM_OPTION 0x2E28 +#define S5P_G2D_ACP_MEM_OPTION 0x2E48 +#define S5P_USBOTG_MEM_OPTION 0x2E68 +#define S5P_HSMMC_MEM_OPTION 0x2E88 +#define S5P_CSSYS_MEM_OPTION 0x2EA8 +#define S5P_SECSS_MEM_OPTION 0x2EC8 +#define S5P_ROTATOR_MEM_OPTION 0x2F48 + +/* Only for EXYNOS4412 */ +#define S5P_ARM_CORE2_LOWPWR 0x1020 +#define S5P_DIS_IRQ_CORE2 0x1024 +#define S5P_DIS_IRQ_CENTRAL2 0x1028 +#define S5P_ARM_CORE3_LOWPWR 0x1030 +#define S5P_DIS_IRQ_CORE3 0x1034 +#define S5P_DIS_IRQ_CENTRAL3 0x1038 + +/* Only for EXYNOS3XXX */ +#define EXYNOS3_ARM_CORE0_SYS_PWR_REG 0x1000 +#define EXYNOS3_DIS_IRQ_ARM_CORE0_LOCAL_SYS_PWR_REG 0x1004 +#define EXYNOS3_DIS_IRQ_ARM_CORE0_CENTRAL_SYS_PWR_REG 0x1008 +#define EXYNOS3_ARM_CORE1_SYS_PWR_REG 0x1010 +#define EXYNOS3_DIS_IRQ_ARM_CORE1_LOCAL_SYS_PWR_REG 0x1014 +#define EXYNOS3_DIS_IRQ_ARM_CORE1_CENTRAL_SYS_PWR_REG 0x1018 +#define EXYNOS3_ISP_ARM_SYS_PWR_REG 0x1050 +#define EXYNOS3_DIS_IRQ_ISP_ARM_LOCAL_SYS_PWR_REG 0x1054 +#define EXYNOS3_DIS_IRQ_ISP_ARM_CENTRAL_SYS_PWR_REG 0x1058 +#define EXYNOS3_ARM_COMMON_SYS_PWR_REG 0x1080 +#define EXYNOS3_ARM_L2_SYS_PWR_REG 0x10C0 +#define EXYNOS3_CMU_ACLKSTOP_SYS_PWR_REG 0x1100 +#define EXYNOS3_CMU_SCLKSTOP_SYS_PWR_REG 0x1104 +#define EXYNOS3_CMU_RESET_SYS_PWR_REG 0x110C +#define EXYNOS3_CMU_ACLKSTOP_COREBLK_SYS_PWR_REG 0x1110 +#define EXYNOS3_CMU_SCLKSTOP_COREBLK_SYS_PWR_REG 0x1114 +#define EXYNOS3_CMU_RESET_COREBLK_SYS_PWR_REG 0x111C +#define EXYNOS3_APLL_SYSCLK_SYS_PWR_REG 0x1120 +#define EXYNOS3_MPLL_SYSCLK_SYS_PWR_REG 0x1124 +#define EXYNOS3_VPLL_SYSCLK_SYS_PWR_REG 0x1128 +#define EXYNOS3_EPLL_SYSCLK_SYS_PWR_REG 0x112C +#define EXYNOS3_MPLLUSER_SYSCLK_SYS_PWR_REG 0x1130 +#define EXYNOS3_BPLLUSER_SYSCLK_SYS_PWR_REG 0x1134 +#define EXYNOS3_EPLLUSER_SYSCLK_SYS_PWR_REG 0x1138 +#define EXYNOS3_CMU_CLKSTOP_CAM_SYS_PWR_REG 0x1140 +#define EXYNOS3_CMU_CLKSTOP_MFC_SYS_PWR_REG 0x1148 +#define EXYNOS3_CMU_CLKSTOP_G3D_SYS_PWR_REG 0x114C +#define EXYNOS3_CMU_CLKSTOP_LCD0_SYS_PWR_REG 0x1150 +#define EXYNOS3_CMU_CLKSTOP_ISP_SYS_PWR_REG 0x1154 +#define EXYNOS3_CMU_CLKSTOP_MAUDIO_SYS_PWR_REG 0x1158 +#define EXYNOS3_CMU_RESET_CAM_SYS_PWR_REG 0x1160 +#define EXYNOS3_CMU_RESET_MFC_SYS_PWR_REG 0x1168 +#define EXYNOS3_CMU_RESET_G3D_SYS_PWR_REG 0x116C +#define EXYNOS3_CMU_RESET_LCD0_SYS_PWR_REG 0x1170 +#define EXYNOS3_CMU_RESET_ISP_SYS_PWR_REG 0x1174 +#define EXYNOS3_CMU_RESET_MAUDIO_SYS_PWR_REG 0x1178 +#define EXYNOS3_TOP_BUS_SYS_PWR_REG 0x1180 +#define EXYNOS3_TOP_RETENTION_SYS_PWR_REG 0x1184 +#define EXYNOS3_TOP_PWR_SYS_PWR_REG 0x1188 +#define EXYNOS3_TOP_BUS_COREBLK_SYS_PWR_REG 0x1190 +#define EXYNOS3_TOP_RETENTION_COREBLK_SYS_PWR_REG 0x1194 +#define EXYNOS3_TOP_PWR_COREBLK_SYS_PWR_REG 0x1198 +#define EXYNOS3_LOGIC_RESET_SYS_PWR_REG 0x11A0 +#define EXYNOS3_OSCCLK_GATE_SYS_PWR_REG 0x11A4 +#define EXYNOS3_LOGIC_RESET_COREBLK_SYS_PWR_REG 0x11B0 +#define 
EXYNOS3_OSCCLK_GATE_COREBLK_SYS_PWR_REG 0x11B4 +#define EXYNOS3_PAD_RETENTION_DRAM_SYS_PWR_REG 0x1200 +#define EXYNOS3_PAD_RETENTION_MAUDIO_SYS_PWR_REG 0x1204 +#define EXYNOS3_PAD_RETENTION_SPI_SYS_PWR_REG 0x1208 +#define EXYNOS3_PAD_RETENTION_MMC2_SYS_PWR_REG 0x1218 +#define EXYNOS3_PAD_RETENTION_GPIO_SYS_PWR_REG 0x1220 +#define EXYNOS3_PAD_RETENTION_UART_SYS_PWR_REG 0x1224 +#define EXYNOS3_PAD_RETENTION_MMC0_SYS_PWR_REG 0x1228 +#define EXYNOS3_PAD_RETENTION_MMC1_SYS_PWR_REG 0x122C +#define EXYNOS3_PAD_RETENTION_EBIA_SYS_PWR_REG 0x1230 +#define EXYNOS3_PAD_RETENTION_EBIB_SYS_PWR_REG 0x1234 +#define EXYNOS3_PAD_RETENTION_JTAG_SYS_PWR_REG 0x1238 +#define EXYNOS3_PAD_ISOLATION_SYS_PWR_REG 0x1240 +#define EXYNOS3_PAD_ALV_SEL_SYS_PWR_REG 0x1260 +#define EXYNOS3_XUSBXTI_SYS_PWR_REG 0x1280 +#define EXYNOS3_XXTI_SYS_PWR_REG 0x1284 +#define EXYNOS3_EXT_REGULATOR_SYS_PWR_REG 0x12C0 +#define EXYNOS3_EXT_REGULATOR_COREBLK_SYS_PWR_REG 0x12C4 +#define EXYNOS3_GPIO_MODE_SYS_PWR_REG 0x1300 +#define EXYNOS3_GPIO_MODE_MAUDIO_SYS_PWR_REG 0x1340 +#define EXYNOS3_TOP_ASB_RESET_SYS_PWR_REG 0x1344 +#define EXYNOS3_TOP_ASB_ISOLATION_SYS_PWR_REG 0x1348 +#define EXYNOS3_TOP_ASB_RESET_COREBLK_SYS_PWR_REG 0x1350 +#define EXYNOS3_TOP_ASB_ISOLATION_COREBLK_SYS_PWR_REG 0x1354 +#define EXYNOS3_CAM_SYS_PWR_REG 0x1380 +#define EXYNOS3_MFC_SYS_PWR_REG 0x1388 +#define EXYNOS3_G3D_SYS_PWR_REG 0x138C +#define EXYNOS3_LCD0_SYS_PWR_REG 0x1390 +#define EXYNOS3_ISP_SYS_PWR_REG 0x1394 +#define EXYNOS3_MAUDIO_SYS_PWR_REG 0x1398 +#define EXYNOS3_DRAM_FREQ_DOWN_SYS_PWR_REG 0x13B0 +#define EXYNOS3_DDRPHY_DLLOFF_SYS_PWR_REG 0x13B4 +#define EXYNOS3_CMU_SYSCLK_ISP_SYS_PWR_REG 0x13B8 +#define EXYNOS3_LPDDR_PHY_DLL_LOCK_SYS_PWR_REG 0x13C0 +#define EXYNOS3_BPLL_SYSCLK_SYS_PWR_REG 0x13C4 +#define EXYNOS3_UPLL_SYSCLK_SYS_PWR_REG 0x13C8 + +#define EXYNOS3_ARM_CORE0_OPTION 0x2008 +#define EXYNOS3_ARM_CORE_OPTION(_nr) \ + (EXYNOS3_ARM_CORE0_OPTION + ((_nr) * 0x80)) + +#define EXYNOS3_ARM_COMMON_OPTION 0x2408 +#define EXYNOS3_ARM_L2_OPTION 0x2608 +#define EXYNOS3_TOP_PWR_OPTION 0x2C48 +#define EXYNOS3_CORE_TOP_PWR_OPTION 0x2CA8 +#define EXYNOS3_XUSBXTI_DURATION 0x341C +#define EXYNOS3_XXTI_DURATION 0x343C +#define EXYNOS3_EXT_REGULATOR_DURATION 0x361C +#define EXYNOS3_EXT_REGULATOR_COREBLK_DURATION 0x363C +#define XUSBXTI_DURATION 0x00000BB8 +#define XXTI_DURATION XUSBXTI_DURATION +#define EXT_REGULATOR_DURATION 0x00001D4C +#define EXT_REGULATOR_COREBLK_DURATION EXT_REGULATOR_DURATION + +/* for XXX_OPTION */ +#define EXYNOS3_OPTION_USE_SC_COUNTER (1 << 0) +#define EXYNOS3_OPTION_USE_SC_FEEDBACK (1 << 1) +#define EXYNOS3_OPTION_SKIP_DEACTIVATE_ACEACP_IN_PWDN (1 << 7) + +/* For EXYNOS5 */ + +#define EXYNOS5_AUTO_WDTRESET_DISABLE 0x0408 +#define EXYNOS5_MASK_WDTRESET_REQUEST 0x040C + +#define EXYNOS5_USE_RETENTION BIT(4) +#define EXYNOS5_SYS_WDTRESET (1 << 20) + +#define EXYNOS5_ARM_CORE0_SYS_PWR_REG 0x1000 +#define EXYNOS5_DIS_IRQ_ARM_CORE0_LOCAL_SYS_PWR_REG 0x1004 +#define EXYNOS5_DIS_IRQ_ARM_CORE0_CENTRAL_SYS_PWR_REG 0x1008 +#define EXYNOS5_ARM_CORE1_SYS_PWR_REG 0x1010 +#define EXYNOS5_DIS_IRQ_ARM_CORE1_LOCAL_SYS_PWR_REG 0x1014 +#define EXYNOS5_DIS_IRQ_ARM_CORE1_CENTRAL_SYS_PWR_REG 0x1018 +#define EXYNOS5_FSYS_ARM_SYS_PWR_REG 0x1040 +#define EXYNOS5_DIS_IRQ_FSYS_ARM_CENTRAL_SYS_PWR_REG 0x1048 +#define EXYNOS5_ISP_ARM_SYS_PWR_REG 0x1050 +#define EXYNOS5_DIS_IRQ_ISP_ARM_LOCAL_SYS_PWR_REG 0x1054 +#define EXYNOS5_DIS_IRQ_ISP_ARM_CENTRAL_SYS_PWR_REG 0x1058 +#define EXYNOS5_ARM_COMMON_SYS_PWR_REG 0x1080 +#define EXYNOS5_ARM_L2_SYS_PWR_REG 0x10C0 
+#define EXYNOS5_CMU_ACLKSTOP_SYS_PWR_REG 0x1100 +#define EXYNOS5_CMU_SCLKSTOP_SYS_PWR_REG 0x1104 +#define EXYNOS5_CMU_RESET_SYS_PWR_REG 0x110C +#define EXYNOS5_CMU_ACLKSTOP_SYSMEM_SYS_PWR_REG 0x1120 +#define EXYNOS5_CMU_SCLKSTOP_SYSMEM_SYS_PWR_REG 0x1124 +#define EXYNOS5_CMU_RESET_SYSMEM_SYS_PWR_REG 0x112C +#define EXYNOS5_DRAM_FREQ_DOWN_SYS_PWR_REG 0x1130 +#define EXYNOS5_DDRPHY_DLLOFF_SYS_PWR_REG 0x1134 +#define EXYNOS5_DDRPHY_DLLLOCK_SYS_PWR_REG 0x1138 +#define EXYNOS5_APLL_SYSCLK_SYS_PWR_REG 0x1140 +#define EXYNOS5_MPLL_SYSCLK_SYS_PWR_REG 0x1144 +#define EXYNOS5_VPLL_SYSCLK_SYS_PWR_REG 0x1148 +#define EXYNOS5_EPLL_SYSCLK_SYS_PWR_REG 0x114C +#define EXYNOS5_BPLL_SYSCLK_SYS_PWR_REG 0x1150 +#define EXYNOS5_CPLL_SYSCLK_SYS_PWR_REG 0x1154 +#define EXYNOS5_MPLLUSER_SYSCLK_SYS_PWR_REG 0x1164 +#define EXYNOS5_BPLLUSER_SYSCLK_SYS_PWR_REG 0x1170 +#define EXYNOS5_TOP_BUS_SYS_PWR_REG 0x1180 +#define EXYNOS5_TOP_RETENTION_SYS_PWR_REG 0x1184 +#define EXYNOS5_TOP_PWR_SYS_PWR_REG 0x1188 +#define EXYNOS5_TOP_BUS_SYSMEM_SYS_PWR_REG 0x1190 +#define EXYNOS5_TOP_RETENTION_SYSMEM_SYS_PWR_REG 0x1194 +#define EXYNOS5_TOP_PWR_SYSMEM_SYS_PWR_REG 0x1198 +#define EXYNOS5_LOGIC_RESET_SYS_PWR_REG 0x11A0 +#define EXYNOS5_OSCCLK_GATE_SYS_PWR_REG 0x11A4 +#define EXYNOS5_LOGIC_RESET_SYSMEM_SYS_PWR_REG 0x11B0 +#define EXYNOS5_OSCCLK_GATE_SYSMEM_SYS_PWR_REG 0x11B4 +#define EXYNOS5_USBOTG_MEM_SYS_PWR_REG 0x11C0 +#define EXYNOS5_G2D_MEM_SYS_PWR_REG 0x11C8 +#define EXYNOS5_USBDRD_MEM_SYS_PWR_REG 0x11CC +#define EXYNOS5_SDMMC_MEM_SYS_PWR_REG 0x11D0 +#define EXYNOS5_CSSYS_MEM_SYS_PWR_REG 0x11D4 +#define EXYNOS5_SECSS_MEM_SYS_PWR_REG 0x11D8 +#define EXYNOS5_ROTATOR_MEM_SYS_PWR_REG 0x11DC +#define EXYNOS5_INTRAM_MEM_SYS_PWR_REG 0x11E0 +#define EXYNOS5_INTROM_MEM_SYS_PWR_REG 0x11E4 +#define EXYNOS5_JPEG_MEM_SYS_PWR_REG 0x11E8 +#define EXYNOS5_HSI_MEM_SYS_PWR_REG 0x11EC +#define EXYNOS5_MCUIOP_MEM_SYS_PWR_REG 0x11F4 +#define EXYNOS5_SATA_MEM_SYS_PWR_REG 0x11FC +#define EXYNOS5_PAD_RETENTION_DRAM_SYS_PWR_REG 0x1200 +#define EXYNOS5_PAD_RETENTION_MAU_SYS_PWR_REG 0x1204 +#define EXYNOS5_PAD_RETENTION_EFNAND_SYS_PWR_REG 0x1208 +#define EXYNOS5_PAD_RETENTION_GPIO_SYS_PWR_REG 0x1220 +#define EXYNOS5_PAD_RETENTION_UART_SYS_PWR_REG 0x1224 +#define EXYNOS5_PAD_RETENTION_MMCA_SYS_PWR_REG 0x1228 +#define EXYNOS5_PAD_RETENTION_MMCB_SYS_PWR_REG 0x122C +#define EXYNOS5_PAD_RETENTION_EBIA_SYS_PWR_REG 0x1230 +#define EXYNOS5_PAD_RETENTION_EBIB_SYS_PWR_REG 0x1234 +#define EXYNOS5_PAD_RETENTION_SPI_SYS_PWR_REG 0x1238 +#define EXYNOS5_PAD_RETENTION_GPIO_SYSMEM_SYS_PWR_REG 0x123C +#define EXYNOS5_PAD_ISOLATION_SYS_PWR_REG 0x1240 +#define EXYNOS5_PAD_ISOLATION_SYSMEM_SYS_PWR_REG 0x1250 +#define EXYNOS5_PAD_ALV_SEL_SYS_PWR_REG 0x1260 +#define EXYNOS5_XUSBXTI_SYS_PWR_REG 0x1280 +#define EXYNOS5_XXTI_SYS_PWR_REG 0x1284 +#define EXYNOS5_EXT_REGULATOR_SYS_PWR_REG 0x12C0 +#define EXYNOS5_GPIO_MODE_SYS_PWR_REG 0x1300 +#define EXYNOS5_GPIO_MODE_SYSMEM_SYS_PWR_REG 0x1320 +#define EXYNOS5_GPIO_MODE_MAU_SYS_PWR_REG 0x1340 +#define EXYNOS5_TOP_ASB_RESET_SYS_PWR_REG 0x1344 +#define EXYNOS5_TOP_ASB_ISOLATION_SYS_PWR_REG 0x1348 +#define EXYNOS5_GSCL_SYS_PWR_REG 0x1400 +#define EXYNOS5_ISP_SYS_PWR_REG 0x1404 +#define EXYNOS5_MFC_SYS_PWR_REG 0x1408 +#define EXYNOS5_G3D_SYS_PWR_REG 0x140C +#define EXYNOS5_DISP1_SYS_PWR_REG 0x1414 +#define EXYNOS5_MAU_SYS_PWR_REG 0x1418 +#define EXYNOS5_CMU_CLKSTOP_GSCL_SYS_PWR_REG 0x1480 +#define EXYNOS5_CMU_CLKSTOP_ISP_SYS_PWR_REG 0x1484 +#define EXYNOS5_CMU_CLKSTOP_MFC_SYS_PWR_REG 0x1488 +#define EXYNOS5_CMU_CLKSTOP_G3D_SYS_PWR_REG 
0x148C +#define EXYNOS5_CMU_CLKSTOP_DISP1_SYS_PWR_REG 0x1494 +#define EXYNOS5_CMU_CLKSTOP_MAU_SYS_PWR_REG 0x1498 +#define EXYNOS5_CMU_SYSCLK_GSCL_SYS_PWR_REG 0x14C0 +#define EXYNOS5_CMU_SYSCLK_ISP_SYS_PWR_REG 0x14C4 +#define EXYNOS5_CMU_SYSCLK_MFC_SYS_PWR_REG 0x14C8 +#define EXYNOS5_CMU_SYSCLK_G3D_SYS_PWR_REG 0x14CC +#define EXYNOS5_CMU_SYSCLK_DISP1_SYS_PWR_REG 0x14D4 +#define EXYNOS5_CMU_SYSCLK_MAU_SYS_PWR_REG 0x14D8 +#define EXYNOS5_CMU_RESET_GSCL_SYS_PWR_REG 0x1580 +#define EXYNOS5_CMU_RESET_ISP_SYS_PWR_REG 0x1584 +#define EXYNOS5_CMU_RESET_MFC_SYS_PWR_REG 0x1588 +#define EXYNOS5_CMU_RESET_G3D_SYS_PWR_REG 0x158C +#define EXYNOS5_CMU_RESET_DISP1_SYS_PWR_REG 0x1594 +#define EXYNOS5_CMU_RESET_MAU_SYS_PWR_REG 0x1598 + +#define EXYNOS5_ARM_CORE0_OPTION 0x2008 +#define EXYNOS5_ARM_CORE1_OPTION 0x2088 +#define EXYNOS5_FSYS_ARM_OPTION 0x2208 +#define EXYNOS5_ISP_ARM_OPTION 0x2288 +#define EXYNOS5_ARM_COMMON_OPTION 0x2408 +#define EXYNOS5_ARM_L2_OPTION 0x2608 +#define EXYNOS5_TOP_PWR_OPTION 0x2C48 +#define EXYNOS5_TOP_PWR_SYSMEM_OPTION 0x2CC8 +#define EXYNOS5_JPEG_MEM_OPTION 0x2F48 +#define EXYNOS5_GSCL_OPTION 0x4008 +#define EXYNOS5_ISP_OPTION 0x4028 +#define EXYNOS5_MFC_OPTION 0x4048 +#define EXYNOS5_G3D_OPTION 0x4068 +#define EXYNOS5_DISP1_OPTION 0x40A8 +#define EXYNOS5_MAU_OPTION 0x40C8 + +#define EXYNOS5_USE_SC_FEEDBACK (1 << 1) +#define EXYNOS5_USE_SC_COUNTER (1 << 0) + +#define EXYNOS5_SKIP_DEACTIVATE_ACEACP_IN_PWDN (1 << 7) + +#define EXYNOS5_OPTION_USE_STANDBYWFE (1 << 24) +#define EXYNOS5_OPTION_USE_STANDBYWFI (1 << 16) + +#define EXYNOS5_OPTION_USE_RETENTION (1 << 4) + +#define EXYNOS5420_SWRESET_KFC_SEL 0x3 + +/* Only for EXYNOS5420 */ +#define EXYNOS5420_ISP_ARM_OPTION 0x2488 +#define EXYNOS5420_L2RSTDISABLE_VALUE BIT(3) + +#define EXYNOS5420_LPI_MASK 0x0004 +#define EXYNOS5420_LPI_MASK1 0x0008 +#define EXYNOS5420_UFS BIT(8) +#define EXYNOS5420_ATB_KFC BIT(13) +#define EXYNOS5420_ATB_ISP_ARM BIT(19) +#define EXYNOS5420_EMULATION BIT(31) +#define ATB_ISP_ARM BIT(12) +#define ATB_KFC BIT(13) +#define ATB_NOC BIT(14) + +#define EXYNOS5420_ARM_INTR_SPREAD_ENABLE 0x0100 +#define EXYNOS5420_ARM_INTR_SPREAD_USE_STANDBYWFI 0x0104 +#define EXYNOS5420_UP_SCHEDULER 0x0120 +#define SPREAD_ENABLE 0xF +#define SPREAD_USE_STANDWFI 0xF + +#define EXYNOS5420_KFC_CORE_RESET0 BIT(8) +#define EXYNOS5420_KFC_ETM_RESET0 BIT(20) + +#define EXYNOS5420_KFC_CORE_RESET(_nr) \ + ((EXYNOS5420_KFC_CORE_RESET0 | EXYNOS5420_KFC_ETM_RESET0) << (_nr)) + +#define EXYNOS5420_BB_CON1 0x0784 +#define EXYNOS5420_BB_SEL_EN BIT(31) +#define EXYNOS5420_BB_PMOS_EN BIT(7) +#define EXYNOS5420_BB_1300X 0XF + +#define EXYNOS5420_ARM_CORE2_SYS_PWR_REG 0x1020 +#define EXYNOS5420_DIS_IRQ_ARM_CORE2_LOCAL_SYS_PWR_REG 0x1024 +#define EXYNOS5420_DIS_IRQ_ARM_CORE2_CENTRAL_SYS_PWR_REG 0x1028 +#define EXYNOS5420_ARM_CORE3_SYS_PWR_REG 0x1030 +#define EXYNOS5420_DIS_IRQ_ARM_CORE3_LOCAL_SYS_PWR_REG 0x1034 +#define EXYNOS5420_DIS_IRQ_ARM_CORE3_CENTRAL_SYS_PWR_REG 0x1038 +#define EXYNOS5420_KFC_CORE0_SYS_PWR_REG 0x1040 +#define EXYNOS5420_DIS_IRQ_KFC_CORE0_LOCAL_SYS_PWR_REG 0x1044 +#define EXYNOS5420_DIS_IRQ_KFC_CORE0_CENTRAL_SYS_PWR_REG 0x1048 +#define EXYNOS5420_KFC_CORE1_SYS_PWR_REG 0x1050 +#define EXYNOS5420_DIS_IRQ_KFC_CORE1_LOCAL_SYS_PWR_REG 0x1054 +#define EXYNOS5420_DIS_IRQ_KFC_CORE1_CENTRAL_SYS_PWR_REG 0x1058 +#define EXYNOS5420_KFC_CORE2_SYS_PWR_REG 0x1060 +#define EXYNOS5420_DIS_IRQ_KFC_CORE2_LOCAL_SYS_PWR_REG 0x1064 +#define EXYNOS5420_DIS_IRQ_KFC_CORE2_CENTRAL_SYS_PWR_REG 0x1068 +#define EXYNOS5420_KFC_CORE3_SYS_PWR_REG 0x1070 
+#define EXYNOS5420_DIS_IRQ_KFC_CORE3_LOCAL_SYS_PWR_REG 0x1074 +#define EXYNOS5420_DIS_IRQ_KFC_CORE3_CENTRAL_SYS_PWR_REG 0x1078 +#define EXYNOS5420_ISP_ARM_SYS_PWR_REG 0x1090 +#define EXYNOS5420_DIS_IRQ_ISP_ARM_LOCAL_SYS_PWR_REG 0x1094 +#define EXYNOS5420_DIS_IRQ_ISP_ARM_CENTRAL_SYS_PWR_REG 0x1098 +#define EXYNOS5420_ARM_COMMON_SYS_PWR_REG 0x10A0 +#define EXYNOS5420_KFC_COMMON_SYS_PWR_REG 0x10B0 +#define EXYNOS5420_KFC_L2_SYS_PWR_REG 0x10D0 +#define EXYNOS5420_DPLL_SYSCLK_SYS_PWR_REG 0x1158 +#define EXYNOS5420_IPLL_SYSCLK_SYS_PWR_REG 0x115C +#define EXYNOS5420_KPLL_SYSCLK_SYS_PWR_REG 0x1160 +#define EXYNOS5420_RPLL_SYSCLK_SYS_PWR_REG 0x1174 +#define EXYNOS5420_SPLL_SYSCLK_SYS_PWR_REG 0x1178 +#define EXYNOS5420_INTRAM_MEM_SYS_PWR_REG 0x11B8 +#define EXYNOS5420_INTROM_MEM_SYS_PWR_REG 0x11BC +#define EXYNOS5420_ONENANDXL_MEM_SYS_PWR 0x11C0 +#define EXYNOS5420_USBDEV_MEM_SYS_PWR 0x11CC +#define EXYNOS5420_USBDEV1_MEM_SYS_PWR 0x11D0 +#define EXYNOS5420_SDMMC_MEM_SYS_PWR 0x11D4 +#define EXYNOS5420_CSSYS_MEM_SYS_PWR 0x11D8 +#define EXYNOS5420_SECSS_MEM_SYS_PWR 0x11DC +#define EXYNOS5420_ROTATOR_MEM_SYS_PWR 0x11E0 +#define EXYNOS5420_INTRAM_MEM_SYS_PWR 0x11E4 +#define EXYNOS5420_INTROM_MEM_SYS_PWR 0x11E8 +#define EXYNOS5420_PAD_RETENTION_JTAG_SYS_PWR_REG 0x1208 +#define EXYNOS5420_PAD_RETENTION_DRAM_SYS_PWR_REG 0x1210 +#define EXYNOS5420_PAD_RETENTION_UART_SYS_PWR_REG 0x1214 +#define EXYNOS5420_PAD_RETENTION_MMC0_SYS_PWR_REG 0x1218 +#define EXYNOS5420_PAD_RETENTION_MMC1_SYS_PWR_REG 0x121C +#define EXYNOS5420_PAD_RETENTION_MMC2_SYS_PWR_REG 0x1220 +#define EXYNOS5420_PAD_RETENTION_HSI_SYS_PWR_REG 0x1224 +#define EXYNOS5420_PAD_RETENTION_EBIA_SYS_PWR_REG 0x1228 +#define EXYNOS5420_PAD_RETENTION_EBIB_SYS_PWR_REG 0x122C +#define EXYNOS5420_PAD_RETENTION_SPI_SYS_PWR_REG 0x1230 +#define EXYNOS5420_PAD_RETENTION_DRAM_COREBLK_SYS_PWR_REG 0x1234 +#define EXYNOS5420_DISP1_SYS_PWR_REG 0x1410 +#define EXYNOS5420_MAU_SYS_PWR_REG 0x1414 +#define EXYNOS5420_G2D_SYS_PWR_REG 0x1418 +#define EXYNOS5420_MSC_SYS_PWR_REG 0x141C +#define EXYNOS5420_FSYS_SYS_PWR_REG 0x1420 +#define EXYNOS5420_FSYS2_SYS_PWR_REG 0x1424 +#define EXYNOS5420_PSGEN_SYS_PWR_REG 0x1428 +#define EXYNOS5420_PERIC_SYS_PWR_REG 0x142C +#define EXYNOS5420_WCORE_SYS_PWR_REG 0x1430 +#define EXYNOS5420_CMU_CLKSTOP_DISP1_SYS_PWR_REG 0x1490 +#define EXYNOS5420_CMU_CLKSTOP_MAU_SYS_PWR_REG 0x1494 +#define EXYNOS5420_CMU_CLKSTOP_G2D_SYS_PWR_REG 0x1498 +#define EXYNOS5420_CMU_CLKSTOP_MSC_SYS_PWR_REG 0x149C +#define EXYNOS5420_CMU_CLKSTOP_FSYS_SYS_PWR_REG 0x14A0 +#define EXYNOS5420_CMU_CLKSTOP_FSYS2_SYS_PWR_REG 0x14A4 +#define EXYNOS5420_CMU_CLKSTOP_PSGEN_SYS_PWR_REG 0x14A8 +#define EXYNOS5420_CMU_CLKSTOP_PERIC_SYS_PWR_REG 0x14AC +#define EXYNOS5420_CMU_CLKSTOP_WCORE_SYS_PWR_REG 0x14B0 +#define EXYNOS5420_CMU_SYSCLK_TOPPWR_SYS_PWR_REG 0x14BC +#define EXYNOS5420_CMU_SYSCLK_DISP1_SYS_PWR_REG 0x14D0 +#define EXYNOS5420_CMU_SYSCLK_MAU_SYS_PWR_REG 0x14D4 +#define EXYNOS5420_CMU_SYSCLK_G2D_SYS_PWR_REG 0x14D8 +#define EXYNOS5420_CMU_SYSCLK_MSC_SYS_PWR_REG 0x14DC +#define EXYNOS5420_CMU_SYSCLK_FSYS_SYS_PWR_REG 0x14E0 +#define EXYNOS5420_CMU_SYSCLK_FSYS2_SYS_PWR_REG 0x14E4 +#define EXYNOS5420_CMU_SYSCLK_PSGEN_SYS_PWR_REG 0x14E8 +#define EXYNOS5420_CMU_SYSCLK_PERIC_SYS_PWR_REG 0x14EC +#define EXYNOS5420_CMU_SYSCLK_WCORE_SYS_PWR_REG 0x14F0 +#define EXYNOS5420_CMU_SYSCLK_SYSMEM_TOPPWR_SYS_PWR_REG 0x14F4 +#define EXYNOS5420_CMU_RESET_FSYS2_SYS_PWR_REG 0x1570 +#define EXYNOS5420_CMU_RESET_PSGEN_SYS_PWR_REG 0x1574 +#define EXYNOS5420_CMU_RESET_PERIC_SYS_PWR_REG 0x1578 
+#define EXYNOS5420_CMU_RESET_WCORE_SYS_PWR_REG 0x157C +#define EXYNOS5420_CMU_RESET_DISP1_SYS_PWR_REG 0x1590 +#define EXYNOS5420_CMU_RESET_MAU_SYS_PWR_REG 0x1594 +#define EXYNOS5420_CMU_RESET_G2D_SYS_PWR_REG 0x1598 +#define EXYNOS5420_CMU_RESET_MSC_SYS_PWR_REG 0x159C +#define EXYNOS5420_CMU_RESET_FSYS_SYS_PWR_REG 0x15A0 +#define EXYNOS5420_SFR_AXI_CGDIS1 0x15E4 +#define EXYNOS_ARM_CORE2_CONFIGURATION 0x2100 +#define EXYNOS5420_ARM_CORE2_OPTION 0x2108 +#define EXYNOS_ARM_CORE3_CONFIGURATION 0x2180 +#define EXYNOS5420_ARM_CORE3_OPTION 0x2188 +#define EXYNOS5420_ARM_COMMON_STATUS 0x2504 +#define EXYNOS5420_ARM_COMMON_OPTION 0x2508 +#define EXYNOS5420_KFC_COMMON_STATUS 0x2584 +#define EXYNOS5420_KFC_COMMON_OPTION 0x2588 +#define EXYNOS5420_LOGIC_RESET_DURATION3 0x2D1C + +#define EXYNOS5420_PAD_RET_GPIO_OPTION 0x30C8 +#define EXYNOS5420_PAD_RET_UART_OPTION 0x30E8 +#define EXYNOS5420_PAD_RET_MMCA_OPTION 0x3108 +#define EXYNOS5420_PAD_RET_MMCB_OPTION 0x3128 +#define EXYNOS5420_PAD_RET_MMCC_OPTION 0x3148 +#define EXYNOS5420_PAD_RET_HSI_OPTION 0x3168 +#define EXYNOS5420_PAD_RET_SPI_OPTION 0x31C8 +#define EXYNOS5420_PAD_RET_DRAM_COREBLK_OPTION 0x31E8 +#define EXYNOS_PAD_RET_DRAM_OPTION 0x3008 +#define EXYNOS_PAD_RET_MAUDIO_OPTION 0x3028 +#define EXYNOS_PAD_RET_JTAG_OPTION 0x3048 +#define EXYNOS_PAD_RET_GPIO_OPTION 0x3108 +#define EXYNOS_PAD_RET_UART_OPTION 0x3128 +#define EXYNOS_PAD_RET_MMCA_OPTION 0x3148 +#define EXYNOS_PAD_RET_MMCB_OPTION 0x3168 +#define EXYNOS_PAD_RET_EBIA_OPTION 0x3188 +#define EXYNOS_PAD_RET_EBIB_OPTION 0x31A8 + +#define EXYNOS_PS_HOLD_CONTROL 0x330C + +/* For SYS_PWR_REG */ +#define EXYNOS_SYS_PWR_CFG BIT(0) + +#define EXYNOS5420_MFC_CONFIGURATION 0x4060 +#define EXYNOS5420_MFC_STATUS 0x4064 +#define EXYNOS5420_MFC_OPTION 0x4068 +#define EXYNOS5420_G3D_CONFIGURATION 0x4080 +#define EXYNOS5420_G3D_STATUS 0x4084 +#define EXYNOS5420_G3D_OPTION 0x4088 +#define EXYNOS5420_DISP0_CONFIGURATION 0x40A0 +#define EXYNOS5420_DISP0_STATUS 0x40A4 +#define EXYNOS5420_DISP0_OPTION 0x40A8 +#define EXYNOS5420_DISP1_CONFIGURATION 0x40C0 +#define EXYNOS5420_DISP1_STATUS 0x40C4 +#define EXYNOS5420_DISP1_OPTION 0x40C8 +#define EXYNOS5420_MAU_CONFIGURATION 0x40E0 +#define EXYNOS5420_MAU_STATUS 0x40E4 +#define EXYNOS5420_MAU_OPTION 0x40E8 +#define EXYNOS5420_FSYS2_OPTION 0x4168 +#define EXYNOS5420_PSGEN_OPTION 0x4188 + +/* For EXYNOS_CENTRAL_SEQ_OPTION */ +#define EXYNOS5_USE_STANDBYWFI_ARM_CORE0 BIT(16) +#define EXYNOS5_USE_STANDBYWFI_ARM_CORE1 BIT(17) +#define EXYNOS5_USE_STANDBYWFE_ARM_CORE0 BIT(24) +#define EXYNOS5_USE_STANDBYWFE_ARM_CORE1 BIT(25) + +#define EXYNOS5420_ARM_USE_STANDBY_WFI0 BIT(4) +#define EXYNOS5420_ARM_USE_STANDBY_WFI1 BIT(5) +#define EXYNOS5420_ARM_USE_STANDBY_WFI2 BIT(6) +#define EXYNOS5420_ARM_USE_STANDBY_WFI3 BIT(7) +#define EXYNOS5420_KFC_USE_STANDBY_WFI0 BIT(8) +#define EXYNOS5420_KFC_USE_STANDBY_WFI1 BIT(9) +#define EXYNOS5420_KFC_USE_STANDBY_WFI2 BIT(10) +#define EXYNOS5420_KFC_USE_STANDBY_WFI3 BIT(11) +#define EXYNOS5420_ARM_USE_STANDBY_WFE0 BIT(16) +#define EXYNOS5420_ARM_USE_STANDBY_WFE1 BIT(17) +#define EXYNOS5420_ARM_USE_STANDBY_WFE2 BIT(18) +#define EXYNOS5420_ARM_USE_STANDBY_WFE3 BIT(19) +#define EXYNOS5420_KFC_USE_STANDBY_WFE0 BIT(20) +#define EXYNOS5420_KFC_USE_STANDBY_WFE1 BIT(21) +#define EXYNOS5420_KFC_USE_STANDBY_WFE2 BIT(22) +#define EXYNOS5420_KFC_USE_STANDBY_WFE3 BIT(23) + +#define DUR_WAIT_RESET 0xF + +#define EXYNOS5420_USE_STANDBY_WFI_ALL (EXYNOS5420_ARM_USE_STANDBY_WFI0 \ + | EXYNOS5420_ARM_USE_STANDBY_WFI1 \ + | EXYNOS5420_ARM_USE_STANDBY_WFI2 \ + |
EXYNOS5420_ARM_USE_STANDBY_WFI3 \ + | EXYNOS5420_KFC_USE_STANDBY_WFI0 \ + | EXYNOS5420_KFC_USE_STANDBY_WFI1 \ + | EXYNOS5420_KFC_USE_STANDBY_WFI2 \ + | EXYNOS5420_KFC_USE_STANDBY_WFI3) + +#endif /* __LINUX_SOC_EXYNOS_REGS_PMU_H */ -- cgit v1.2.3 From b84106b4e2290c081cdab521fa832596cdfea246 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Thu, 25 Feb 2016 14:35:57 -0600 Subject: PCI: Disable IO/MEM decoding for devices with non-compliant BARs The PCI config header (first 64 bytes of each device's config space) is defined by the PCI spec so generic software can identify the device and manage its usage of I/O, memory, and IRQ resources. Some non-spec-compliant devices put registers other than BARs where the BARs should be. When the PCI core sizes these "BARs", the reads and writes it does may have unwanted side effects, and the "BAR" may appear to describe non-sensical address space. Add a flag bit to mark non-compliant devices so we don't touch their BARs. Turn off IO/MEM decoding to prevent the devices from consuming address space, since we can't read the BARs to find out what that address space would be. Signed-off-by: Bjorn Helgaas Tested-by: Andi Kleen CC: stable@vger.kernel.org --- drivers/pci/probe.c | 14 ++++++++++++++ include/linux/pci.h | 1 + 2 files changed, 15 insertions(+) (limited to 'include/linux') diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index 6d7ab9bb0d5a..6b0056e9c33e 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -179,6 +179,9 @@ int __pci_read_base(struct pci_dev *dev, enum pci_bar_type type, u16 orig_cmd; struct pci_bus_region region, inverted_region; + if (dev->non_compliant_bars) + return 0; + mask = type ? PCI_ROM_ADDRESS_MASK : ~0; /* No printks while decoding is disabled! */ @@ -1171,6 +1174,7 @@ static void pci_msi_setup_pci_dev(struct pci_dev *dev) int pci_setup_device(struct pci_dev *dev) { u32 class; + u16 cmd; u8 hdr_type; int pos = 0; struct pci_bus_region region; @@ -1214,6 +1218,16 @@ int pci_setup_device(struct pci_dev *dev) /* device class may be changed after fixup */ class = dev->class >> 8; + if (dev->non_compliant_bars) { + pci_read_config_word(dev, PCI_COMMAND, &cmd); + if (cmd & (PCI_COMMAND_IO | PCI_COMMAND_MEMORY)) { + dev_info(&dev->dev, "device has non-compliant BARs; disabling IO/MEM decoding\n"); + cmd &= ~PCI_COMMAND_IO; + cmd &= ~PCI_COMMAND_MEMORY; + pci_write_config_word(dev, PCI_COMMAND, cmd); + } + } + switch (dev->hdr_type) { /* header type */ case PCI_HEADER_TYPE_NORMAL: /* standard header */ if (class == PCI_CLASS_BRIDGE_PCI) diff --git a/include/linux/pci.h b/include/linux/pci.h index 27df4a6585da..5f80661d3519 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -359,6 +359,7 @@ struct pci_dev { unsigned int io_window_1k:1; /* Intel P2P bridge 1K I/O windows */ unsigned int irq_managed:1; unsigned int has_secondary_link:1; + unsigned int non_compliant_bars:1; /* broken BARs; ignore them */ pci_dev_flags_t dev_flags; atomic_t enable_cnt; /* pci_enable_device has been called */ -- cgit v1.2.3 From 489061bba6c655a2f98d39be17df118c0fd09d57 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Mon, 15 Feb 2016 05:25:54 +0000 Subject: regmap: add regmap_field_force_xxx() macros Signed-off-by: Kuninori Morimoto Signed-off-by: Mark Brown --- include/linux/regmap.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/regmap.h b/include/linux/regmap.h index 7d3d498fd3e8..d36ea89adc50 100644 --- a/include/linux/regmap.h +++ b/include/linux/regmap.h @@ -76,8 
+76,12 @@ struct reg_sequence { #define regmap_field_write(field, val) \ regmap_field_update_bits_base(field, ~0, val, NULL, false, false) +#define regmap_field_force_write(field, val) \ + regmap_field_update_bits_base(field, ~0, val, NULL, false, true) #define regmap_field_update_bits(field, mask, val)\ regmap_field_update_bits_base(field, mask, val, NULL, false, false) +#define regmap_field_force_update_bits(field, mask, val) \ + regmap_field_update_bits_base(field, mask, val, NULL, false, true) #define regmap_fields_write(field, id, val) \ regmap_fields_update_bits_base(field, id, ~0, val, NULL, false, false) -- cgit v1.2.3 From e6ef243fa4660f3206137bd5f3e69b13a9b7c28a Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Mon, 15 Feb 2016 05:26:14 +0000 Subject: regmap: add regmap_fields_force_xxx() macros Signed-off-by: Kuninori Morimoto Signed-off-by: Mark Brown --- drivers/base/regmap/regmap.c | 12 ------------ include/linux/regmap.h | 6 ++++-- 2 files changed, 4 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/drivers/base/regmap/regmap.c b/drivers/base/regmap/regmap.c index 79d7f51019d7..c7d4a636778d 100644 --- a/drivers/base/regmap/regmap.c +++ b/drivers/base/regmap/regmap.c @@ -1748,18 +1748,6 @@ int regmap_fields_update_bits_base(struct regmap_field *field, unsigned int id, } EXPORT_SYMBOL_GPL(regmap_fields_update_bits_base); -int regmap_fields_force_write(struct regmap_field *field, unsigned int id, - unsigned int val) -{ - if (id >= field->id_size) - return -EINVAL; - - return regmap_write_bits(field->regmap, - field->reg + (field->id_offset * id), - field->mask, val << field->shift); -} -EXPORT_SYMBOL_GPL(regmap_fields_force_write); - /* * regmap_bulk_write(): Write multiple registers to the device * diff --git a/include/linux/regmap.h b/include/linux/regmap.h index d36ea89adc50..e0960b3ff290 100644 --- a/include/linux/regmap.h +++ b/include/linux/regmap.h @@ -85,8 +85,12 @@ struct reg_sequence { #define regmap_fields_write(field, id, val) \ regmap_fields_update_bits_base(field, id, ~0, val, NULL, false, false) +#define regmap_fields_force_write(field, id, val) \ + regmap_fields_update_bits_base(field, id, ~0, val, NULL, false, true) #define regmap_fields_update_bits(field, id, mask, val)\ regmap_fields_update_bits_base(field, id, mask, val, NULL, false, false) +#define regmap_fields_force_update_bits(field, id, mask, val) \ + regmap_fields_update_bits_base(field, id, mask, val, NULL, false, true) #ifdef CONFIG_REGMAP @@ -789,8 +793,6 @@ int regmap_field_read(struct regmap_field *field, unsigned int *val); int regmap_field_update_bits_base(struct regmap_field *field, unsigned int mask, unsigned int val, bool *change, bool async, bool force); -int regmap_fields_force_write(struct regmap_field *field, unsigned int id, - unsigned int val); int regmap_fields_read(struct regmap_field *field, unsigned int id, unsigned int *val); int regmap_fields_update_bits_base(struct regmap_field *field, unsigned int id, -- cgit v1.2.3 From f1705ec197e705b79ea40fe7a2cc5acfa1d3bfac Mon Sep 17 00:00:00 2001 From: David Ahern Date: Wed, 24 Feb 2016 09:25:37 -0800 Subject: net: ipv6: Make address flushing on ifdown optional Currently, all ipv6 addresses are flushed when the interface is configured down, including global, static addresses: $ ip -6 addr show dev eth1 3: eth1: mtu 1500 state UP qlen 1000 inet6 2100:1::2/120 scope global valid_lft forever preferred_lft forever inet6 fe80::e0:f9ff:fe79:34bd/64 scope link valid_lft forever preferred_lft forever $ ip link set dev 
eth1 down $ ip -6 addr show dev eth1 << nothing; all addresses have been flushed>> Add a new sysctl to make this behavior optional. The new setting defaults to flush all addresses to maintain backwards compatibility. When set, global addresses with no expire times are not flushed on an admin down. The sysctl is per-interface or system-wide for all interfaces $ sysctl -w net.ipv6.conf.eth1.keep_addr_on_down=1 or $ sysctl -w net.ipv6.conf.all.keep_addr_on_down=1 Will keep addresses on eth1 on an admin down. $ ip -6 addr show dev eth1 3: eth1: mtu 1500 state UP qlen 1000 inet6 2100:1::2/120 scope global valid_lft forever preferred_lft forever inet6 fe80::e0:f9ff:fe79:34bd/64 scope link valid_lft forever preferred_lft forever $ ip link set dev eth1 down $ ip -6 addr show dev eth1 3: eth1: mtu 1500 state DOWN qlen 1000 inet6 2100:1::2/120 scope global tentative valid_lft forever preferred_lft forever inet6 fe80::e0:f9ff:fe79:34bd/64 scope link tentative valid_lft forever preferred_lft forever Signed-off-by: David Ahern Signed-off-by: David S. Miller --- Documentation/networking/ip-sysctl.txt | 9 +++ include/linux/ipv6.h | 1 + include/uapi/linux/ipv6.h | 1 + net/ipv6/addrconf.c | 136 +++++++++++++++++++++++++++++---- 4 files changed, 132 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index 24ce97f42d35..d5df40c75aa4 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -1563,6 +1563,15 @@ temp_prefered_lft - INTEGER Preferred lifetime (in seconds) for temporary addresses. Default: 86400 (1 day) +keep_addr_on_down - INTEGER + Keep all IPv6 addresses on an interface down event. If set static + global addresses with no expiration time are not flushed.
+ >0 : enabled + 0 : system default + <0 : disabled + + Default: 0 (addresses are removed) + max_desync_factor - INTEGER Maximum value for DESYNC_FACTOR, which is a random value that ensures that clients don't synchronize with each diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 4b2267e1b7c3..7edc14fb66b6 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -62,6 +62,7 @@ struct ipv6_devconf { struct in6_addr secret; } stable_secret; __s32 use_oif_addrs_only; + __s32 keep_addr_on_down; void *sysctl; }; diff --git a/include/uapi/linux/ipv6.h b/include/uapi/linux/ipv6.h index ec117b65d5a5..395876060f50 100644 --- a/include/uapi/linux/ipv6.h +++ b/include/uapi/linux/ipv6.h @@ -176,6 +176,7 @@ enum { DEVCONF_IGNORE_ROUTES_WITH_LINKDOWN, DEVCONF_DROP_UNICAST_IN_L2_MULTICAST, DEVCONF_DROP_UNSOLICITED_NA, + DEVCONF_KEEP_ADDR_ON_DOWN, DEVCONF_MAX }; diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 4751f8922362..a2d6f6c242af 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -216,6 +216,7 @@ static struct ipv6_devconf ipv6_devconf __read_mostly = { }, .use_oif_addrs_only = 0, .ignore_routes_with_linkdown = 0, + .keep_addr_on_down = 0, }; static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = { @@ -260,6 +261,7 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = { }, .use_oif_addrs_only = 0, .ignore_routes_with_linkdown = 0, + .keep_addr_on_down = 0, }; /* Check if a valid qdisc is available */ @@ -3168,6 +3170,55 @@ static void addrconf_gre_config(struct net_device *dev) } #endif +static int fixup_permanent_addr(struct inet6_dev *idev, + struct inet6_ifaddr *ifp) +{ + if (!ifp->rt) { + struct rt6_info *rt; + + rt = addrconf_dst_alloc(idev, &ifp->addr, false); + if (unlikely(IS_ERR(rt))) + return PTR_ERR(rt); + + ifp->rt = rt; + } + + if (!(ifp->flags & IFA_F_NOPREFIXROUTE)) { + addrconf_prefix_route(&ifp->addr, ifp->prefix_len, + idev->dev, 0, 0); + } + + addrconf_dad_start(ifp); + + return 0; +} + +static void addrconf_permanent_addr(struct net_device *dev) +{ + struct inet6_ifaddr *ifp, *tmp; + struct inet6_dev *idev; + + idev = __in6_dev_get(dev); + if (!idev) + return; + + write_lock_bh(&idev->lock); + + list_for_each_entry_safe(ifp, tmp, &idev->addr_list, if_list) { + if ((ifp->flags & IFA_F_PERMANENT) && + fixup_permanent_addr(idev, ifp) < 0) { + write_unlock_bh(&idev->lock); + ipv6_del_addr(ifp); + write_lock_bh(&idev->lock); + + net_info_ratelimited("%s: Failed to add prefix route for address %pI6c; dropping\n", + idev->dev->name, &ifp->addr); + } + } + + write_unlock_bh(&idev->lock); +} + static int addrconf_notify(struct notifier_block *this, unsigned long event, void *ptr) { @@ -3253,6 +3304,9 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event, run_pending = 1; } + /* restore routes for permanent addresses */ + addrconf_permanent_addr(dev); + switch (dev->type) { #if IS_ENABLED(CONFIG_IPV6_SIT) case ARPHRD_SIT: @@ -3356,7 +3410,10 @@ static int addrconf_ifdown(struct net_device *dev, int how) { struct net *net = dev_net(dev); struct inet6_dev *idev; - struct inet6_ifaddr *ifa; + struct inet6_ifaddr *ifa, *tmp; + struct list_head del_list; + int _keep_addr; + bool keep_addr; int state, i; ASSERT_RTNL(); @@ -3383,6 +3440,16 @@ static int addrconf_ifdown(struct net_device *dev, int how) } + /* aggregate the system setting and interface setting */ + _keep_addr = net->ipv6.devconf_all->keep_addr_on_down; + if (!_keep_addr) + _keep_addr = idev->cnf.keep_addr_on_down; + + /* combine the user config 
with event to determine if permanent + * addresses are to be removed from address hash table + */ + keep_addr = !(how || _keep_addr <= 0); + /* Step 2: clear hash table */ for (i = 0; i < IN6_ADDR_HSIZE; i++) { struct hlist_head *h = &inet6_addr_lst[i]; @@ -3391,9 +3458,15 @@ static int addrconf_ifdown(struct net_device *dev, int how) restart: hlist_for_each_entry_rcu(ifa, h, addr_lst) { if (ifa->idev == idev) { - hlist_del_init_rcu(&ifa->addr_lst); addrconf_del_dad_work(ifa); - goto restart; + /* combined flag + permanent flag decide if + * address is retained on a down event + */ + if (!keep_addr || + !(ifa->flags & IFA_F_PERMANENT)) { + hlist_del_init_rcu(&ifa->addr_lst); + goto restart; + } } } spin_unlock_bh(&addrconf_hash_lock); @@ -3427,31 +3500,53 @@ restart: write_lock_bh(&idev->lock); } - while (!list_empty(&idev->addr_list)) { - ifa = list_first_entry(&idev->addr_list, - struct inet6_ifaddr, if_list); - addrconf_del_dad_work(ifa); + /* re-combine the user config with event to determine if permanent + * addresses are to be removed from the interface list + */ + keep_addr = (!how && _keep_addr > 0); - list_del(&ifa->if_list); + INIT_LIST_HEAD(&del_list); + list_for_each_entry_safe(ifa, tmp, &idev->addr_list, if_list) { + addrconf_del_dad_work(ifa); write_unlock_bh(&idev->lock); - spin_lock_bh(&ifa->lock); - state = ifa->state; - ifa->state = INET6_IFADDR_STATE_DEAD; + + if (keep_addr && (ifa->flags & IFA_F_PERMANENT)) { + /* set state to skip the notifier below */ + state = INET6_IFADDR_STATE_DEAD; + ifa->state = 0; + if (!(ifa->flags & IFA_F_NODAD)) + ifa->flags |= IFA_F_TENTATIVE; + } else { + state = ifa->state; + ifa->state = INET6_IFADDR_STATE_DEAD; + + list_del(&ifa->if_list); + list_add(&ifa->if_list, &del_list); + } + spin_unlock_bh(&ifa->lock); if (state != INET6_IFADDR_STATE_DEAD) { __ipv6_ifa_notify(RTM_DELADDR, ifa); inet6addr_notifier_call_chain(NETDEV_DOWN, ifa); } - in6_ifa_put(ifa); write_lock_bh(&idev->lock); } write_unlock_bh(&idev->lock); + /* now clean up addresses to be removed */ + while (!list_empty(&del_list)) { + ifa = list_first_entry(&del_list, + struct inet6_ifaddr, if_list); + list_del(&ifa->if_list); + + in6_ifa_put(ifa); + } + /* Step 5: Discard anycast and multicast list */ if (how) { ipv6_ac_destroy_dev(idev); @@ -4716,6 +4811,7 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf, array[DEVCONF_USE_OIF_ADDRS_ONLY] = cnf->use_oif_addrs_only; array[DEVCONF_DROP_UNICAST_IN_L2_MULTICAST] = cnf->drop_unicast_in_l2_multicast; array[DEVCONF_DROP_UNSOLICITED_NA] = cnf->drop_unsolicited_na; + array[DEVCONF_KEEP_ADDR_ON_DOWN] = cnf->keep_addr_on_down; } static inline size_t inet6_ifla6_size(void) @@ -5197,10 +5293,12 @@ static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp) if (rt) ip6_del_rt(rt); } - dst_hold(&ifp->rt->dst); - - ip6_del_rt(ifp->rt); + if (ifp->rt) { + dst_hold(&ifp->rt->dst); + ip6_del_rt(ifp->rt); + ifp->rt = NULL; + } rt_genid_bump_ipv6(net); break; } @@ -5803,6 +5901,14 @@ static struct addrconf_sysctl_table .mode = 0644, .proc_handler = proc_dointvec, }, + { + .procname = "keep_addr_on_down", + .data = &ipv6_devconf.keep_addr_on_down, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + + }, { /* sentinel */ } -- cgit v1.2.3 From 3f1ac7a700d039c61d8d8b99f28d605d489a60cf Mon Sep 17 00:00:00 2001 From: David Decotigny Date: Wed, 24 Feb 2016 10:57:59 -0800 Subject: net: ethtool: add new ETHTOOL_xLINKSETTINGS API This patch defines a new ETHTOOL_GLINKSETTINGS/SLINKSETTINGS API, handled by the 
new get_link_ksettings/set_link_ksettings callbacks. This API provides support for most legacy ethtool_cmd fields, adds support for larger link mode masks (up to 4064 bits, variable length), and removes the deprecated ethtool_cmd fields (transceiver/maxrxpkt/maxtxpkt). This API deprecates the legacy ETHTOOL_GSET/SSET API and provides the following backward compatibility properties: - legacy ethtool with legacy drivers: no change, still using the get_settings/set_settings callbacks. - legacy ethtool with new get/set_link_ksettings drivers: the new driver callbacks are used, data internally converted to legacy ethtool_cmd. ETHTOOL_GSET will return only the 1st 32b of each link mode mask. ETHTOOL_SSET will fail if user tries to set the ethtool_cmd deprecated fields to non-0 (transceiver/maxrxpkt/maxtxpkt). A kernel warning is logged if driver sets higher bits. - future ethtool with legacy drivers: no change, still using the get_settings/set_settings callbacks, internally converted to new data structure. Deprecated fields (transceiver/maxrxpkt/maxtxpkt) will be ignored and seen as 0 from user space. Note that the "future" ethtool tool will not allow changes to these deprecated fields. - future ethtool with new drivers: direct call to the new callbacks. By "future" ethtool, what is meant is: - query: first try ETHTOOL_GLINKSETTINGS, and revert to ETHTOOL_GSET if it fails - set: query first and remember which of ETHTOOL_GLINKSETTINGS or ETHTOOL_GSET was successful + if ETHTOOL_GLINKSETTINGS was successful, then change config with ETHTOOL_SLINKSETTINGS. A failure there is final (do not try ETHTOOL_SSET). + otherwise ETHTOOL_GSET was successful, change config with ETHTOOL_SSET. A failure there is final (do not try ETHTOOL_SLINKSETTINGS). The user/kernel interaction via the new API requires a small ETHTOOL_GLINKSETTINGS handshake first to agree on the length of the link mode bitmaps. If the kernel doesn't agree with the user, it returns the bitmap length it is expecting from the user as a negative length (and the cmd field is 0). When kernel and user agree, the kernel returns valid info in all fields (ie. link mode length > 0 and cmd is ETHTOOL_GLINKSETTINGS). The data structure crossing the user/kernel boundary is 32/64-bit agnostic and is converted internally to a legal kernel bitmap. The internal __ethtool_get_settings kernel helper will gradually be replaced by __ethtool_get_link_ksettings by the time the first "link_settings" drivers start to appear. So this patch doesn't change it; it will be removed before it needs to be changed.
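To make the handshake concrete, a user-space query might look roughly like this (an illustrative sketch only, not part of this patch: it assumes an AF_INET ioctl socket in fd and the usual SIOCETHTOOL plumbing, with error handling trimmed):

#include <string.h>
#include <net/if.h>
#include <sys/ioctl.h>
#include <linux/ethtool.h>
#include <linux/sockios.h>

/* sketch of the two-pass ETHTOOL_GLINKSETTINGS handshake */
static int query_link_ksettings(int fd, const char *ifname)
{
	struct {
		struct ethtool_link_settings base;
		__u32 link_mode_data[3 * 127]; /* 3 masks, at most S8_MAX words each */
	} ecmd;
	struct ifreq ifr;

	memset(&ifr, 0, sizeof(ifr));
	strncpy(ifr.ifr_name, ifname, IFNAMSIZ - 1);
	ifr.ifr_data = (void *)&ecmd;

	/* pass 1: nwords = 0; the kernel answers with the length it
	 * expects, as a negative value, and zeroes the cmd field
	 */
	memset(&ecmd, 0, sizeof(ecmd));
	ecmd.base.cmd = ETHTOOL_GLINKSETTINGS;
	if (ioctl(fd, SIOCETHTOOL, &ifr) < 0)
		return -1; /* old kernel: revert to ETHTOOL_GSET */
	if (ecmd.base.link_mode_masks_nwords >= 0)
		return -1; /* unexpected reply */

	/* pass 2: resubmit with the agreed length; the kernel now
	 * fills all fields and sets cmd back to ETHTOOL_GLINKSETTINGS
	 */
	ecmd.base.cmd = ETHTOOL_GLINKSETTINGS;
	ecmd.base.link_mode_masks_nwords = -ecmd.base.link_mode_masks_nwords;
	return ioctl(fd, SIOCETHTOOL, &ifr);
}

A real tool would cache the negotiated length rather than redo pass 1 on every request, and would fall back to the ETHTOOL_GSET path when pass 1 fails, as described above.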
Signed-off-by: David Decotigny Signed-off-by: David S. Miller --- include/linux/ethtool.h | 91 ++++++++- include/uapi/linux/ethtool.h | 322 +++++++++++++++++++++++------- net/core/ethtool.c | 453 ++++++++++++++++++++++++++++++++++++++++++- 3 files changed, 786 insertions(+), 80 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 472d7d7b01c2..8a400a54c92e 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -12,6 +12,7 @@ #ifndef _LINUX_ETHTOOL_H #define _LINUX_ETHTOOL_H +#include <linux/bitmap.h> #include <linux/compat.h> #include <uapi/linux/ethtool.h> @@ -40,9 +41,6 @@ struct compat_ethtool_rxnfc { #include <linux/rculist.h> -extern int __ethtool_get_settings(struct net_device *dev, - struct ethtool_cmd *cmd); - /** * enum ethtool_phys_id_state - indicator state for physical identification * @ETHTOOL_ID_INACTIVE: Physical ID indicator should be deactivated @@ -97,13 +95,74 @@ static inline u32 ethtool_rxfh_indir_default(u32 index, u32 n_rx_rings) return index % n_rx_rings; } +/* number of link mode bits/ulongs handled internally by kernel */ +#define __ETHTOOL_LINK_MODE_MASK_NBITS \ + (__ETHTOOL_LINK_MODE_LAST + 1) + +/* declare a link mode bitmap */ +#define __ETHTOOL_DECLARE_LINK_MODE_MASK(name) \ + DECLARE_BITMAP(name, __ETHTOOL_LINK_MODE_MASK_NBITS) + +/* drivers must ignore base.cmd and base.link_mode_masks_nwords + * fields, but they are allowed to overwrite them (will be ignored). + */ +struct ethtool_link_ksettings { + struct ethtool_link_settings base; + struct { + __ETHTOOL_DECLARE_LINK_MODE_MASK(supported); + __ETHTOOL_DECLARE_LINK_MODE_MASK(advertising); + __ETHTOOL_DECLARE_LINK_MODE_MASK(lp_advertising); + } link_modes; +}; + +/** + * ethtool_link_ksettings_zero_link_mode - clear link_ksettings link mode mask + * @ptr : pointer to struct ethtool_link_ksettings + * @name : one of supported/advertising/lp_advertising + */ +#define ethtool_link_ksettings_zero_link_mode(ptr, name) \ + bitmap_zero((ptr)->link_modes.name, __ETHTOOL_LINK_MODE_MASK_NBITS) + +/** + * ethtool_link_ksettings_add_link_mode - set bit in link_ksettings + * link mode mask + * @ptr : pointer to struct ethtool_link_ksettings + * @name : one of supported/advertising/lp_advertising + * @mode : one of the ETHTOOL_LINK_MODE_*_BIT + * (not atomic, no bound checking) + */ +#define ethtool_link_ksettings_add_link_mode(ptr, name, mode) \ + __set_bit(ETHTOOL_LINK_MODE_ ## mode ## _BIT, (ptr)->link_modes.name) + +/** + * ethtool_link_ksettings_test_link_mode - test bit in ksettings link mode mask + * @ptr : pointer to struct ethtool_link_ksettings + * @name : one of supported/advertising/lp_advertising + * @mode : one of the ETHTOOL_LINK_MODE_*_BIT + * (not atomic, no bound checking) + * + * Returns true/false. + */ +#define ethtool_link_ksettings_test_link_mode(ptr, name, mode) \ + test_bit(ETHTOOL_LINK_MODE_ ## mode ## _BIT, (ptr)->link_modes.name) + +extern int +__ethtool_get_link_ksettings(struct net_device *dev, + struct ethtool_link_ksettings *link_ksettings); + +/* DEPRECATED, use __ethtool_get_link_ksettings */ +extern int __ethtool_get_settings(struct net_device *dev, + struct ethtool_cmd *cmd); + /** * struct ethtool_ops - optional netdev operations - * @get_settings: Get various device settings including Ethernet link + * @get_settings: DEPRECATED, use %get_link_ksettings/%set_link_ksettings + * API. Get various device settings including Ethernet link * settings. The @cmd parameter is expected to have been cleared - before get_settings is called. Returns a negative error code or - zero.
- * @set_settings: Set various device settings including Ethernet link + * before get_settings is called. Returns a negative error code + * or zero. + * @set_settings: DEPRECATED, use %get_link_ksettings/%set_link_ksettings + * API. Set various device settings including Ethernet link * settings. Returns a negative error code or zero. * @get_drvinfo: Report driver/device information. Should only set the * @driver, @version, @fw_version and @bus_info fields. If not @@ -211,6 +270,19 @@ static inline u32 ethtool_rxfh_indir_default(u32 index, u32 n_rx_rings) * a TX queue has this number, return -EINVAL. If only a RX queue or a TX * queue has this number, ignore the inapplicable fields. * Returns a negative error code or zero. + * @get_link_ksettings: When defined, takes precedence over the + * %get_settings method. Get various device settings + * including Ethernet link settings. The %cmd and + * %link_mode_masks_nwords fields should be ignored (use + * %__ETHTOOL_LINK_MODE_MASK_NBITS instead of the latter), any + * change to them will be overwritten by kernel. Returns a + * negative error code or zero. + * @set_link_ksettings: When defined, takes precedence over the + * %set_settings method. Set various device settings including + * Ethernet link settings. The %cmd and %link_mode_masks_nwords + * fields should be ignored (use %__ETHTOOL_LINK_MODE_MASK_NBITS + * instead of the latter), any change to them will be overwritten + * by kernel. Returns a negative error code or zero. * * All operations are optional (i.e. the function pointer may be set * to %NULL) and callers must take this into account. Callers must @@ -293,6 +365,9 @@ struct ethtool_ops { struct ethtool_coalesce *); int (*set_per_queue_coalesce)(struct net_device *, u32, struct ethtool_coalesce *); - + int (*get_link_ksettings)(struct net_device *, + struct ethtool_link_ksettings *); + int (*set_link_ksettings)(struct net_device *, + const struct ethtool_link_ksettings *); }; #endif /* _LINUX_ETHTOOL_H */ diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index f15ae02621a1..37fd6dc33de4 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -21,7 +21,8 @@ */ /** - * struct ethtool_cmd - link control and status + * struct ethtool_cmd - DEPRECATED, link control and status + * This structure is DEPRECATED, please use struct ethtool_link_settings. * @cmd: Command number = %ETHTOOL_GSET or %ETHTOOL_SSET * @supported: Bitmask of %SUPPORTED_* flags for the link modes, * physical connectors and other link features for which the @@ -1219,8 +1220,12 @@ struct ethtool_per_queue_op { }; /* CMDs currently supported */ -#define ETHTOOL_GSET 0x00000001 /* Get settings. */ -#define ETHTOOL_SSET 0x00000002 /* Set settings. */ +#define ETHTOOL_GSET 0x00000001 /* DEPRECATED, Get settings. + * Please use ETHTOOL_GLINKSETTINGS + */ +#define ETHTOOL_SSET 0x00000002 /* DEPRECATED, Set settings. + * Please use ETHTOOL_SLINKSETTINGS + */ #define ETHTOOL_GDRVINFO 0x00000003 /* Get driver info. */ #define ETHTOOL_GREGS 0x00000004 /* Get NIC registers. */ #define ETHTOOL_GWOL 0x00000005 /* Get wake-on-lan options. 
*/ @@ -1302,73 +1307,139 @@ struct ethtool_per_queue_op { #define ETHTOOL_PERQUEUE 0x0000004b /* Set per queue options */ +#define ETHTOOL_GLINKSETTINGS 0x0000004c /* Get ethtool_link_settings */ +#define ETHTOOL_SLINKSETTINGS 0x0000004d /* Set ethtool_link_settings */ + + /* compatibility with older code */ #define SPARC_ETH_GSET ETHTOOL_GSET #define SPARC_ETH_SSET ETHTOOL_SSET -#define SUPPORTED_10baseT_Half (1 << 0) -#define SUPPORTED_10baseT_Full (1 << 1) -#define SUPPORTED_100baseT_Half (1 << 2) -#define SUPPORTED_100baseT_Full (1 << 3) -#define SUPPORTED_1000baseT_Half (1 << 4) -#define SUPPORTED_1000baseT_Full (1 << 5) -#define SUPPORTED_Autoneg (1 << 6) -#define SUPPORTED_TP (1 << 7) -#define SUPPORTED_AUI (1 << 8) -#define SUPPORTED_MII (1 << 9) -#define SUPPORTED_FIBRE (1 << 10) -#define SUPPORTED_BNC (1 << 11) -#define SUPPORTED_10000baseT_Full (1 << 12) -#define SUPPORTED_Pause (1 << 13) -#define SUPPORTED_Asym_Pause (1 << 14) -#define SUPPORTED_2500baseX_Full (1 << 15) -#define SUPPORTED_Backplane (1 << 16) -#define SUPPORTED_1000baseKX_Full (1 << 17) -#define SUPPORTED_10000baseKX4_Full (1 << 18) -#define SUPPORTED_10000baseKR_Full (1 << 19) -#define SUPPORTED_10000baseR_FEC (1 << 20) -#define SUPPORTED_20000baseMLD2_Full (1 << 21) -#define SUPPORTED_20000baseKR2_Full (1 << 22) -#define SUPPORTED_40000baseKR4_Full (1 << 23) -#define SUPPORTED_40000baseCR4_Full (1 << 24) -#define SUPPORTED_40000baseSR4_Full (1 << 25) -#define SUPPORTED_40000baseLR4_Full (1 << 26) -#define SUPPORTED_56000baseKR4_Full (1 << 27) -#define SUPPORTED_56000baseCR4_Full (1 << 28) -#define SUPPORTED_56000baseSR4_Full (1 << 29) -#define SUPPORTED_56000baseLR4_Full (1 << 30) - -#define ADVERTISED_10baseT_Half (1 << 0) -#define ADVERTISED_10baseT_Full (1 << 1) -#define ADVERTISED_100baseT_Half (1 << 2) -#define ADVERTISED_100baseT_Full (1 << 3) -#define ADVERTISED_1000baseT_Half (1 << 4) -#define ADVERTISED_1000baseT_Full (1 << 5) -#define ADVERTISED_Autoneg (1 << 6) -#define ADVERTISED_TP (1 << 7) -#define ADVERTISED_AUI (1 << 8) -#define ADVERTISED_MII (1 << 9) -#define ADVERTISED_FIBRE (1 << 10) -#define ADVERTISED_BNC (1 << 11) -#define ADVERTISED_10000baseT_Full (1 << 12) -#define ADVERTISED_Pause (1 << 13) -#define ADVERTISED_Asym_Pause (1 << 14) -#define ADVERTISED_2500baseX_Full (1 << 15) -#define ADVERTISED_Backplane (1 << 16) -#define ADVERTISED_1000baseKX_Full (1 << 17) -#define ADVERTISED_10000baseKX4_Full (1 << 18) -#define ADVERTISED_10000baseKR_Full (1 << 19) -#define ADVERTISED_10000baseR_FEC (1 << 20) -#define ADVERTISED_20000baseMLD2_Full (1 << 21) -#define ADVERTISED_20000baseKR2_Full (1 << 22) -#define ADVERTISED_40000baseKR4_Full (1 << 23) -#define ADVERTISED_40000baseCR4_Full (1 << 24) -#define ADVERTISED_40000baseSR4_Full (1 << 25) -#define ADVERTISED_40000baseLR4_Full (1 << 26) -#define ADVERTISED_56000baseKR4_Full (1 << 27) -#define ADVERTISED_56000baseCR4_Full (1 << 28) -#define ADVERTISED_56000baseSR4_Full (1 << 29) -#define ADVERTISED_56000baseLR4_Full (1 << 30) +/* Link mode bit indices */ +enum ethtool_link_mode_bit_indices { + ETHTOOL_LINK_MODE_10baseT_Half_BIT = 0, + ETHTOOL_LINK_MODE_10baseT_Full_BIT = 1, + ETHTOOL_LINK_MODE_100baseT_Half_BIT = 2, + ETHTOOL_LINK_MODE_100baseT_Full_BIT = 3, + ETHTOOL_LINK_MODE_1000baseT_Half_BIT = 4, + ETHTOOL_LINK_MODE_1000baseT_Full_BIT = 5, + ETHTOOL_LINK_MODE_Autoneg_BIT = 6, + ETHTOOL_LINK_MODE_TP_BIT = 7, + ETHTOOL_LINK_MODE_AUI_BIT = 8, + ETHTOOL_LINK_MODE_MII_BIT = 9, + ETHTOOL_LINK_MODE_FIBRE_BIT = 10, + ETHTOOL_LINK_MODE_BNC_BIT = 
11, + ETHTOOL_LINK_MODE_10000baseT_Full_BIT = 12, + ETHTOOL_LINK_MODE_Pause_BIT = 13, + ETHTOOL_LINK_MODE_Asym_Pause_BIT = 14, + ETHTOOL_LINK_MODE_2500baseX_Full_BIT = 15, + ETHTOOL_LINK_MODE_Backplane_BIT = 16, + ETHTOOL_LINK_MODE_1000baseKX_Full_BIT = 17, + ETHTOOL_LINK_MODE_10000baseKX4_Full_BIT = 18, + ETHTOOL_LINK_MODE_10000baseKR_Full_BIT = 19, + ETHTOOL_LINK_MODE_10000baseR_FEC_BIT = 20, + ETHTOOL_LINK_MODE_20000baseMLD2_Full_BIT = 21, + ETHTOOL_LINK_MODE_20000baseKR2_Full_BIT = 22, + ETHTOOL_LINK_MODE_40000baseKR4_Full_BIT = 23, + ETHTOOL_LINK_MODE_40000baseCR4_Full_BIT = 24, + ETHTOOL_LINK_MODE_40000baseSR4_Full_BIT = 25, + ETHTOOL_LINK_MODE_40000baseLR4_Full_BIT = 26, + ETHTOOL_LINK_MODE_56000baseKR4_Full_BIT = 27, + ETHTOOL_LINK_MODE_56000baseCR4_Full_BIT = 28, + ETHTOOL_LINK_MODE_56000baseSR4_Full_BIT = 29, + ETHTOOL_LINK_MODE_56000baseLR4_Full_BIT = 30, + + /* Last allowed bit for __ETHTOOL_LINK_MODE_LEGACY_MASK is bit + * 31. Please do NOT define any SUPPORTED_* or ADVERTISED_* + * macro for bits > 31. The only way to use indices > 31 is to + * use the new ETHTOOL_GLINKSETTINGS/ETHTOOL_SLINKSETTINGS API. + */ + + __ETHTOOL_LINK_MODE_LAST + = ETHTOOL_LINK_MODE_56000baseLR4_Full_BIT, +}; + +#define __ETHTOOL_LINK_MODE_LEGACY_MASK(base_name) \ + (1UL << (ETHTOOL_LINK_MODE_ ## base_name ## _BIT)) + +/* DEPRECATED macros. Please migrate to + * ETHTOOL_GLINKSETTINGS/ETHTOOL_SLINKSETTINGS API. Please do NOT + * define any new SUPPORTED_* macro for bits > 31. + */ +#define SUPPORTED_10baseT_Half __ETHTOOL_LINK_MODE_LEGACY_MASK(10baseT_Half) +#define SUPPORTED_10baseT_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(10baseT_Full) +#define SUPPORTED_100baseT_Half __ETHTOOL_LINK_MODE_LEGACY_MASK(100baseT_Half) +#define SUPPORTED_100baseT_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(100baseT_Full) +#define SUPPORTED_1000baseT_Half __ETHTOOL_LINK_MODE_LEGACY_MASK(1000baseT_Half) +#define SUPPORTED_1000baseT_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(1000baseT_Full) +#define SUPPORTED_Autoneg __ETHTOOL_LINK_MODE_LEGACY_MASK(Autoneg) +#define SUPPORTED_TP __ETHTOOL_LINK_MODE_LEGACY_MASK(TP) +#define SUPPORTED_AUI __ETHTOOL_LINK_MODE_LEGACY_MASK(AUI) +#define SUPPORTED_MII __ETHTOOL_LINK_MODE_LEGACY_MASK(MII) +#define SUPPORTED_FIBRE __ETHTOOL_LINK_MODE_LEGACY_MASK(FIBRE) +#define SUPPORTED_BNC __ETHTOOL_LINK_MODE_LEGACY_MASK(BNC) +#define SUPPORTED_10000baseT_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(10000baseT_Full) +#define SUPPORTED_Pause __ETHTOOL_LINK_MODE_LEGACY_MASK(Pause) +#define SUPPORTED_Asym_Pause __ETHTOOL_LINK_MODE_LEGACY_MASK(Asym_Pause) +#define SUPPORTED_2500baseX_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(2500baseX_Full) +#define SUPPORTED_Backplane __ETHTOOL_LINK_MODE_LEGACY_MASK(Backplane) +#define SUPPORTED_1000baseKX_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(1000baseKX_Full) +#define SUPPORTED_10000baseKX4_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(10000baseKX4_Full) +#define SUPPORTED_10000baseKR_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(10000baseKR_Full) +#define SUPPORTED_10000baseR_FEC __ETHTOOL_LINK_MODE_LEGACY_MASK(10000baseR_FEC) +#define SUPPORTED_20000baseMLD2_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(20000baseMLD2_Full) +#define SUPPORTED_20000baseKR2_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(20000baseKR2_Full) +#define SUPPORTED_40000baseKR4_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(40000baseKR4_Full) +#define SUPPORTED_40000baseCR4_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(40000baseCR4_Full) +#define SUPPORTED_40000baseSR4_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(40000baseSR4_Full) +#define SUPPORTED_40000baseLR4_Full 
__ETHTOOL_LINK_MODE_LEGACY_MASK(40000baseLR4_Full) +#define SUPPORTED_56000baseKR4_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(56000baseKR4_Full) +#define SUPPORTED_56000baseCR4_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(56000baseCR4_Full) +#define SUPPORTED_56000baseSR4_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(56000baseSR4_Full) +#define SUPPORTED_56000baseLR4_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(56000baseLR4_Full) +/* Please do not define any new SUPPORTED_* macro for bits > 31, see + * notice above. + */ + +/* + * DEPRECATED macros. Please migrate to + * ETHTOOL_GLINKSETTINGS/ETHTOOL_SLINKSETTINGS API. Please do NOT + * define any new ADVERTISED_* macro for bits > 31. + */ +#define ADVERTISED_10baseT_Half __ETHTOOL_LINK_MODE_LEGACY_MASK(10baseT_Half) +#define ADVERTISED_10baseT_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(10baseT_Full) +#define ADVERTISED_100baseT_Half __ETHTOOL_LINK_MODE_LEGACY_MASK(100baseT_Half) +#define ADVERTISED_100baseT_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(100baseT_Full) +#define ADVERTISED_1000baseT_Half __ETHTOOL_LINK_MODE_LEGACY_MASK(1000baseT_Half) +#define ADVERTISED_1000baseT_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(1000baseT_Full) +#define ADVERTISED_Autoneg __ETHTOOL_LINK_MODE_LEGACY_MASK(Autoneg) +#define ADVERTISED_TP __ETHTOOL_LINK_MODE_LEGACY_MASK(TP) +#define ADVERTISED_AUI __ETHTOOL_LINK_MODE_LEGACY_MASK(AUI) +#define ADVERTISED_MII __ETHTOOL_LINK_MODE_LEGACY_MASK(MII) +#define ADVERTISED_FIBRE __ETHTOOL_LINK_MODE_LEGACY_MASK(FIBRE) +#define ADVERTISED_BNC __ETHTOOL_LINK_MODE_LEGACY_MASK(BNC) +#define ADVERTISED_10000baseT_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(10000baseT_Full) +#define ADVERTISED_Pause __ETHTOOL_LINK_MODE_LEGACY_MASK(Pause) +#define ADVERTISED_Asym_Pause __ETHTOOL_LINK_MODE_LEGACY_MASK(Asym_Pause) +#define ADVERTISED_2500baseX_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(2500baseX_Full) +#define ADVERTISED_Backplane __ETHTOOL_LINK_MODE_LEGACY_MASK(Backplane) +#define ADVERTISED_1000baseKX_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(1000baseKX_Full) +#define ADVERTISED_10000baseKX4_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(10000baseKX4_Full) +#define ADVERTISED_10000baseKR_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(10000baseKR_Full) +#define ADVERTISED_10000baseR_FEC __ETHTOOL_LINK_MODE_LEGACY_MASK(10000baseR_FEC) +#define ADVERTISED_20000baseMLD2_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(20000baseMLD2_Full) +#define ADVERTISED_20000baseKR2_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(20000baseKR2_Full) +#define ADVERTISED_40000baseKR4_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(40000baseKR4_Full) +#define ADVERTISED_40000baseCR4_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(40000baseCR4_Full) +#define ADVERTISED_40000baseSR4_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(40000baseSR4_Full) +#define ADVERTISED_40000baseLR4_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(40000baseLR4_Full) +#define ADVERTISED_56000baseKR4_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(56000baseKR4_Full) +#define ADVERTISED_56000baseCR4_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(56000baseCR4_Full) +#define ADVERTISED_56000baseSR4_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(56000baseSR4_Full) +#define ADVERTISED_56000baseLR4_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(56000baseLR4_Full) +/* Please do not define any new ADVERTISED_* macro for bits > 31, see + * notice above. + */ /* The following are all involved in forcing a particular link * mode for the device for setting things.
When getting the @@ -1533,4 +1604,123 @@ enum ethtool_reset_flags { }; #define ETH_RESET_SHARED_SHIFT 16 + +/** + * struct ethtool_link_settings - link control and status + * + * IMPORTANT, Backward compatibility notice: When implementing new + * user-space tools, please first try %ETHTOOL_GLINKSETTINGS, and + * if it succeeds use %ETHTOOL_SLINKSETTINGS to change link + * settings; do not use %ETHTOOL_SSET if %ETHTOOL_GLINKSETTINGS + * succeeded: stick to %ETHTOOL_GLINKSETTINGS/%SLINKSETTINGS in + * that case. Conversely, if %ETHTOOL_GLINKSETTINGS fails, use + * %ETHTOOL_GSET to query and %ETHTOOL_SSET to change link + * settings; do not use %ETHTOOL_SLINKSETTINGS if + * %ETHTOOL_GLINKSETTINGS failed: stick to + * %ETHTOOL_GSET/%ETHTOOL_SSET in that case. + * + * @cmd: Command number = %ETHTOOL_GLINKSETTINGS or %ETHTOOL_SLINKSETTINGS + * @speed: Link speed (Mbps) + * @duplex: Duplex mode; one of %DUPLEX_* + * @port: Physical connector type; one of %PORT_* + * @phy_address: MDIO address of PHY (transceiver); 0 or 255 if not + * applicable. For clause 45 PHYs this is the PRTAD. + * @autoneg: Enable/disable autonegotiation and auto-detection; + * either %AUTONEG_DISABLE or %AUTONEG_ENABLE + * @mdio_support: Bitmask of %ETH_MDIO_SUPPORTS_* flags for the MDIO + * protocols supported by the interface; 0 if unknown. + * Read-only. + * @eth_tp_mdix: Ethernet twisted-pair MDI(-X) status; one of + * %ETH_TP_MDI_*. If the status is unknown or not applicable, the + * value will be %ETH_TP_MDI_INVALID. Read-only. + * @eth_tp_mdix_ctrl: Ethernet twisted pair MDI(-X) control; one of + * %ETH_TP_MDI_*. If MDI(-X) control is not implemented, reads + * yield %ETH_TP_MDI_INVALID and writes may be ignored or rejected. + * When written successfully, the link should be renegotiated if + * necessary. + * @link_mode_masks_nwords: Number of 32-bit words for each of the + * supported, advertising, lp_advertising link mode bitmaps. For + * %ETHTOOL_GLINKSETTINGS: on entry, number of words passed by user + * (>= 0); on return, if handshake in progress, negative if + * request size unsupported by kernel: absolute value indicates + * kernel recommended size and cmd field is 0, as well as all the + * other fields; otherwise (handshake completed), strictly + * positive to indicate size used by kernel and cmd field is + * %ETHTOOL_GLINKSETTINGS, all other fields populated by driver. For + * %ETHTOOL_SLINKSETTINGS: must be valid on entry, ie. a positive + * value returned previously by %ETHTOOL_GLINKSETTINGS, otherwise + * refused. For drivers: ignore this field (use kernel's + * __ETHTOOL_LINK_MODE_MASK_NBITS instead), any change to it will + * be overwritten by kernel. + * @supported: Bitmap with each bit meaning given by + * %ethtool_link_mode_bit_indices for the link modes, physical + * connectors and other link features for which the interface + * supports autonegotiation or auto-detection. Read-only. + * @advertising: Bitmap with each bit meaning given by + * %ethtool_link_mode_bit_indices for the link modes, physical + * connectors and other link features that are advertised through + * autonegotiation or enabled for auto-detection. + * @lp_advertising: Bitmap with each bit meaning given by + * %ethtool_link_mode_bit_indices for the link modes, and other + * link features that the link partner advertised through + * autonegotiation; 0 if unknown or not applicable. Read-only. 
+ * + * If autonegotiation is disabled, the speed and @duplex represent the + * fixed link mode and are writable if the driver supports multiple + * link modes. If it is enabled then they are read-only; if the link + * is up they represent the negotiated link mode; if the link is down, + * the speed is 0, %SPEED_UNKNOWN or the highest enabled speed and + * @duplex is %DUPLEX_UNKNOWN or the best enabled duplex mode. + * + * Some hardware interfaces may have multiple PHYs and/or physical + * connectors fitted or do not allow the driver to detect which are + * fitted. For these interfaces @port and/or @phy_address may be + * writable, possibly dependent on @autoneg being %AUTONEG_DISABLE. + * Otherwise, attempts to write different values may be ignored or + * rejected. + * + * Deprecated %ethtool_cmd fields transceiver, maxtxpkt and maxrxpkt + * are not available in %ethtool_link_settings. Until all drivers are + * converted to ignore them or to the new %ethtool_link_settings API, + * for both queries and changes, users should always try + * %ETHTOOL_GLINKSETTINGS first, and if it fails with -ENOTSUPP stick + * only to %ETHTOOL_GSET and %ETHTOOL_SSET consistently. If it + * succeeds, then users should stick to %ETHTOOL_GLINKSETTINGS and + * %ETHTOOL_SLINKSETTINGS (which would support drivers implementing + * either %ethtool_cmd or %ethtool_link_settings). + * + * Users should assume that all fields not marked read-only are + * writable and subject to validation by the driver. They should use + * %ETHTOOL_GLINKSETTINGS to get the current values before making specific + * changes and then applying them with %ETHTOOL_SLINKSETTINGS. + * + * Drivers that implement %get_link_ksettings and/or + * %set_link_ksettings should ignore the @cmd + * and @link_mode_masks_nwords fields (any change to them overwritten + * by kernel), and rely only on kernel's internal + * %__ETHTOOL_LINK_MODE_MASK_NBITS and + * %ethtool_link_mode_mask_t. Drivers that implement + * %set_link_ksettings() should validate all fields other than @cmd + * and @link_mode_masks_nwords that are not described as read-only or + * deprecated, and must ignore all fields described as read-only. + */ +struct ethtool_link_settings { + __u32 cmd; + __u32 speed; + __u8 duplex; + __u8 port; + __u8 phy_address; + __u8 autoneg; + __u8 mdio_support; + __u8 eth_tp_mdix; + __u8 eth_tp_mdix_ctrl; + __s8 link_mode_masks_nwords; + __u32 reserved[8]; + __u32 link_mode_masks[0]; + /* layout of link_mode_masks fields: + * __u32 map_supported[link_mode_masks_nwords]; + * __u32 map_advertising[link_mode_masks_nwords]; + * __u32 map_lp_advertising[link_mode_masks_nwords]; + */ +}; #endif /* _UAPI_LINUX_ETHTOOL_H */ diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 2406101002b1..edcec56ed228 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -387,6 +387,359 @@ static int __ethtool_set_flags(struct net_device *dev, u32 data) return 0; } +static void convert_legacy_u32_to_link_mode(unsigned long *dst, u32 legacy_u32) +{ + bitmap_zero(dst, __ETHTOOL_LINK_MODE_MASK_NBITS); + dst[0] = legacy_u32; +} + +/* return false if src had higher bits set. lower bits always updated. 
*/ +static bool convert_link_mode_to_legacy_u32(u32 *legacy_u32, + const unsigned long *src) +{ + bool retval = true; + + /* TODO: following test will soon always be true */ + if (__ETHTOOL_LINK_MODE_MASK_NBITS > 32) { + __ETHTOOL_DECLARE_LINK_MODE_MASK(ext); + + bitmap_zero(ext, __ETHTOOL_LINK_MODE_MASK_NBITS); + bitmap_fill(ext, 32); + bitmap_complement(ext, ext, __ETHTOOL_LINK_MODE_MASK_NBITS); + if (bitmap_intersects(ext, src, + __ETHTOOL_LINK_MODE_MASK_NBITS)) { + /* src mask goes beyond bit 31 */ + retval = false; + } + } + *legacy_u32 = src[0]; + return retval; +} + +/* return false if legacy contained non-0 deprecated fields + * transceiver/maxtxpkt/maxrxpkt. rest of ksettings always updated + */ +static bool +convert_legacy_settings_to_link_ksettings( + struct ethtool_link_ksettings *link_ksettings, + const struct ethtool_cmd *legacy_settings) +{ + bool retval = true; + + memset(link_ksettings, 0, sizeof(*link_ksettings)); + + /* This is used to tell users that driver is still using these + * deprecated legacy fields, and they should not use + * %ETHTOOL_GLINKSETTINGS/%ETHTOOL_SLINKSETTINGS + */ + if (legacy_settings->transceiver || + legacy_settings->maxtxpkt || + legacy_settings->maxrxpkt) + retval = false; + + convert_legacy_u32_to_link_mode( + link_ksettings->link_modes.supported, + legacy_settings->supported); + convert_legacy_u32_to_link_mode( + link_ksettings->link_modes.advertising, + legacy_settings->advertising); + convert_legacy_u32_to_link_mode( + link_ksettings->link_modes.lp_advertising, + legacy_settings->lp_advertising); + link_ksettings->base.speed + = ethtool_cmd_speed(legacy_settings); + link_ksettings->base.duplex + = legacy_settings->duplex; + link_ksettings->base.port + = legacy_settings->port; + link_ksettings->base.phy_address + = legacy_settings->phy_address; + link_ksettings->base.autoneg + = legacy_settings->autoneg; + link_ksettings->base.mdio_support + = legacy_settings->mdio_support; + link_ksettings->base.eth_tp_mdix + = legacy_settings->eth_tp_mdix; + link_ksettings->base.eth_tp_mdix_ctrl + = legacy_settings->eth_tp_mdix_ctrl; + return retval; +} + +/* return false if ksettings link modes had higher bits + * set. 
legacy_settings always updated (best effort) + */ +static bool +convert_link_ksettings_to_legacy_settings( + struct ethtool_cmd *legacy_settings, + const struct ethtool_link_ksettings *link_ksettings) +{ + bool retval = true; + + memset(legacy_settings, 0, sizeof(*legacy_settings)); + /* this also clears the deprecated fields in legacy structure: + * __u8 transceiver; + * __u32 maxtxpkt; + * __u32 maxrxpkt; + */ + + retval &= convert_link_mode_to_legacy_u32( + &legacy_settings->supported, + link_ksettings->link_modes.supported); + retval &= convert_link_mode_to_legacy_u32( + &legacy_settings->advertising, + link_ksettings->link_modes.advertising); + retval &= convert_link_mode_to_legacy_u32( + &legacy_settings->lp_advertising, + link_ksettings->link_modes.lp_advertising); + ethtool_cmd_speed_set(legacy_settings, link_ksettings->base.speed); + legacy_settings->duplex + = link_ksettings->base.duplex; + legacy_settings->port + = link_ksettings->base.port; + legacy_settings->phy_address + = link_ksettings->base.phy_address; + legacy_settings->autoneg + = link_ksettings->base.autoneg; + legacy_settings->mdio_support + = link_ksettings->base.mdio_support; + legacy_settings->eth_tp_mdix + = link_ksettings->base.eth_tp_mdix; + legacy_settings->eth_tp_mdix_ctrl + = link_ksettings->base.eth_tp_mdix_ctrl; + return retval; +} + +/* number of 32-bit words to store the user's link mode bitmaps */ +#define __ETHTOOL_LINK_MODE_MASK_NU32 \ + DIV_ROUND_UP(__ETHTOOL_LINK_MODE_MASK_NBITS, 32) + +/* layout of the struct passed from/to userland */ +struct ethtool_link_usettings { + struct ethtool_link_settings base; + struct { + __u32 supported[__ETHTOOL_LINK_MODE_MASK_NU32]; + __u32 advertising[__ETHTOOL_LINK_MODE_MASK_NU32]; + __u32 lp_advertising[__ETHTOOL_LINK_MODE_MASK_NU32]; + } link_modes; +}; + +/* Internal kernel helper to query a device ethtool_link_settings. + * + * Backward compatibility note: for compatibility with legacy drivers + * that implement only the ethtool_cmd API, this has to work with both + * drivers implementing get_link_ksettings API and drivers + * implementing get_settings API. When drivers implement get_settings + * and report ethtool_cmd deprecated fields + * (transceiver/maxrxpkt/maxtxpkt), these fields are silently ignored + * because the resulting struct ethtool_link_settings does not report them. + */ +int __ethtool_get_link_ksettings(struct net_device *dev, + struct ethtool_link_ksettings *link_ksettings) +{ + int err; + struct ethtool_cmd cmd; + + ASSERT_RTNL(); + + if (dev->ethtool_ops->get_link_ksettings) { + memset(link_ksettings, 0, sizeof(*link_ksettings)); + return dev->ethtool_ops->get_link_ksettings(dev, + link_ksettings); + } + + /* driver doesn't support %ethtool_link_ksettings API. revert to + * legacy %ethtool_cmd API, unless it's not supported either. + * TODO: remove when ethtool_ops::get_settings disappears internally + */ + err = __ethtool_get_settings(dev, &cmd); + if (err < 0) + return err; + + /* we ignore deprecated fields transceiver/maxrxpkt/maxtxpkt + */ + convert_legacy_settings_to_link_ksettings(link_ksettings, &cmd); + return err; +} +EXPORT_SYMBOL(__ethtool_get_link_ksettings); + +/* convert ethtool_link_usettings in user space to a kernel internal + * ethtool_link_ksettings. return 0 on success, errno on error. 
+ */ +static int load_link_ksettings_from_user(struct ethtool_link_ksettings *to, + const void __user *from) +{ + struct ethtool_link_usettings link_usettings; + + if (copy_from_user(&link_usettings, from, sizeof(link_usettings))) + return -EFAULT; + + memcpy(&to->base, &link_usettings.base, sizeof(to->base)); + bitmap_from_u32array(to->link_modes.supported, + __ETHTOOL_LINK_MODE_MASK_NBITS, + link_usettings.link_modes.supported, + __ETHTOOL_LINK_MODE_MASK_NU32); + bitmap_from_u32array(to->link_modes.advertising, + __ETHTOOL_LINK_MODE_MASK_NBITS, + link_usettings.link_modes.advertising, + __ETHTOOL_LINK_MODE_MASK_NU32); + bitmap_from_u32array(to->link_modes.lp_advertising, + __ETHTOOL_LINK_MODE_MASK_NBITS, + link_usettings.link_modes.lp_advertising, + __ETHTOOL_LINK_MODE_MASK_NU32); + + return 0; +} + +/* convert a kernel internal ethtool_link_ksettings to + * ethtool_link_usettings in user space. return 0 on success, errno on + * error. + */ +static int +store_link_ksettings_for_user(void __user *to, + const struct ethtool_link_ksettings *from) +{ + struct ethtool_link_usettings link_usettings; + + memcpy(&link_usettings.base, &from->base, sizeof(link_usettings)); + bitmap_to_u32array(link_usettings.link_modes.supported, + __ETHTOOL_LINK_MODE_MASK_NU32, + from->link_modes.supported, + __ETHTOOL_LINK_MODE_MASK_NBITS); + bitmap_to_u32array(link_usettings.link_modes.advertising, + __ETHTOOL_LINK_MODE_MASK_NU32, + from->link_modes.advertising, + __ETHTOOL_LINK_MODE_MASK_NBITS); + bitmap_to_u32array(link_usettings.link_modes.lp_advertising, + __ETHTOOL_LINK_MODE_MASK_NU32, + from->link_modes.lp_advertising, + __ETHTOOL_LINK_MODE_MASK_NBITS); + + if (copy_to_user(to, &link_usettings, sizeof(link_usettings))) + return -EFAULT; + + return 0; +} + +/* Query device for its ethtool_link_settings. + * + * Backward compatibility note: this function must fail when driver + * does not implement ethtool::get_link_ksettings, even if legacy + * ethtool_ops::get_settings is implemented. This tells new versions + * of ethtool that they should use the legacy API %ETHTOOL_GSET for + * this driver, so that they can correctly access the ethtool_cmd + * deprecated fields (transceiver/maxrxpkt/maxtxpkt), until no driver + * implements ethtool_ops::get_settings anymore. 
+ */ +static int ethtool_get_link_ksettings(struct net_device *dev, + void __user *useraddr) +{ + int err = 0; + struct ethtool_link_ksettings link_ksettings; + + ASSERT_RTNL(); + + if (!dev->ethtool_ops->get_link_ksettings) + return -EOPNOTSUPP; + + /* handle bitmap nbits handshake */ + if (copy_from_user(&link_ksettings.base, useraddr, + sizeof(link_ksettings.base))) + return -EFAULT; + + if (__ETHTOOL_LINK_MODE_MASK_NU32 + != link_ksettings.base.link_mode_masks_nwords) { + /* wrong link mode nbits requested */ + memset(&link_ksettings, 0, sizeof(link_ksettings)); + /* keep cmd field reset to 0 */ + /* send back number of words required as negative val */ + compiletime_assert(__ETHTOOL_LINK_MODE_MASK_NU32 <= S8_MAX, + "need too many bits for link modes!"); + link_ksettings.base.link_mode_masks_nwords + = -((s8)__ETHTOOL_LINK_MODE_MASK_NU32); + + /* copy the base fields back to user, not the link + * mode bitmaps + */ + if (copy_to_user(useraddr, &link_ksettings.base, + sizeof(link_ksettings.base))) + return -EFAULT; + + return 0; + } + + /* handshake successful: user/kernel agree on + * link_mode_masks_nwords + */ + + memset(&link_ksettings, 0, sizeof(link_ksettings)); + err = dev->ethtool_ops->get_link_ksettings(dev, &link_ksettings); + if (err < 0) + return err; + + /* make sure we tell the right values to user */ + link_ksettings.base.cmd = ETHTOOL_GLINKSETTINGS; + link_ksettings.base.link_mode_masks_nwords + = __ETHTOOL_LINK_MODE_MASK_NU32; + + return store_link_ksettings_for_user(useraddr, &link_ksettings); +} + +/* Update device ethtool_link_settings. + * + * Backward compatibility note: this function must fail when driver + * does not implement ethtool::set_link_ksettings, even if legacy + * ethtool_ops::set_settings is implemented. This tells new versions + * of ethtool that they should use the legacy API %ETHTOOL_SSET for + * this driver, so that they can correctly update the ethtool_cmd + * deprecated fields (transceiver/maxrxpkt/maxtxpkt), until no driver + * implements ethtool_ops::get_settings anymore. + */ +static int ethtool_set_link_ksettings(struct net_device *dev, + void __user *useraddr) +{ + int err; + struct ethtool_link_ksettings link_ksettings; + + ASSERT_RTNL(); + + if (!dev->ethtool_ops->set_link_ksettings) + return -EOPNOTSUPP; + + /* make sure nbits field has expected value */ + if (copy_from_user(&link_ksettings.base, useraddr, + sizeof(link_ksettings.base))) + return -EFAULT; + + if (__ETHTOOL_LINK_MODE_MASK_NU32 + != link_ksettings.base.link_mode_masks_nwords) + return -EINVAL; + + /* copy the whole structure, now that we know it has expected + * format + */ + err = load_link_ksettings_from_user(&link_ksettings, useraddr); + if (err) + return err; + + /* re-check nwords field, just in case */ + if (__ETHTOOL_LINK_MODE_MASK_NU32 + != link_ksettings.base.link_mode_masks_nwords) + return -EINVAL; + + return dev->ethtool_ops->set_link_ksettings(dev, &link_ksettings); +} + +/* Internal kernel helper to query a device ethtool_cmd settings. + * + * Note about transition to ethtool_link_settings API: We do not need + * (or want) this function to support "dev" instances that implement + * the ethtool_link_settings API as we will update the drivers calling + * this function to call __ethtool_get_link_ksettings instead, before + * the first drivers implement ethtool_ops::get_link_ksettings. 
+ * + * TODO 1: at least make this function static when no driver is using it + * TODO 2: remove when ethtool_ops::get_settings disappears internally + */ int __ethtool_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) { ASSERT_RTNL(); @@ -400,30 +753,112 @@ int __ethtool_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) } EXPORT_SYMBOL(__ethtool_get_settings); +static void +warn_incomplete_ethtool_legacy_settings_conversion(const char *details) +{ + char name[sizeof(current->comm)]; + + pr_info_once("warning: `%s' uses legacy ethtool link settings API, %s\n", + get_task_comm(name, current), details); +} + +/* Query device for its ethtool_cmd settings. + * + * Backward compatibility note: for compatibility with legacy ethtool, + * this has to work with both drivers implementing get_link_ksettings + * API and drivers implementing get_settings API. When drivers + * implement get_link_ksettings and report higher link mode bits, a + * kernel warning is logged once (with name of 1st driver/device) to + * recommend user to upgrade ethtool, but the command is successful + * (only the lower link mode bits reported back to user). + */ static int ethtool_get_settings(struct net_device *dev, void __user *useraddr) { - int err; struct ethtool_cmd cmd; - err = __ethtool_get_settings(dev, &cmd); - if (err < 0) - return err; + ASSERT_RTNL(); + + if (dev->ethtool_ops->get_link_ksettings) { + /* First, use link_ksettings API if it is supported */ + int err; + struct ethtool_link_ksettings link_ksettings; + + memset(&link_ksettings, 0, sizeof(link_ksettings)); + err = dev->ethtool_ops->get_link_ksettings(dev, + &link_ksettings); + if (err < 0) + return err; + if (!convert_link_ksettings_to_legacy_settings(&cmd, + &link_ksettings)) + warn_incomplete_ethtool_legacy_settings_conversion( + "link modes are only partially reported"); + + /* send a sensible cmd tag back to user */ + cmd.cmd = ETHTOOL_GSET; + } else { + int err; + /* TODO: return -EOPNOTSUPP when + * ethtool_ops::get_settings disappears internally + */ + + /* driver doesn't support %ethtool_link_ksettings + * API. revert to legacy %ethtool_cmd API, unless it's + * not supported either. + */ + err = __ethtool_get_settings(dev, &cmd); + if (err < 0) + return err; + } if (copy_to_user(useraddr, &cmd, sizeof(cmd))) return -EFAULT; + return 0; } +/* Update device link settings with given ethtool_cmd. + * + * Backward compatibility note: for compatibility with legacy ethtool, + * this has to work with both drivers implementing set_link_ksettings + * API and drivers implementing set_settings API. When drivers + * implement set_link_ksettings and user's request updates deprecated + * ethtool_cmd fields (transceiver/maxrxpkt/maxtxpkt), a kernel + * warning is logged once (with name of 1st driver/device) to + * recommend user to upgrade ethtool, and the request is rejected. + */ static int ethtool_set_settings(struct net_device *dev, void __user *useraddr) { struct ethtool_cmd cmd; - if (!dev->ethtool_ops->set_settings) - return -EOPNOTSUPP; + ASSERT_RTNL(); if (copy_from_user(&cmd, useraddr, sizeof(cmd))) return -EFAULT; + /* first, try new %ethtool_link_ksettings API. 
*/ + if (dev->ethtool_ops->set_link_ksettings) { + struct ethtool_link_ksettings link_ksettings; + + if (!convert_legacy_settings_to_link_ksettings(&link_ksettings, + &cmd)) + return -EINVAL; + + link_ksettings.base.cmd = ETHTOOL_SLINKSETTINGS; + link_ksettings.base.link_mode_masks_nwords + = __ETHTOOL_LINK_MODE_MASK_NU32; + return dev->ethtool_ops->set_link_ksettings(dev, + &link_ksettings); + } + + /* legacy %ethtool_cmd API */ + + /* TODO: return -EOPNOTSUPP when ethtool_ops::get_settings + * disappears internally + */ + + if (!dev->ethtool_ops->set_settings) + return -EOPNOTSUPP; + return dev->ethtool_ops->set_settings(dev, &cmd); } @@ -2252,6 +2687,12 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) case ETHTOOL_PERQUEUE: rc = ethtool_set_per_queue(dev, useraddr); break; + case ETHTOOL_GLINKSETTINGS: + rc = ethtool_get_link_ksettings(dev, useraddr); + break; + case ETHTOOL_SLINKSETTINGS: + rc = ethtool_set_link_ksettings(dev, useraddr); + break; default: rc = -EOPNOTSUPP; } -- cgit v1.2.3 From 3237fc63a3297d472a8cec33cb914f20570cfc23 Mon Sep 17 00:00:00 2001 From: David Decotigny Date: Wed, 24 Feb 2016 10:58:11 -0800 Subject: net: ethtool: remove unused __ethtool_get_settings replaced by __ethtool_get_link_ksettings. Signed-off-by: David Decotigny Signed-off-by: David S. Miller --- include/linux/ethtool.h | 4 ---- net/core/ethtool.c | 45 ++++++++++++++------------------------------- 2 files changed, 14 insertions(+), 35 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 8a400a54c92e..e2b7bf27c03e 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -150,10 +150,6 @@ extern int __ethtool_get_link_ksettings(struct net_device *dev, struct ethtool_link_ksettings *link_ksettings); -/* DEPRECATED, use __ethtool_get_link_ksettings */ -extern int __ethtool_get_settings(struct net_device *dev, - struct ethtool_cmd *cmd); - /** * struct ethtool_ops - optional netdev operations * @get_settings: DEPRECATED, use %get_link_ksettings/%set_link_ksettings diff --git a/net/core/ethtool.c b/net/core/ethtool.c index edcec56ed228..2966cd0d7c93 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -551,7 +551,12 @@ int __ethtool_get_link_ksettings(struct net_device *dev, * legacy %ethtool_cmd API, unless it's not supported either. * TODO: remove when ethtool_ops::get_settings disappears internally */ - err = __ethtool_get_settings(dev, &cmd); + if (!dev->ethtool_ops->get_settings) + return -EOPNOTSUPP; + + memset(&cmd, 0, sizeof(cmd)); + cmd.cmd = ETHTOOL_GSET; + err = dev->ethtool_ops->get_settings(dev, &cmd); if (err < 0) return err; @@ -729,30 +734,6 @@ static int ethtool_set_link_ksettings(struct net_device *dev, return dev->ethtool_ops->set_link_ksettings(dev, &link_ksettings); } -/* Internal kernel helper to query a device ethtool_cmd settings. - * - * Note about transition to ethtool_link_settings API: We do not need - * (or want) this function to support "dev" instances that implement - * the ethtool_link_settings API as we will update the drivers calling - * this function to call __ethtool_get_link_ksettings instead, before - * the first drivers implement ethtool_ops::get_link_ksettings. 
- * - * TODO 1: at least make this function static when no driver is using it - * TODO 2: remove when ethtool_ops::get_settings disappears internally - */ -int __ethtool_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) -{ - ASSERT_RTNL(); - - if (!dev->ethtool_ops->get_settings) - return -EOPNOTSUPP; - - memset(cmd, 0, sizeof(struct ethtool_cmd)); - cmd->cmd = ETHTOOL_GSET; - return dev->ethtool_ops->get_settings(dev, cmd); -} -EXPORT_SYMBOL(__ethtool_get_settings); - static void warn_incomplete_ethtool_legacy_settings_conversion(const char *details) { @@ -796,16 +777,18 @@ static int ethtool_get_settings(struct net_device *dev, void __user *useraddr) /* send a sensible cmd tag back to user */ cmd.cmd = ETHTOOL_GSET; } else { - int err; - /* TODO: return -EOPNOTSUPP when - * ethtool_ops::get_settings disappears internally - */ - /* driver doesn't support %ethtool_link_ksettings * API. revert to legacy %ethtool_cmd API, unless it's * not supported either. */ - err = __ethtool_get_settings(dev, &cmd); + int err; + + if (!dev->ethtool_ops->get_settings) + return -EOPNOTSUPP; + + memset(&cmd, 0, sizeof(cmd)); + cmd.cmd = ETHTOOL_GSET; + err = dev->ethtool_ops->get_settings(dev, &cmd); if (err < 0) return err; } -- cgit v1.2.3 From cb77659272af51e80099c93b0e65abbf43bdaad5 Mon Sep 17 00:00:00 2001 From: Dmitry Eremin Date: Sun, 14 Feb 2016 19:13:52 -0500 Subject: staging/lustre: proper support of NFS anonymous dentries NFS can ask to encode dentries that are not connected to the root. The fix checks whether the parent is NULL and encodes the file handle accordingly. Reviewed-on: http://review.whamcloud.com/8347 Intel-bug-id: https://jira.hpdd.intel.com/browse/LU-4231 Reviewed-by: Fan Yong Reviewed-by: James Simmons Reviewed-by: Jian Yu Signed-off-by: Dmitry Eremin Signed-off-by: Oleg Drokin Acked-by: Jeff Layton Signed-off-by: Greg Kroah-Hartman --- drivers/staging/lustre/lustre/llite/llite_nfs.c | 30 ++++++++++++++----------- include/linux/exportfs.h | 6 +++++ 2 files changed, 23 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/drivers/staging/lustre/lustre/llite/llite_nfs.c b/drivers/staging/lustre/lustre/llite/llite_nfs.c index 248441215794..74a88685e9dd 100644 --- a/drivers/staging/lustre/lustre/llite/llite_nfs.c +++ b/drivers/staging/lustre/lustre/llite/llite_nfs.c @@ -142,10 +142,11 @@ ll_iget_for_nfs(struct super_block *sb, struct lu_fid *fid, struct lu_fid *paren struct inode *inode; struct dentry *result; - CDEBUG(D_INFO, "Get dentry for fid: "DFID"\n", PFID(fid)); if (!fid_is_sane(fid)) return ERR_PTR(-ESTALE); + CDEBUG(D_INFO, "Get dentry for fid: " DFID "\n", PFID(fid)); + inode = search_inode_for_lustre(sb, fid); if (IS_ERR(inode)) return ERR_CAST(inode); @@ -161,7 +162,7 @@ ll_iget_for_nfs(struct super_block *sb, struct lu_fid *fid, struct lu_fid *paren * We have to find the parent to tell MDS how to init lov objects.
 */ if (S_ISREG(inode->i_mode) && !ll_i2info(inode)->lli_has_smd && - parent != NULL) { + parent && !fid_is_zero(parent)) { struct ll_inode_info *lli = ll_i2info(inode); spin_lock(&lli->lli_lock); @@ -175,8 +176,6 @@ ll_iget_for_nfs(struct super_block *sb, struct lu_fid *fid, struct lu_fid *paren return result; } -#define LUSTRE_NFS_FID 0x97 - /** * \a connectable - is nfsd will connect himself or this should be done * at lustre @@ -189,20 +188,25 @@ ll_iget_for_nfs(struct super_block *sb, struct lu_fid *fid, struct lu_fid *paren static int ll_encode_fh(struct inode *inode, __u32 *fh, int *plen, struct inode *parent) { + int fileid_len = sizeof(struct lustre_nfs_fid) / 4; struct lustre_nfs_fid *nfs_fid = (void *)fh; CDEBUG(D_INFO, "encoding for (%lu,"DFID") maxlen=%d minlen=%d\n", - inode->i_ino, PFID(ll_inode2fid(inode)), *plen, - (int)sizeof(struct lustre_nfs_fid)); + inode->i_ino, PFID(ll_inode2fid(inode)), *plen, fileid_len); - if (*plen < sizeof(struct lustre_nfs_fid) / 4) - return 255; + if (*plen < fileid_len) { + *plen = fileid_len; + return FILEID_INVALID; + } nfs_fid->lnf_child = *ll_inode2fid(inode); - nfs_fid->lnf_parent = *ll_inode2fid(parent); - *plen = sizeof(struct lustre_nfs_fid) / 4; + if (parent) + nfs_fid->lnf_parent = *ll_inode2fid(parent); + else + fid_zero(&nfs_fid->lnf_parent); + *plen = fileid_len; - return LUSTRE_NFS_FID; + return FILEID_LUSTRE; } static int ll_nfs_get_name_filldir(struct dir_context *ctx, const char *name, @@ -261,7 +265,7 @@ static struct dentry *ll_fh_to_dentry(struct super_block *sb, struct fid *fid, { struct lustre_nfs_fid *nfs_fid = (struct lustre_nfs_fid *)fid; - if (fh_type != LUSTRE_NFS_FID) + if (fh_type != FILEID_LUSTRE) return ERR_PTR(-EPROTO); return ll_iget_for_nfs(sb, &nfs_fid->lnf_child, &nfs_fid->lnf_parent); @@ -272,7 +276,7 @@ static struct dentry *ll_fh_to_parent(struct super_block *sb, struct fid *fid, { struct lustre_nfs_fid *nfs_fid = (struct lustre_nfs_fid *)fid; - if (fh_type != LUSTRE_NFS_FID) + if (fh_type != FILEID_LUSTRE) return ERR_PTR(-EPROTO); return ll_iget_for_nfs(sb, &nfs_fid->lnf_parent, NULL); diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h index fa05e04c5531..d8414502edb4 100644 --- a/include/linux/exportfs.h +++ b/include/linux/exportfs.h @@ -96,6 +96,12 @@ enum fid_type { */ FILEID_FAT_WITH_PARENT = 0x72, + /* + * 128 bit child FID (struct lu_fid) + * 128 bit parent FID (struct lu_fid) + */ + FILEID_LUSTRE = 0x97, + /* * Filesystems must not use 0xff file ID. */ -- cgit v1.2.3 From 2f9ba65d9d79311119470d9a5280b335c2fb023c Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Tue, 9 Feb 2016 19:56:11 +0100 Subject: fbdev: kill fb_rotate The fb_rotate method in struct fb_ops is never actually invoked, and it's been that way in the entire history of git (in fact, the last occurrence of the string '->fb_rotate' vanished over 10 years ago, with b4d8aea6d6, and that merely tested whether the callback existed). So remove some dead code and make struct fb_ops a little smaller.
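For out-of-tree drivers that relied on the hook, rotation remains reachable through the rotate field of struct fb_var_screeninfo, which does travel through fb_check_var()/fb_set_par() on FBIOPUT_VSCREENINFO. A hypothetical sketch (made-up foofb names, assuming <linux/fb.h>; illustration only, not part of this patch):

static int foofb_check_var(struct fb_var_screeninfo *var, struct fb_info *info)
{
	/* hypothetical panel: only normal and upside-down orientations work */
	if (var->rotate != FB_ROTATE_UR && var->rotate != FB_ROTATE_UD)
		var->rotate = FB_ROTATE_UR;
	return 0;
}

static int foofb_set_par(struct fb_info *info)
{
	/* foofb_hw_set_rotation() is a stand-in for the device programming */
	foofb_hw_set_rotation(info, info->var.rotate);
	return 0;
}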
Signed-off-by: Rasmus Villemoes Signed-off-by: Tomi Valkeinen --- drivers/video/fbdev/atafb.c | 3 --- drivers/video/fbdev/au1100fb.c | 22 ---------------------- drivers/video/fbdev/bf537-lq035.c | 23 ----------------------- drivers/video/fbdev/omap/omapfb_main.c | 22 ---------------------- drivers/video/fbdev/skeletonfb.c | 17 ----------------- include/linux/fb.h | 3 --- 6 files changed, 90 deletions(-) (limited to 'include/linux') diff --git a/drivers/video/fbdev/atafb.c b/drivers/video/fbdev/atafb.c index d6ce613e12ad..fcd2dd670a65 100644 --- a/drivers/video/fbdev/atafb.c +++ b/drivers/video/fbdev/atafb.c @@ -313,9 +313,6 @@ extern unsigned char fontdata_8x16[]; * * Draws cursor * * int (*fb_cursor) (struct fb_info *info, struct fb_cursor *cursor); * - * * Rotates the display * - * void (*fb_rotate)(struct fb_info *info, int angle); - * * * wait for blit idle, optional * * int (*fb_sync)(struct fb_info *info); * diff --git a/drivers/video/fbdev/au1100fb.c b/drivers/video/fbdev/au1100fb.c index 59560189b24a..35df2c1a8a63 100644 --- a/drivers/video/fbdev/au1100fb.c +++ b/drivers/video/fbdev/au1100fb.c @@ -334,27 +334,6 @@ int au1100fb_fb_pan_display(struct fb_var_screeninfo *var, struct fb_info *fbi) return 0; } -/* fb_rotate - * Rotate the display of this angle. This doesn't seems to be used by the core, - * but as our hardware supports it, so why not implementing it... - */ -void au1100fb_fb_rotate(struct fb_info *fbi, int angle) -{ - struct au1100fb_device *fbdev = to_au1100fb_device(fbi); - - print_dbg("fb_rotate %p %d", fbi, angle); - - if (fbdev && (angle > 0) && !(angle % 90)) { - - fbdev->regs->lcd_control &= ~LCD_CONTROL_GO; - - fbdev->regs->lcd_control &= ~(LCD_CONTROL_SM_MASK); - fbdev->regs->lcd_control |= ((angle/90) << LCD_CONTROL_SM_BIT); - - fbdev->regs->lcd_control |= LCD_CONTROL_GO; - } -} - /* fb_mmap * Map video memory in user space. We don't use the generic fb_mmap method mainly * to allow the use of the TLB streaming flag (CCA=6) @@ -380,7 +359,6 @@ static struct fb_ops au1100fb_ops = .fb_fillrect = cfb_fillrect, .fb_copyarea = cfb_copyarea, .fb_imageblit = cfb_imageblit, - .fb_rotate = au1100fb_fb_rotate, .fb_mmap = au1100fb_fb_mmap, }; diff --git a/drivers/video/fbdev/bf537-lq035.c b/drivers/video/fbdev/bf537-lq035.c index 7db3052b471d..ef29fb425122 100644 --- a/drivers/video/fbdev/bf537-lq035.c +++ b/drivers/video/fbdev/bf537-lq035.c @@ -554,28 +554,6 @@ static int bfin_lq035_fb_check_var(struct fb_var_screeninfo *var, return 0; } -/* fb_rotate - * Rotate the display of this angle. This doesn't seems to be used by the core, - * but as our hardware supports it, so why not implementing it... 
- */ -static void bfin_lq035_fb_rotate(struct fb_info *fbi, int angle) -{ - pr_debug("%s: %p %d", __func__, fbi, angle); -#if (defined(UD) && defined(LBR)) - switch (angle) { - - case 180: - gpio_set_value(LBR, 0); - gpio_set_value(UD, 1); - break; - default: - gpio_set_value(LBR, 1); - gpio_set_value(UD, 0); - break; - } -#endif -} - static int bfin_lq035_fb_cursor(struct fb_info *info, struct fb_cursor *cursor) { if (nocursor) @@ -623,7 +601,6 @@ static struct fb_ops bfin_lq035_fb_ops = { .fb_open = bfin_lq035_fb_open, .fb_release = bfin_lq035_fb_release, .fb_check_var = bfin_lq035_fb_check_var, - .fb_rotate = bfin_lq035_fb_rotate, .fb_fillrect = cfb_fillrect, .fb_copyarea = cfb_copyarea, .fb_imageblit = cfb_imageblit, diff --git a/drivers/video/fbdev/omap/omapfb_main.c b/drivers/video/fbdev/omap/omapfb_main.c index 393ae1bc07e8..6429f33167f5 100644 --- a/drivers/video/fbdev/omap/omapfb_main.c +++ b/drivers/video/fbdev/omap/omapfb_main.c @@ -594,27 +594,6 @@ static int set_fb_var(struct fb_info *fbi, } -/* Set rotation (0, 90, 180, 270 degree), and switch to the new mode. */ -static void omapfb_rotate(struct fb_info *fbi, int rotate) -{ - struct omapfb_plane_struct *plane = fbi->par; - struct omapfb_device *fbdev = plane->fbdev; - - omapfb_rqueue_lock(fbdev); - if (rotate != fbi->var.rotate) { - struct fb_var_screeninfo *new_var = &fbdev->new_var; - - memcpy(new_var, &fbi->var, sizeof(*new_var)); - new_var->rotate = rotate; - if (set_fb_var(fbi, new_var) == 0 && - memcmp(new_var, &fbi->var, sizeof(*new_var))) { - memcpy(&fbi->var, new_var, sizeof(*new_var)); - ctrl_change_mode(fbi); - } - } - omapfb_rqueue_unlock(fbdev); -} - /* * Set new x,y offsets in the virtual display for the visible area and switch * to the new mode. @@ -1256,7 +1235,6 @@ static struct fb_ops omapfb_ops = { .fb_ioctl = omapfb_ioctl, .fb_check_var = omapfb_check_var, .fb_set_par = omapfb_set_par, - .fb_rotate = omapfb_rotate, .fb_pan_display = omapfb_pan_display, }; diff --git a/drivers/video/fbdev/skeletonfb.c b/drivers/video/fbdev/skeletonfb.c index fefde7c6add7..f948baa16d82 100644 --- a/drivers/video/fbdev/skeletonfb.c +++ b/drivers/video/fbdev/skeletonfb.c @@ -613,22 +613,6 @@ int xxxfb_cursor(struct fb_info *info, struct fb_cursor *cursor) */ } -/** - * xxxfb_rotate - NOT a required function. If your hardware - * supports rotation the whole screen then - * you would provide a hook for this. - * - * @info: frame buffer structure that represents a single frame buffer - * @angle: The angle we rotate the screen. - * - * This operation is used to set or alter the properities of the - * cursor. - */ -void xxxfb_rotate(struct fb_info *info, int angle) -{ -/* Will be deprecated */ -} - /** * xxxfb_sync - NOT a required function. Normally the accel engine * for a graphics card take a specific amount of time. @@ -665,7 +649,6 @@ static struct fb_ops xxxfb_ops = { .fb_copyarea = xxxfb_copyarea, /* Needed !!! */ .fb_imageblit = xxxfb_imageblit, /* Needed !!! */ .fb_cursor = xxxfb_cursor, /* Optional !!! 
 */ - .fb_rotate = xxxfb_rotate, .fb_sync = xxxfb_sync, .fb_ioctl = xxxfb_ioctl, .fb_mmap = xxxfb_mmap, diff --git a/include/linux/fb.h b/include/linux/fb.h index 55433f86f0a3..dfe88351341f 100644 --- a/include/linux/fb.h +++ b/include/linux/fb.h @@ -296,9 +296,6 @@ struct fb_ops { /* Draws cursor */ int (*fb_cursor) (struct fb_info *info, struct fb_cursor *cursor); - /* Rotates the display */ - void (*fb_rotate)(struct fb_info *info, int angle); - /* wait for blit idle, optional */ int (*fb_sync)(struct fb_info *info); -- cgit v1.2.3 From da4e4f18afe0f3729d68f3785c5802f786d36e34 Mon Sep 17 00:00:00 2001 From: Lorenzo Pieralisi Date: Tue, 23 Feb 2016 18:22:39 +0000 Subject: drivers/perf: arm_pmu: implement CPU_PM notifier When a CPU is suspended (either through suspend-to-RAM or CPUidle), its PMU registers content can be lost, which means that counters registers values that were initialized on power down entry have to be reprogrammed on power-up to make sure the counters set-up is preserved (ie on power-up registers take the reset values on Cold or Warm reset, which can be architecturally UNKNOWN). To guarantee seamless profiling conditions across a core power down this patch adds a CPU PM notifier to ARM pmus, that upon CPU PM entry/exit from low-power states saves/restores the pmu registers set-up (by using the ARM perf API), so that the power-down/up cycle does not affect the perf behaviour (apart from a black-out period between power-up/down CPU PM notifications that is unavoidable). Cc: Will Deacon Cc: Sudeep Holla Cc: Daniel Lezcano Cc: Mathieu Poirier Cc: Mark Rutland Acked-by: Ashwin Chaugule Acked-by: Kevin Hilman Signed-off-by: Lorenzo Pieralisi Signed-off-by: Will Deacon --- drivers/perf/arm_pmu.c | 95 ++++++++++++++++++++++++++++++++++++++++++++ include/linux/perf/arm_pmu.h | 1 + 2 files changed, 96 insertions(+) (limited to 'include/linux') diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c index ca63a452393a..11bacc7220a1 100644 --- a/drivers/perf/arm_pmu.c +++ b/drivers/perf/arm_pmu.c @@ -13,6 +13,7 @@ #include <linux/bitmap.h> #include <linux/cpumask.h> +#include <linux/cpu_pm.h> #include <linux/export.h> #include <linux/kernel.h> #include <linux/of_device.h> @@ -710,6 +711,93 @@ static int cpu_pmu_notify(struct notifier_block *b, unsigned long action, return NOTIFY_OK; } +#ifdef CONFIG_CPU_PM +static void cpu_pm_pmu_setup(struct arm_pmu *armpmu, unsigned long cmd) +{ + struct pmu_hw_events *hw_events = this_cpu_ptr(armpmu->hw_events); + struct perf_event *event; + int idx; + + for (idx = 0; idx < armpmu->num_events; idx++) { + /* + * If the counter is not used skip it, there is no + * need of stopping/restarting it. + */ + if (!test_bit(idx, hw_events->used_mask)) + continue; + + event = hw_events->events[idx]; + + switch (cmd) { + case CPU_PM_ENTER: + /* + * Stop and update the counter + */ + armpmu_stop(event, PERF_EF_UPDATE); + break; + case CPU_PM_EXIT: + case CPU_PM_ENTER_FAILED: + /* Restore and enable the counter */ + armpmu_start(event, PERF_EF_RELOAD); + break; + default: + break; + } + } +} + +static int cpu_pm_pmu_notify(struct notifier_block *b, unsigned long cmd, + void *v) +{ + struct arm_pmu *armpmu = container_of(b, struct arm_pmu, cpu_pm_nb); + struct pmu_hw_events *hw_events = this_cpu_ptr(armpmu->hw_events); + int enabled = bitmap_weight(hw_events->used_mask, armpmu->num_events); + + if (!cpumask_test_cpu(smp_processor_id(), &armpmu->supported_cpus)) + return NOTIFY_DONE; + + /* + * Always reset the PMU registers on power-up even if + * there are no events running.
+ */ + if (cmd == CPU_PM_EXIT && armpmu->reset) + armpmu->reset(armpmu); + + if (!enabled) + return NOTIFY_OK; + + switch (cmd) { + case CPU_PM_ENTER: + armpmu->stop(armpmu); + cpu_pm_pmu_setup(armpmu, cmd); + break; + case CPU_PM_EXIT: + cpu_pm_pmu_setup(armpmu, cmd); + case CPU_PM_ENTER_FAILED: + armpmu->start(armpmu); + break; + default: + return NOTIFY_DONE; + } + + return NOTIFY_OK; +} + +static int cpu_pm_pmu_register(struct arm_pmu *cpu_pmu) +{ + cpu_pmu->cpu_pm_nb.notifier_call = cpu_pm_pmu_notify; + return cpu_pm_register_notifier(&cpu_pmu->cpu_pm_nb); +} + +static void cpu_pm_pmu_unregister(struct arm_pmu *cpu_pmu) +{ + cpu_pm_unregister_notifier(&cpu_pmu->cpu_pm_nb); +} +#else +static inline int cpu_pm_pmu_register(struct arm_pmu *cpu_pmu) { return 0; } +static inline void cpu_pm_pmu_unregister(struct arm_pmu *cpu_pmu) { } +#endif + static int cpu_pmu_init(struct arm_pmu *cpu_pmu) { int err; @@ -725,6 +813,10 @@ static int cpu_pmu_init(struct arm_pmu *cpu_pmu) if (err) goto out_hw_events; + err = cpu_pm_pmu_register(cpu_pmu); + if (err) + goto out_unregister; + for_each_possible_cpu(cpu) { struct pmu_hw_events *events = per_cpu_ptr(cpu_hw_events, cpu); raw_spin_lock_init(&events->pmu_lock); @@ -746,6 +838,8 @@ static int cpu_pmu_init(struct arm_pmu *cpu_pmu) return 0; +out_unregister: + unregister_cpu_notifier(&cpu_pmu->hotplug_nb); out_hw_events: free_percpu(cpu_hw_events); return err; @@ -753,6 +847,7 @@ out_hw_events: static void cpu_pmu_destroy(struct arm_pmu *cpu_pmu) { + cpu_pm_pmu_unregister(cpu_pmu); unregister_cpu_notifier(&cpu_pmu->hotplug_nb); free_percpu(cpu_pmu->hw_events); } diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h index 2d5eaaa90078..4196c90a3c88 100644 --- a/include/linux/perf/arm_pmu.h +++ b/include/linux/perf/arm_pmu.h @@ -108,6 +108,7 @@ struct arm_pmu { struct platform_device *plat_device; struct pmu_hw_events __percpu *hw_events; struct notifier_block hotplug_nb; + struct notifier_block cpu_pm_nb; }; #define to_arm_pmu(p) (container_of(p, struct arm_pmu, pmu)) -- cgit v1.2.3 From 34b0870515aaac6b7ea1ffdc370516b0a8024c82 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 26 Feb 2016 00:22:57 +0100 Subject: cpufreq: Simplify the cpufreq_for_each_valid_entry() That macro uses an internal static inline function that is first totally unnecessary and second hard to read, so simplify it and get rid of that monster. No functional changes. Signed-off-by: Rafael J. Wysocki Acked-by: Viresh Kumar --- include/linux/cpufreq.h | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index d0bf555b6bbf..4064cfcfbffd 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -504,16 +504,6 @@ static inline void dev_pm_opp_free_cpufreq_table(struct device *dev, } #endif -static inline bool cpufreq_next_valid(struct cpufreq_frequency_table **pos) -{ - while ((*pos)->frequency != CPUFREQ_TABLE_END) - if ((*pos)->frequency != CPUFREQ_ENTRY_INVALID) - return true; - else - (*pos)++; - return false; -} - /* * cpufreq_for_each_entry - iterate over a cpufreq_frequency_table * @pos: the cpufreq_frequency_table * to use as a loop cursor. @@ -530,8 +520,11 @@ static inline bool cpufreq_next_valid(struct cpufreq_frequency_table **pos) * @table: the cpufreq_frequency_table * to iterate over. 
*/ -#define cpufreq_for_each_valid_entry(pos, table) \ - for (pos = table; cpufreq_next_valid(&pos); pos++) +#define cpufreq_for_each_valid_entry(pos, table) \ + for (pos = table; pos->frequency != CPUFREQ_TABLE_END; pos++) \ + if (pos->frequency == CPUFREQ_ENTRY_INVALID) \ + continue; \ + else int cpufreq_frequency_table_cpuinfo(struct cpufreq_policy *policy, struct cpufreq_frequency_table *table); -- cgit v1.2.3 From c283e41ef32442f41e7180f9bb1c5aedf9255bfe Mon Sep 17 00:00:00 2001 From: Robert Jarzmik Date: Mon, 15 Feb 2016 21:57:46 +0100 Subject: dmaengine: mmp-pdma: add number of requestors The DMA chip has a fixed number of requestor lines used for flow control. This number is platform dependent. The pxa_dma dma driver will use this value to activate or not the flow control. There won't be any impact on mmp_pdma driver. Signed-off-by: Robert Jarzmik --- include/linux/platform_data/mmp_dma.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/platform_data/mmp_dma.h b/include/linux/platform_data/mmp_dma.h index 2a330ec9e2af..d1397c8ed94e 100644 --- a/include/linux/platform_data/mmp_dma.h +++ b/include/linux/platform_data/mmp_dma.h @@ -14,6 +14,7 @@ struct mmp_dma_platdata { int dma_channels; + int nb_requestors; }; #endif /* MMP_DMA_H */ -- cgit v1.2.3 From 929e7f3bc7b82fb3e72392dec6a1df334cff6313 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Fri, 19 Feb 2016 15:52:32 -0800 Subject: clk: Make of_clk_get_parent_count() return unsigned ints Russell King recently pointed out a bug in the clk-gpio code where it fails to register the clk if of_clk_get_parent_count() returns an error because the "clocks" property isn't present in the DT node. If we're trying to count parents from DT we'd like to know the count, not if there is a "clocks" property or not. Furthermore, some drivers are assigning the return value to their clk_init_data::num_parents member which is unsigned, leading to potentially large numbers of parents when the property isn't present. Let's change the API to return an unsigned int instead of an int. All the callers just want to know the count anyway, and this avoids the bug that was in the clk-gpio driver. 
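For example, with the unsigned return a caller can size its parent-name array directly; a minimal sketch, assuming a hypothetical driver (foo_clk_setup and its locals are invented for illustration, of_clk_parent_fill() is the existing helper):

  static int foo_clk_setup(struct device_node *np)
  {
  	unsigned int num_parents;
  	const char **parent_names;

  	/* A missing "clocks" property now simply reads as zero parents. */
  	num_parents = of_clk_get_parent_count(np);
  	if (!num_parents)
  		return -EINVAL;

  	parent_names = kcalloc(num_parents, sizeof(*parent_names), GFP_KERNEL);
  	if (!parent_names)
  		return -ENOMEM;

  	of_clk_parent_fill(np, parent_names, num_parents);
  	/* ... register the clk using parent_names/num_parents ... */
  	kfree(parent_names);
  	return 0;
  }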
Signed-off-by: Stephen Boyd --- drivers/clk/clk.c | 16 ++++++++++++++-- include/linux/clk-provider.h | 2 +- 2 files changed, 15 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c index b775c88ac36d..fb74dc1f7520 100644 --- a/drivers/clk/clk.c +++ b/drivers/clk/clk.c @@ -2986,9 +2986,21 @@ struct clk *of_clk_get_from_provider(struct of_phandle_args *clkspec) } EXPORT_SYMBOL_GPL(of_clk_get_from_provider); -int of_clk_get_parent_count(struct device_node *np) +/** + * of_clk_get_parent_count() - Count the number of clocks a device node has + * @np: device node to count + * + * Returns: The number of clocks that are possible parents of this node + */ +unsigned int of_clk_get_parent_count(struct device_node *np) { - return of_count_phandle_with_args(np, "clocks", "#clock-cells"); + int count; + + count = of_count_phandle_with_args(np, "clocks", "#clock-cells"); + if (count < 0) + return 0; + + return count; } EXPORT_SYMBOL_GPL(of_clk_get_parent_count); diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h index fce7f027f8a7..da95258127aa 100644 --- a/include/linux/clk-provider.h +++ b/include/linux/clk-provider.h @@ -716,7 +716,7 @@ void of_clk_del_provider(struct device_node *np); struct clk *of_clk_src_simple_get(struct of_phandle_args *clkspec, void *data); struct clk *of_clk_src_onecell_get(struct of_phandle_args *clkspec, void *data); -int of_clk_get_parent_count(struct device_node *np); +unsigned int of_clk_get_parent_count(struct device_node *np); int of_clk_parent_fill(struct device_node *np, const char **parents, unsigned int size); const char *of_clk_get_parent_name(struct device_node *np, int index); -- cgit v1.2.3 From 9a99417acbad99ba6c6a9389b45a53a4d002bb7e Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Sun, 28 Feb 2016 22:22:35 -0600 Subject: objtool: Add STACK_FRAME_NON_STANDARD() macro Add a new macro, STACK_FRAME_NON_STANDARD(), which is used to denote a function which does something unusual related to its stack frame. Use of the macro prevents objtool from emitting a false positive warning. 
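As a usage sketch (the function below is invented for illustration), the annotation sits at file scope next to the function it marks:

  #include <linux/frame.h>

  /* Inline asm here clobbers the frame in a way objtool cannot follow. */
  static void my_weird_trampoline(void)
  {
  	asm volatile("nop" ::: "memory");	/* stand-in for the unusual asm */
  }
  STACK_FRAME_NON_STANDARD(my_weird_trampoline);

With CONFIG_STACK_VALIDATION disabled, the macro compiles away to nothing.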
Signed-off-by: Josh Poimboeuf Cc: Andrew Morton Cc: Andy Lutomirski Cc: Arnaldo Carvalho de Melo Cc: Bernd Petrovitsch Cc: Borislav Petkov Cc: Chris J Arges Cc: Jiri Slaby Cc: Linus Torvalds Cc: Michal Marek Cc: Namhyung Kim Cc: Pedro Alves Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: live-patching@vger.kernel.org Link: http://lkml.kernel.org/r/34487a17b23dba43c50941599d47054a9584b219.1456719558.git.jpoimboe@redhat.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/vmlinux.lds.S | 5 ++++- include/linux/frame.h | 23 +++++++++++++++++++++++ 2 files changed, 27 insertions(+), 1 deletion(-) create mode 100644 include/linux/frame.h (limited to 'include/linux') diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 92dc211c11db..13fa0ad62df8 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -343,7 +343,10 @@ SECTIONS /* Sections to be discarded */ DISCARDS - /DISCARD/ : { *(.eh_frame) } + /DISCARD/ : { + *(.eh_frame) + *(__func_stack_frame_non_standard) + } } diff --git a/include/linux/frame.h b/include/linux/frame.h new file mode 100644 index 000000000000..e6baaba3f1ae --- /dev/null +++ b/include/linux/frame.h @@ -0,0 +1,23 @@ +#ifndef _LINUX_FRAME_H +#define _LINUX_FRAME_H + +#ifdef CONFIG_STACK_VALIDATION +/* + * This macro marks the given function's stack frame as "non-standard", which + * tells objtool to ignore the function when doing stack metadata validation. + * It should only be used in special cases where you're 100% sure it won't + * affect the reliability of frame pointers and kernel stack traces. + * + * For more information, see tools/objtool/Documentation/stack-validation.txt. + */ +#define STACK_FRAME_NON_STANDARD(func) \ + static void __used __section(__func_stack_frame_non_standard) \ + *__func_stack_frame_non_standard_##func = func + +#else /* !CONFIG_STACK_VALIDATION */ + +#define STACK_FRAME_NON_STANDARD(func) + +#endif /* CONFIG_STACK_VALIDATION */ + +#endif /* _LINUX_FRAME_H */ -- cgit v1.2.3 From d3204ab9951b755d35c540836402ae2668d3158b Mon Sep 17 00:00:00 2001 From: Jaehoon Chung Date: Mon, 1 Feb 2016 21:07:35 +0900 Subject: mmc: core: remove the MMC_DATA_STREAM flag It's not set to MMC_DATA_STREAM anywhere. It seems that it had been used with CMD11/CMD20. But according to Spec, CMD11/CMD20 are obsolete command. Signed-off-by: Jaehoon Chung Signed-off-by: Ulf Hansson --- include/linux/mmc/core.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mmc/core.h b/include/linux/mmc/core.h index 37967b6da03c..b01e77de1a74 100644 --- a/include/linux/mmc/core.h +++ b/include/linux/mmc/core.h @@ -113,7 +113,6 @@ struct mmc_data { #define MMC_DATA_WRITE (1 << 8) #define MMC_DATA_READ (1 << 9) -#define MMC_DATA_STREAM (1 << 10) unsigned int bytes_xfered; -- cgit v1.2.3 From 04e24b80a3ddf4cdf85e49a99d33aec27c9432ad Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Tue, 19 Jan 2016 12:28:31 +0100 Subject: mmc: tmio: add flag to reduce delay after changing clock status The docs for RCar Gen2 & 3 I have access to, mention delays of 5ms after stop and 1ms after start. Make it possible to apply these values. 
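A platform that copes with the short delays would opt in through its tmio platform data, roughly like this (board wiring elided; only the flag name comes from this patch, the rest is an assumed board file):

  static struct tmio_mmc_data my_sdhi_pdata = {
  	/* controller tolerates 5 ms after clock stop, 1 ms after start */
  	.flags = TMIO_MMC_FAST_CLK_CHG,
  };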
Signed-off-by: Wolfram Sang Signed-off-by: Ulf Hansson --- drivers/mmc/host/tmio_mmc_pio.c | 7 ++++--- include/linux/mfd/tmio.h | 4 ++++ 2 files changed, 8 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/mmc/host/tmio_mmc_pio.c b/drivers/mmc/host/tmio_mmc_pio.c index a10fde40b6c3..ffff687e98b1 100644 --- a/drivers/mmc/host/tmio_mmc_pio.c +++ b/drivers/mmc/host/tmio_mmc_pio.c @@ -172,7 +172,8 @@ static void tmio_mmc_set_clock(struct tmio_mmc_host *host, host->set_clk_div(host->pdev, (clk>>22) & 1); sd_ctrl_write16(host, CTL_SD_CARD_CLK_CTL, clk & 0x1ff); - msleep(10); + if (!(host->pdata->flags & TMIO_MMC_FAST_CLK_CHG)) + msleep(10); } static void tmio_mmc_clk_stop(struct tmio_mmc_host *host) @@ -185,14 +186,14 @@ static void tmio_mmc_clk_stop(struct tmio_mmc_host *host) sd_ctrl_write16(host, CTL_SD_CARD_CLK_CTL, ~0x0100 & sd_ctrl_read16(host, CTL_SD_CARD_CLK_CTL)); - msleep(10); + msleep(host->pdata->flags & TMIO_MMC_FAST_CLK_CHG ? 5 : 10); } static void tmio_mmc_clk_start(struct tmio_mmc_host *host) { sd_ctrl_write16(host, CTL_SD_CARD_CLK_CTL, 0x0100 | sd_ctrl_read16(host, CTL_SD_CARD_CLK_CTL)); - msleep(10); + msleep(host->pdata->flags & TMIO_MMC_FAST_CLK_CHG ? 1 : 10); /* implicit BUG_ON(!res) */ if (host->pdata->flags & TMIO_MMC_HAVE_HIGH_REG) { diff --git a/include/linux/mfd/tmio.h b/include/linux/mfd/tmio.h index 24b86d538e88..05d58ee5e6a7 100644 --- a/include/linux/mfd/tmio.h +++ b/include/linux/mfd/tmio.h @@ -65,6 +65,10 @@ * Some controllers can support SDIO IRQ signalling. */ #define TMIO_MMC_SDIO_IRQ (1 << 2) + +/* Some controllers don't need to wait 10ms for clock changes */ +#define TMIO_MMC_FAST_CLK_CHG (1 << 3) + /* * Some controllers require waiting for the SD bus to become * idle before writing to some registers. -- cgit v1.2.3 From bf96208f0539d2009ca0b031da2ed7c430edc3a2 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Tue, 19 Jan 2016 12:32:58 +0100 Subject: mmc: tmio: refactor set_clock a little Some of the indentation made the code awful to read. Fix that. Also, introduce defines instead of magic hex values. Note that this includes one change: we now mask out 0xff instead of 0x1ff. But 0x100 has always been the clock enable bit. It doesn't make any sense to set it depending on the clock calculation. Update copyright notices, too. I'll be working on those files some more in the future.
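To spell out the layout the new defines encode (a reader's sketch, not code from this patch): bits 7:0 of CTL_SD_CARD_CLK_CTL hold the divider and bit 8 is the clock enable, so masking with CLK_CTL_DIV_MASK can no longer flip the enable bit the way the old 0x1ff mask could:

  /* program the divider only; bit 8 is owned by clk_start/clk_stop */
  sd_ctrl_write16(host, CTL_SD_CARD_CLK_CTL, clk & CLK_CTL_DIV_MASK);

  /* enabling the clock preserves whatever divider is programmed */
  sd_ctrl_write16(host, CTL_SD_CARD_CLK_CTL,
  		CLK_CTL_SCLKEN | sd_ctrl_read16(host, CTL_SD_CARD_CLK_CTL));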
Signed-off-by: Wolfram Sang Signed-off-by: Ulf Hansson --- drivers/mmc/host/tmio_mmc_pio.c | 15 +++++++++------ include/linux/mmc/tmio.h | 5 +++++ 2 files changed, 14 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/drivers/mmc/host/tmio_mmc_pio.c b/drivers/mmc/host/tmio_mmc_pio.c index 207e77a09cda..7f6b5adf1209 100644 --- a/drivers/mmc/host/tmio_mmc_pio.c +++ b/drivers/mmc/host/tmio_mmc_pio.c @@ -1,6 +1,8 @@ /* * linux/drivers/mmc/host/tmio_mmc_pio.c * + * Copyright (C) 2016 Sang Engineering, Wolfram Sang + * Copyright (C) 2015-16 Renesas Electronics Corporation * Copyright (C) 2011 Guennadi Liakhovetski * Copyright (C) 2007 Ian Molton * Copyright (C) 2004 Ian Molton @@ -159,19 +161,20 @@ static void tmio_mmc_set_clock(struct tmio_mmc_host *host, if (new_clock) { for (clock = host->mmc->f_min, clk = 0x80000080; - new_clock >= (clock<<1); clk >>= 1) + new_clock >= (clock << 1); + clk >>= 1) clock <<= 1; /* 1/1 clock is option */ if ((host->pdata->flags & TMIO_MMC_CLK_ACTUAL) && - ((clk >> 22) & 0x1)) + ((clk >> 22) & 0x1)) clk |= 0xff; } if (host->set_clk_div) - host->set_clk_div(host->pdev, (clk>>22) & 1); + host->set_clk_div(host->pdev, (clk >> 22) & 1); - sd_ctrl_write16(host, CTL_SD_CARD_CLK_CTL, clk & 0x1ff); + sd_ctrl_write16(host, CTL_SD_CARD_CLK_CTL, clk & CLK_CTL_DIV_MASK); if (!(host->pdata->flags & TMIO_MMC_FAST_CLK_CHG)) msleep(10); } @@ -183,14 +186,14 @@ static void tmio_mmc_clk_stop(struct tmio_mmc_host *host) msleep(10); } - sd_ctrl_write16(host, CTL_SD_CARD_CLK_CTL, ~0x0100 & + sd_ctrl_write16(host, CTL_SD_CARD_CLK_CTL, ~CLK_CTL_SCLKEN & sd_ctrl_read16(host, CTL_SD_CARD_CLK_CTL)); msleep(host->pdata->flags & TMIO_MMC_FAST_CLK_CHG ? 5 : 10); } static void tmio_mmc_clk_start(struct tmio_mmc_host *host) { - sd_ctrl_write16(host, CTL_SD_CARD_CLK_CTL, 0x0100 | + sd_ctrl_write16(host, CTL_SD_CARD_CLK_CTL, CLK_CTL_SCLKEN | sd_ctrl_read16(host, CTL_SD_CARD_CLK_CTL)); msleep(host->pdata->flags & TMIO_MMC_FAST_CLK_CHG ? 1 : 10); diff --git a/include/linux/mmc/tmio.h b/include/linux/mmc/tmio.h index 84d9053b5dca..5f5cd80e9765 100644 --- a/include/linux/mmc/tmio.h +++ b/include/linux/mmc/tmio.h @@ -1,6 +1,8 @@ /* * include/linux/mmc/tmio.h * + * Copyright (C) 2016 Sang Engineering, Wolfram Sang + * Copyright (C) 2015-16 Renesas Electronics Corporation * Copyright (C) 2007 Ian Molton * Copyright (C) 2004 Ian Molton * @@ -61,6 +63,9 @@ #define TMIO_STAT_CMD_BUSY 0x40000000 #define TMIO_STAT_ILL_ACCESS 0x80000000 +#define CLK_CTL_DIV_MASK 0xff +#define CLK_CTL_SCLKEN BIT(8) + #define TMIO_BBS 512 /* Boot block size */ #endif /* LINUX_MMC_TMIO_H */ -- cgit v1.2.3 From 13c3d4740216c4931fce1e764b9a76dd55c2da23 Mon Sep 17 00:00:00 2001 From: Shawn Lin Date: Thu, 21 Jan 2016 16:06:14 +0800 Subject: mmc: dw_mmc: remove struct block_settings This patch removes struct block_settings since it's never used anywhere. 
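(For context: the per-host limits such a struct would have described already live on struct mmc_host; the values below are illustrative only, not taken from this patch:)

  /* dw_mci_init_slot-style setup: the block layer reads these from mmc_host */
  mmc->max_segs      = 64;
  mmc->max_blk_size  = 65536;
  mmc->max_blk_count = 512;
  mmc->max_req_size  = mmc->max_blk_size * mmc->max_blk_count;
  mmc->max_seg_size  = mmc->max_req_size;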
Signed-off-by: Shawn Lin Signed-off-by: Jaehoon Chung Signed-off-by: Ulf Hansson --- include/linux/mmc/dw_mmc.h | 8 -------- 1 file changed, 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mmc/dw_mmc.h b/include/linux/mmc/dw_mmc.h index 89df7abedd67..e1f90b80b77a 100644 --- a/include/linux/mmc/dw_mmc.h +++ b/include/linux/mmc/dw_mmc.h @@ -242,14 +242,6 @@ struct dw_mci_dma_ops { struct dma_pdata; -struct block_settings { - unsigned short max_segs; /* see blk_queue_max_segments */ - unsigned int max_blk_size; /* maximum size of one mmc block */ - unsigned int max_blk_count; /* maximum number of blocks in one req*/ - unsigned int max_req_size; /* maximum number of bytes in one req*/ - unsigned int max_seg_size; /* see blk_queue_max_segment_size */ -}; - /* Board platform data */ struct dw_mci_board { u32 num_slots; -- cgit v1.2.3 From e8cc37b8fc3a94d17a2689cd77a7744d70477c14 Mon Sep 17 00:00:00 2001 From: Shawn Lin Date: Thu, 21 Jan 2016 14:52:52 +0800 Subject: mmc: dw_mmc: remove DW_MCI_QUIRK_BROKEN_CARD_DETECTION quirk dw_mmc already use mmc_of_parse to get "broken-cd" property, but it considered "broken-cd" to be a quirk in its driver. We don't need this quirk here, and just take what we need from mmc->caps. Signed-off-by: Shawn Lin Tested-by: Jaehoon Chung Signed-off-by: Jaehoon Chung Signed-off-by: Ulf Hansson --- drivers/mmc/host/dw_mmc.c | 35 ++++++++++------------------------- include/linux/mmc/dw_mmc.h | 4 +--- 2 files changed, 11 insertions(+), 28 deletions(-) (limited to 'include/linux') diff --git a/drivers/mmc/host/dw_mmc.c b/drivers/mmc/host/dw_mmc.c index 6ad9ebf1faef..8cb8c1c8b3dd 100644 --- a/drivers/mmc/host/dw_mmc.c +++ b/drivers/mmc/host/dw_mmc.c @@ -1447,12 +1447,11 @@ static int dw_mci_get_cd(struct mmc_host *mmc) { int present; struct dw_mci_slot *slot = mmc_priv(mmc); - struct dw_mci_board *brd = slot->host->pdata; struct dw_mci *host = slot->host; int gpio_cd = mmc_gpio_get_cd(mmc); /* Use platform get_cd function, else try onboard card detect */ - if ((brd->quirks & DW_MCI_QUIRK_BROKEN_CARD_DETECTION) || + if ((mmc->caps & MMC_CAP_NEEDS_POLL) || (mmc->caps & MMC_CAP_NONREMOVABLE)) present = 1; else if (!IS_ERR_VALUE(gpio_cd)) @@ -2866,23 +2865,13 @@ static void dw_mci_dto_timer(unsigned long arg) } #ifdef CONFIG_OF -static struct dw_mci_of_quirks { - char *quirk; - int id; -} of_quirks[] = { - { - .quirk = "broken-cd", - .id = DW_MCI_QUIRK_BROKEN_CARD_DETECTION, - }, -}; - static struct dw_mci_board *dw_mci_parse_dt(struct dw_mci *host) { struct dw_mci_board *pdata; struct device *dev = host->dev; struct device_node *np = dev->of_node; const struct dw_mci_drv_data *drv_data = host->drv_data; - int idx, ret; + int ret; u32 clock_frequency; pdata = devm_kzalloc(dev, sizeof(*pdata), GFP_KERNEL); @@ -2897,11 +2886,6 @@ static struct dw_mci_board *dw_mci_parse_dt(struct dw_mci *host) pdata->num_slots = 1; } - /* get quirks */ - for (idx = 0; idx < ARRAY_SIZE(of_quirks); idx++) - if (of_get_property(np, of_quirks[idx].quirk, NULL)) - pdata->quirks |= of_quirks[idx].id; - if (of_property_read_u32(np, "fifo-depth", &pdata->fifo_depth)) dev_info(dev, "fifo-depth property not found, using value of FIFOTH register as default\n"); @@ -2934,18 +2918,19 @@ static struct dw_mci_board *dw_mci_parse_dt(struct dw_mci *host) static void dw_mci_enable_cd(struct dw_mci *host) { - struct dw_mci_board *brd = host->pdata; unsigned long irqflags; u32 temp; int i; + struct dw_mci_slot *slot; - /* No need for CD if broken card detection */ - if (brd->quirks & 
DW_MCI_QUIRK_BROKEN_CARD_DETECTION) - return; - - /* No need for CD if all slots have a non-error GPIO */ + /* + * No need for CD if all slots have a non-error GPIO + * as well as broken card detection is found. + */ for (i = 0; i < host->num_slots; i++) { - struct dw_mci_slot *slot = host->slot[i]; + slot = host->slot[i]; + if (slot->mmc->caps & MMC_CAP_NEEDS_POLL) + return; if (IS_ERR_VALUE(mmc_gpio_get_cd(slot->mmc))) break; diff --git a/include/linux/mmc/dw_mmc.h b/include/linux/mmc/dw_mmc.h index e1f90b80b77a..7b41c6db1bb6 100644 --- a/include/linux/mmc/dw_mmc.h +++ b/include/linux/mmc/dw_mmc.h @@ -235,10 +235,8 @@ struct dw_mci_dma_ops { }; /* IP Quirks/flags. */ -/* Unreliable card detection */ -#define DW_MCI_QUIRK_BROKEN_CARD_DETECTION BIT(0) /* Timer for broken data transfer over scheme */ -#define DW_MCI_QUIRK_BROKEN_DTO BIT(1) +#define DW_MCI_QUIRK_BROKEN_DTO BIT(0) struct dma_pdata; -- cgit v1.2.3 From 30f065bf1f05b98ac6b5620c4ce6d0e865f9dc95 Mon Sep 17 00:00:00 2001 From: Maarten ter Huurne Date: Sun, 28 Feb 2016 16:53:26 +0100 Subject: regulator: act8865: Rename platform_data field to init_data Make the field name match its type. Signed-off-by: Maarten ter Huurne Signed-off-by: Mark Brown --- drivers/regulator/act8865-regulator.c | 4 ++-- include/linux/regulator/act8865.h | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/drivers/regulator/act8865-regulator.c b/drivers/regulator/act8865-regulator.c index 984c30fa9516..1994795407c5 100644 --- a/drivers/regulator/act8865-regulator.c +++ b/drivers/regulator/act8865-regulator.c @@ -369,7 +369,7 @@ static int act8865_pdata_from_dt(struct device *dev, for (i = 0; i < num_matches; i++) { regulator->id = i; regulator->name = matches[i].name; - regulator->platform_data = matches[i].init_data; + regulator->init_data = matches[i].init_data; of_node[i] = matches[i].of_node; regulator++; } @@ -396,7 +396,7 @@ static struct regulator_init_data for (i = 0; i < pdata->num_regulators; i++) { if (pdata->regulators[i].id == id) - return pdata->regulators[i].platform_data; + return pdata->regulators[i].init_data; } return NULL; diff --git a/include/linux/regulator/act8865.h b/include/linux/regulator/act8865.h index 15fa8f2d35c9..2eb386017fa5 100644 --- a/include/linux/regulator/act8865.h +++ b/include/linux/regulator/act8865.h @@ -68,12 +68,12 @@ enum { * act8865_regulator_data - regulator data * @id: regulator id * @name: regulator name - * @platform_data: regulator init data + * @init_data: regulator init data */ struct act8865_regulator_data { int id; const char *name; - struct regulator_init_data *platform_data; + struct regulator_init_data *init_data; }; /** -- cgit v1.2.3 From 0e451e883bd13ce616f439e2414b8c17fa28318a Mon Sep 17 00:00:00 2001 From: Marina Varshaver Date: Thu, 18 Feb 2016 18:31:06 +0200 Subject: IB/mlx4: Add support for the don't trap rule Add support for receiving multicast/unicast traffic with the don't trap rule. Sniffing these packets requires a flow steering rule of type NORMAL at priority 0 with flag IB_FLOW_ATTR_FLAGS_DONT_TRAP set. Choosing between multicast or unicast is done via ethernet L2 dest_mac mask and value: - If mask is all zeros - unicast and multicast are set. - If mask non zero - only mask with multicast bit 1 and rest 0 is supported, the mac value will choose if it is multicast or unicast rule. 
If the mask multicast bit is on and some other bits are on too, it means a request for a specific multicast or unicast address; this is not supported: either receive all multicast or all unicast. Only when these limitations are met will the registered QP receive the requested traffic type, but other QPs can still receive the same traffic if they are registered for it. Otherwise, if the limitations are not met, an error will be returned. Limitations: - Rule must have priority 0. - A0 mode is not supported. - Sniffer QP cannot appear in any other flow steering rule. Signed-off-by: Marina Varshaver Reviewed-by: Matan Barak Reviewed-by: Yishai Hadas Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx4/main.c | 71 ++++++++++++++++++++++++++++++-- drivers/net/ethernet/mellanox/mlx4/fw.c | 5 ++- drivers/net/ethernet/mellanox/mlx4/mcg.c | 6 ++- include/linux/mlx4/device.h | 3 ++ 4 files changed, 78 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 41f2c25df0d6..914bc98e753f 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -1643,6 +1643,56 @@ static int mlx4_ib_tunnel_steer_add(struct ib_qp *qp, struct ib_flow_attr *flow_ return err; } +static int mlx4_ib_add_dont_trap_rule(struct mlx4_dev *dev, + struct ib_flow_attr *flow_attr, + enum mlx4_net_trans_promisc_mode *type) +{ + int err = 0; + + if (!(dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_DMFS_UC_MC_SNIFFER) || + (dev->caps.dmfs_high_steer_mode == MLX4_STEERING_DMFS_A0_STATIC) || + (flow_attr->num_of_specs > 1) || (flow_attr->priority != 0)) { + return -EOPNOTSUPP; + } + + if (flow_attr->num_of_specs == 0) { + type[0] = MLX4_FS_MC_SNIFFER; + type[1] = MLX4_FS_UC_SNIFFER; + } else { + union ib_flow_spec *ib_spec; + + ib_spec = (union ib_flow_spec *)(flow_attr + 1); + if (ib_spec->type != IB_FLOW_SPEC_ETH) + return -EINVAL; + + /* if all is zero than MC and UC */ + if (is_zero_ether_addr(ib_spec->eth.mask.dst_mac)) { + type[0] = MLX4_FS_MC_SNIFFER; + type[1] = MLX4_FS_UC_SNIFFER; + } else { + u8 mac[ETH_ALEN] = {ib_spec->eth.mask.dst_mac[0] ^ 0x01, + ib_spec->eth.mask.dst_mac[1], + ib_spec->eth.mask.dst_mac[2], + ib_spec->eth.mask.dst_mac[3], + ib_spec->eth.mask.dst_mac[4], + ib_spec->eth.mask.dst_mac[5]}; + + /* Above xor was only on MC bit, non empty mask is valid + * only if this bit is set and rest are zero.
+ */ + if (!is_zero_ether_addr(&mac[0])) + return -EINVAL; + + if (is_multicast_ether_addr(ib_spec->eth.val.dst_mac)) + type[0] = MLX4_FS_MC_SNIFFER; + else + type[0] = MLX4_FS_UC_SNIFFER; + } + } + + return err; +} + static struct ib_flow *mlx4_ib_create_flow(struct ib_qp *qp, struct ib_flow_attr *flow_attr, int domain) @@ -1653,7 +1703,8 @@ static struct ib_flow *mlx4_ib_create_flow(struct ib_qp *qp, struct mlx4_dev *dev = (to_mdev(qp->device))->dev; int is_bonded = mlx4_is_bonded(dev); - if (flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP) + if ((flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP) && + (flow_attr->type != IB_FLOW_ATTR_NORMAL)) return ERR_PTR(-EOPNOTSUPP); memset(type, 0, sizeof(type)); @@ -1666,7 +1717,19 @@ static struct ib_flow *mlx4_ib_create_flow(struct ib_qp *qp, switch (flow_attr->type) { case IB_FLOW_ATTR_NORMAL: - type[0] = MLX4_FS_REGULAR; + /* If dont trap flag (continue match) is set, under specific + * condition traffic be replicated to given qp, + * without stealing it + */ + if (unlikely(flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP)) { + err = mlx4_ib_add_dont_trap_rule(dev, + flow_attr, + type); + if (err) + goto err_free; + } else { + type[0] = MLX4_FS_REGULAR; + } break; case IB_FLOW_ATTR_ALL_DEFAULT: @@ -1678,8 +1741,8 @@ static struct ib_flow *mlx4_ib_create_flow(struct ib_qp *qp, break; case IB_FLOW_ATTR_SNIFFER: - type[0] = MLX4_FS_UC_SNIFFER; - type[1] = MLX4_FS_MC_SNIFFER; + type[0] = MLX4_FS_MIRROR_RX_PORT; + type[1] = MLX4_FS_MIRROR_SX_PORT; break; default: diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c index d66c690a8597..e97094598b2d 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.c +++ b/drivers/net/ethernet/mellanox/mlx4/fw.c @@ -157,7 +157,8 @@ static void dump_dev_cap_flags2(struct mlx4_dev *dev, u64 flags) [29] = "802.1ad offload support", [31] = "Modifying loopback source checks using UPDATE_QP support", [32] = "Loopback source checks support", - [33] = "RoCEv2 support" + [33] = "RoCEv2 support", + [34] = "DMFS Sniffer support (UC & MC)" }; int i; @@ -810,6 +811,8 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) if (field & 0x80) dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_FS_EN; dev_cap->fs_log_max_ucast_qp_range_size = field & 0x1f; + if (field & 0x20) + dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_DMFS_UC_MC_SNIFFER; MLX4_GET(field, outbox, QUERY_DEV_CAP_PORT_BEACON_OFFSET); if (field & 0x80) dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_PORT_BEACON; diff --git a/drivers/net/ethernet/mellanox/mlx4/mcg.c b/drivers/net/ethernet/mellanox/mlx4/mcg.c index 1d4e2e054647..42d8de892bfe 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mcg.c +++ b/drivers/net/ethernet/mellanox/mlx4/mcg.c @@ -752,8 +752,10 @@ static const u8 __promisc_mode[] = { [MLX4_FS_REGULAR] = 0x0, [MLX4_FS_ALL_DEFAULT] = 0x1, [MLX4_FS_MC_DEFAULT] = 0x3, - [MLX4_FS_UC_SNIFFER] = 0x4, - [MLX4_FS_MC_SNIFFER] = 0x5, + [MLX4_FS_MIRROR_RX_PORT] = 0x4, + [MLX4_FS_MIRROR_SX_PORT] = 0x5, + [MLX4_FS_UC_SNIFFER] = 0x6, + [MLX4_FS_MC_SNIFFER] = 0x7, }; int mlx4_map_sw_to_hw_steering_mode(struct mlx4_dev *dev, diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index a0e8cc8dcc67..8541a913f6a3 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -219,6 +219,7 @@ enum { MLX4_DEV_CAP_FLAG2_UPDATE_QP_SRC_CHECK_LB = 1ULL << 31, MLX4_DEV_CAP_FLAG2_LB_SRC_CHK = 1ULL << 32, MLX4_DEV_CAP_FLAG2_ROCE_V1_V2 = 1ULL << 33, + MLX4_DEV_CAP_FLAG2_DMFS_UC_MC_SNIFFER = 1ULL << 34, }; enum { @@ -1160,6 
+1161,8 @@ enum mlx4_net_trans_promisc_mode { MLX4_FS_REGULAR = 1, MLX4_FS_ALL_DEFAULT, MLX4_FS_MC_DEFAULT, + MLX4_FS_MIRROR_RX_PORT, + MLX4_FS_MIRROR_SX_PORT, MLX4_FS_UC_SNIFFER, MLX4_FS_MC_SNIFFER, MLX4_FS_MODE_NUM, /* should be last */ -- cgit v1.2.3 From 4d8b229d5ea610affe672e919021e9d02cd877da Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Fri, 26 Feb 2016 17:32:49 -0800 Subject: watchdog: Add 'action' and 'data' parameters to restart handler callback The 'action' (or restart mode) and data parameters may be used by restart handlers, so they should be passed to the restart callback functions. Cc: Sylvain Lemieux Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/bcm47xx_wdt.c | 3 ++- drivers/watchdog/da9063_wdt.c | 3 ++- drivers/watchdog/digicolor_wdt.c | 3 ++- drivers/watchdog/imgpdc_wdt.c | 3 ++- drivers/watchdog/imx2_wdt.c | 3 ++- drivers/watchdog/lpc18xx_wdt.c | 3 ++- drivers/watchdog/meson_wdt.c | 3 ++- drivers/watchdog/moxart_wdt.c | 3 ++- drivers/watchdog/mtk_wdt.c | 3 ++- drivers/watchdog/qcom-wdt.c | 3 ++- drivers/watchdog/s3c2410_wdt.c | 3 ++- drivers/watchdog/sunxi_wdt.c | 3 ++- drivers/watchdog/watchdog_core.c | 2 +- include/linux/watchdog.h | 2 +- 14 files changed, 26 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/drivers/watchdog/bcm47xx_wdt.c b/drivers/watchdog/bcm47xx_wdt.c index df1c2a4b0165..a1900b9ab6c4 100644 --- a/drivers/watchdog/bcm47xx_wdt.c +++ b/drivers/watchdog/bcm47xx_wdt.c @@ -87,7 +87,8 @@ static int bcm47xx_wdt_hard_set_timeout(struct watchdog_device *wdd, return 0; } -static int bcm47xx_wdt_restart(struct watchdog_device *wdd) +static int bcm47xx_wdt_restart(struct watchdog_device *wdd, + unsigned long action, void *data) { struct bcm47xx_wdt *wdt = bcm47xx_wdt_get(wdd); diff --git a/drivers/watchdog/da9063_wdt.c b/drivers/watchdog/da9063_wdt.c index 11e887572649..a100f648880d 100644 --- a/drivers/watchdog/da9063_wdt.c +++ b/drivers/watchdog/da9063_wdt.c @@ -119,7 +119,8 @@ static int da9063_wdt_set_timeout(struct watchdog_device *wdd, return ret; } -static int da9063_wdt_restart(struct watchdog_device *wdd) +static int da9063_wdt_restart(struct watchdog_device *wdd, unsigned long action, + void *data) { struct da9063_watchdog *wdt = watchdog_get_drvdata(wdd); int ret; diff --git a/drivers/watchdog/digicolor_wdt.c b/drivers/watchdog/digicolor_wdt.c index 1ccb0b239348..77df772406b0 100644 --- a/drivers/watchdog/digicolor_wdt.c +++ b/drivers/watchdog/digicolor_wdt.c @@ -48,7 +48,8 @@ static void dc_wdt_set(struct dc_wdt *wdt, u32 ticks) spin_unlock_irqrestore(&wdt->lock, flags); } -static int dc_wdt_restart(struct watchdog_device *wdog) +static int dc_wdt_restart(struct watchdog_device *wdog, unsigned long action, + void *data) { struct dc_wdt *wdt = watchdog_get_drvdata(wdog); diff --git a/drivers/watchdog/imgpdc_wdt.c b/drivers/watchdog/imgpdc_wdt.c index 3679f2e1922f..516fbef00856 100644 --- a/drivers/watchdog/imgpdc_wdt.c +++ b/drivers/watchdog/imgpdc_wdt.c @@ -150,7 +150,8 @@ static int pdc_wdt_start(struct watchdog_device *wdt_dev) return 0; } -static int pdc_wdt_restart(struct watchdog_device *wdt_dev) +static int pdc_wdt_restart(struct watchdog_device *wdt_dev, + unsigned long action, void *data) { struct pdc_wdt_dev *wdt = watchdog_get_drvdata(wdt_dev); diff --git a/drivers/watchdog/imx2_wdt.c b/drivers/watchdog/imx2_wdt.c index e47966aa2db0..4cb59a23aab0 100644 --- a/drivers/watchdog/imx2_wdt.c +++ b/drivers/watchdog/imx2_wdt.c @@ -80,7 +80,8 @@ static const struct watchdog_info 
imx2_wdt_info = { .options = WDIOF_KEEPALIVEPING | WDIOF_SETTIMEOUT | WDIOF_MAGICCLOSE, }; -static int imx2_wdt_restart(struct watchdog_device *wdog) +static int imx2_wdt_restart(struct watchdog_device *wdog, unsigned long action, + void *data) { struct imx2_wdt_device *wdev = watchdog_get_drvdata(wdog); unsigned int wcr_enable = IMX2_WDT_WCR_WDE; diff --git a/drivers/watchdog/lpc18xx_wdt.c b/drivers/watchdog/lpc18xx_wdt.c index 6914c83aa6d9..fd171e6caa16 100644 --- a/drivers/watchdog/lpc18xx_wdt.c +++ b/drivers/watchdog/lpc18xx_wdt.c @@ -153,7 +153,8 @@ static int lpc18xx_wdt_start(struct watchdog_device *wdt_dev) return 0; } -static int lpc18xx_wdt_restart(struct watchdog_device *wdt_dev) +static int lpc18xx_wdt_restart(struct watchdog_device *wdt_dev, + unsigned long action, void *data) { struct lpc18xx_wdt_dev *lpc18xx_wdt = watchdog_get_drvdata(wdt_dev); unsigned long flags; diff --git a/drivers/watchdog/meson_wdt.c b/drivers/watchdog/meson_wdt.c index aea5d2f44ad7..56ea1caf71c3 100644 --- a/drivers/watchdog/meson_wdt.c +++ b/drivers/watchdog/meson_wdt.c @@ -62,7 +62,8 @@ struct meson_wdt_dev { const struct meson_wdt_data *data; }; -static int meson_wdt_restart(struct watchdog_device *wdt_dev) +static int meson_wdt_restart(struct watchdog_device *wdt_dev, + unsigned long action, void *data) { struct meson_wdt_dev *meson_wdt = watchdog_get_drvdata(wdt_dev); u32 tc_reboot = MESON_WDT_DC_RESET; diff --git a/drivers/watchdog/moxart_wdt.c b/drivers/watchdog/moxart_wdt.c index 885c81bc4210..2c4a73d1e214 100644 --- a/drivers/watchdog/moxart_wdt.c +++ b/drivers/watchdog/moxart_wdt.c @@ -31,7 +31,8 @@ struct moxart_wdt_dev { static int heartbeat; -static int moxart_wdt_restart(struct watchdog_device *wdt_dev) +static int moxart_wdt_restart(struct watchdog_device *wdt_dev, + unsigned long action, void *data) { struct moxart_wdt_dev *moxart_wdt = watchdog_get_drvdata(wdt_dev); diff --git a/drivers/watchdog/mtk_wdt.c b/drivers/watchdog/mtk_wdt.c index b78776c05554..7ed417a765c7 100644 --- a/drivers/watchdog/mtk_wdt.c +++ b/drivers/watchdog/mtk_wdt.c @@ -64,7 +64,8 @@ struct mtk_wdt_dev { void __iomem *wdt_base; }; -static int mtk_wdt_restart(struct watchdog_device *wdt_dev) +static int mtk_wdt_restart(struct watchdog_device *wdt_dev, + unsigned long action, void *data) { struct mtk_wdt_dev *mtk_wdt = watchdog_get_drvdata(wdt_dev); void __iomem *wdt_base; diff --git a/drivers/watchdog/qcom-wdt.c b/drivers/watchdog/qcom-wdt.c index 424f9a952fee..20563ccb7be0 100644 --- a/drivers/watchdog/qcom-wdt.c +++ b/drivers/watchdog/qcom-wdt.c @@ -70,7 +70,8 @@ static int qcom_wdt_set_timeout(struct watchdog_device *wdd, return qcom_wdt_start(wdd); } -static int qcom_wdt_restart(struct watchdog_device *wdd) +static int qcom_wdt_restart(struct watchdog_device *wdd, unsigned long action, + void *data) { struct qcom_wdt *wdt = to_qcom_wdt(wdd); u32 timeout; diff --git a/drivers/watchdog/s3c2410_wdt.c b/drivers/watchdog/s3c2410_wdt.c index 0093450441fe..d57f19b5077f 100644 --- a/drivers/watchdog/s3c2410_wdt.c +++ b/drivers/watchdog/s3c2410_wdt.c @@ -349,7 +349,8 @@ static int s3c2410wdt_set_heartbeat(struct watchdog_device *wdd, unsigned timeou return 0; } -static int s3c2410wdt_restart(struct watchdog_device *wdd) +static int s3c2410wdt_restart(struct watchdog_device *wdd, unsigned long action, + void *data) { struct s3c2410_wdt *wdt = watchdog_get_drvdata(wdd); void __iomem *wdt_base = wdt->reg_base; diff --git a/drivers/watchdog/sunxi_wdt.c b/drivers/watchdog/sunxi_wdt.c index e027deb54740..953bb7b7446f 100644 
--- a/drivers/watchdog/sunxi_wdt.c +++ b/drivers/watchdog/sunxi_wdt.c @@ -83,7 +83,8 @@ static const int wdt_timeout_map[] = { }; -static int sunxi_wdt_restart(struct watchdog_device *wdt_dev) +static int sunxi_wdt_restart(struct watchdog_device *wdt_dev, + unsigned long action, void *data) { struct sunxi_wdt_dev *sunxi_wdt = watchdog_get_drvdata(wdt_dev); void __iomem *wdt_base = sunxi_wdt->wdt_base; diff --git a/drivers/watchdog/watchdog_core.c b/drivers/watchdog/watchdog_core.c index e600fd93b7de..d9b3c9c023c4 100644 --- a/drivers/watchdog/watchdog_core.c +++ b/drivers/watchdog/watchdog_core.c @@ -164,7 +164,7 @@ static int watchdog_restart_notifier(struct notifier_block *nb, int ret; - ret = wdd->ops->restart(wdd); + ret = wdd->ops->restart(wdd, action, data); if (ret) return NOTIFY_BAD; diff --git a/include/linux/watchdog.h b/include/linux/watchdog.h index b585fa2507ee..0b565f2ad242 100644 --- a/include/linux/watchdog.h +++ b/include/linux/watchdog.h @@ -46,7 +46,7 @@ struct watchdog_ops { unsigned int (*status)(struct watchdog_device *); int (*set_timeout)(struct watchdog_device *, unsigned int); unsigned int (*get_timeleft)(struct watchdog_device *); - int (*restart)(struct watchdog_device *); + int (*restart)(struct watchdog_device *, unsigned long, void *); long (*ioctl)(struct watchdog_device *, unsigned int, unsigned long); }; -- cgit v1.2.3 From b54ba2772b7af82a07eb48f88c88f7cadfb33401 Mon Sep 17 00:00:00 2001 From: Meny Yossefi Date: Thu, 18 Feb 2016 18:14:59 +0200 Subject: net/mlx5_core: Add helper function to read virtual port counters Added helper function to read 64bit virtual port Infiniband traffic counters. Signed-off-by: Meny Yossefi Signed-off-by: Majd Dibbiny Reviewed-by: Matan Barak Signed-off-by: Doug Ledford --- drivers/net/ethernet/mellanox/mlx5/core/vport.c | 40 +++++++++++++++++++++++++ include/linux/mlx5/mlx5_ifc.h | 3 +- include/linux/mlx5/vport.h | 2 ++ 3 files changed, 44 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c index c7398b95aecd..90ab09e375b8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c @@ -850,3 +850,43 @@ int mlx5_nic_vport_disable_roce(struct mlx5_core_dev *mdev) return mlx5_nic_vport_update_roce_state(mdev, MLX5_VPORT_ROCE_DISABLED); } EXPORT_SYMBOL_GPL(mlx5_nic_vport_disable_roce); + +int mlx5_core_query_vport_counter(struct mlx5_core_dev *dev, u8 other_vport, + u8 port_num, void *out, size_t out_sz) +{ + int in_sz = MLX5_ST_SZ_BYTES(query_vport_counter_in); + int is_group_manager; + void *in; + int err; + + is_group_manager = MLX5_CAP_GEN(dev, vport_group_manager); + in = mlx5_vzalloc(in_sz); + if (!in) { + err = -ENOMEM; + return err; + } + + MLX5_SET(query_vport_counter_in, in, opcode, + MLX5_CMD_OP_QUERY_VPORT_COUNTER); + if (other_vport) { + if (is_group_manager) { + MLX5_SET(query_vport_counter_in, in, other_vport, 1); + MLX5_SET(query_vport_counter_in, in, vport_number, 0); + } else { + err = -EPERM; + goto free; + } + } + if (MLX5_CAP_GEN(dev, num_ports) == 2) + MLX5_SET(query_vport_counter_in, in, port_num, port_num); + + err = mlx5_cmd_exec(dev, in, in_sz, out, out_sz); + if (err) + goto free; + err = mlx5_cmd_status_to_err_v2(out); + +free: + kvfree(in); + return err; +} +EXPORT_SYMBOL_GPL(mlx5_core_query_vport_counter); diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 51f1e540fc2b..0732e6c1fe7a 100644 --- 
a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -3126,7 +3126,8 @@ struct mlx5_ifc_query_vport_counter_in_bits { u8 op_mod[0x10]; u8 other_vport[0x1]; - u8 reserved_at_41[0xf]; + u8 reserved_at_41[0xb]; + u8 port_num[0x4]; u8 vport_number[0x10]; u8 reserved_at_60[0x60]; diff --git a/include/linux/mlx5/vport.h b/include/linux/mlx5/vport.h index 123771003e68..a9f2bcc98cab 100644 --- a/include/linux/mlx5/vport.h +++ b/include/linux/mlx5/vport.h @@ -92,5 +92,7 @@ int mlx5_modify_nic_vport_vlans(struct mlx5_core_dev *dev, int mlx5_nic_vport_enable_roce(struct mlx5_core_dev *mdev); int mlx5_nic_vport_disable_roce(struct mlx5_core_dev *mdev); +int mlx5_core_query_vport_counter(struct mlx5_core_dev *dev, u8 other_vport, + u8 port_num, void *out, size_t out_sz); #endif /* __MLX5_VPORT_H__ */ -- cgit v1.2.3 From 1c64bf6f291cae7cbe779e407db9477378bb4e7d Mon Sep 17 00:00:00 2001 From: Meny Yossefi Date: Thu, 18 Feb 2016 18:15:00 +0200 Subject: net/mlx5_core: Add helper function to read IB error counters Added helper function to read IB standard error counters via the PPCNT register. The PPCNT register read command provides the 32-bit error counters of both IB/RoCE link layer and transport layer. Signed-off-by: Meny Yossefi Signed-off-by: Majd Dibbiny Reviewed-by: Matan Barak Signed-off-by: Doug Ledford --- drivers/net/ethernet/mellanox/mlx5/core/port.c | 23 ++++++++++++++++++ include/linux/mlx5/device.h | 3 ++- include/linux/mlx5/driver.h | 2 ++ include/linux/mlx5/mlx5_ifc.h | 32 ++++++++++++++++++++++++++ 4 files changed, 59 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/port.c b/drivers/net/ethernet/mellanox/mlx5/core/port.c index a87e773e93f3..5635ce7ad693 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/port.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/port.c @@ -324,6 +324,29 @@ int mlx5_query_port_vl_hw_cap(struct mlx5_core_dev *dev, } EXPORT_SYMBOL_GPL(mlx5_query_port_vl_hw_cap); +int mlx5_core_query_ib_ppcnt(struct mlx5_core_dev *dev, + u8 port_num, void *out, size_t sz) +{ + u32 *in; + int err; + + in = mlx5_vzalloc(sz); + if (!in) { + err = -ENOMEM; + return err; + } + + MLX5_SET(ppcnt_reg, in, local_port, port_num); + + MLX5_SET(ppcnt_reg, in, grp, MLX5_INFINIBAND_PORT_COUNTERS_GROUP); + err = mlx5_core_access_reg(dev, in, sz, out, + sz, MLX5_REG_PPCNT, 0, 0); + + kvfree(in); + return err; +} +EXPORT_SYMBOL_GPL(mlx5_core_query_ib_ppcnt); + int mlx5_set_port_pause(struct mlx5_core_dev *dev, u32 rx_pause, u32 tx_pause) { u32 in[MLX5_ST_SZ_DW(pfcc_reg)]; diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 987764afa65c..99e2edc93d27 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -1284,7 +1284,8 @@ enum { MLX5_RFC_3635_COUNTERS_GROUP = 0x3, MLX5_ETHERNET_EXTENDED_COUNTERS_GROUP = 0x5, MLX5_PER_PRIORITY_COUNTERS_GROUP = 0x10, - MLX5_PER_TRAFFIC_CLASS_COUNTERS_GROUP = 0x11 + MLX5_PER_TRAFFIC_CLASS_COUNTERS_GROUP = 0x11, + MLX5_INFINIBAND_PORT_COUNTERS_GROUP = 0x20, }; static inline u16 mlx5_to_sw_pkey_sz(int pkey_sz) diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 1e3006dcf35d..8edcd08853dd 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -847,6 +847,8 @@ int mlx5_core_destroy_psv(struct mlx5_core_dev *dev, int psv_num); void mlx5_core_put_rsc(struct mlx5_core_rsc_common *common); int mlx5_query_odp_caps(struct mlx5_core_dev *dev, struct mlx5_odp_caps *odp_caps); +int 
mlx5_core_query_ib_ppcnt(struct mlx5_core_dev *dev, + u8 port_num, void *out, size_t sz); static inline int fw_initializing(struct mlx5_core_dev *dev) { diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 0732e6c1fe7a..9f404de5f99b 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1208,6 +1208,36 @@ struct mlx5_ifc_phys_layer_cntrs_bits { u8 reserved_at_640[0x180]; }; +struct mlx5_ifc_ib_port_cntrs_grp_data_layout_bits { + u8 symbol_error_counter[0x10]; + + u8 link_error_recovery_counter[0x8]; + + u8 link_downed_counter[0x8]; + + u8 port_rcv_errors[0x10]; + + u8 port_rcv_remote_physical_errors[0x10]; + + u8 port_rcv_switch_relay_errors[0x10]; + + u8 port_xmit_discards[0x10]; + + u8 port_xmit_constraint_errors[0x8]; + + u8 port_rcv_constraint_errors[0x8]; + + u8 reserved_at_70[0x8]; + + u8 link_overrun_errors[0x8]; + + u8 reserved_at_80[0x10]; + + u8 vl_15_dropped[0x10]; + + u8 reserved_at_a0[0xa0]; +}; + struct mlx5_ifc_eth_per_traffic_grp_data_layout_bits { u8 transmit_queue_high[0x20]; @@ -2618,6 +2648,7 @@ union mlx5_ifc_eth_cntrs_grp_data_layout_auto_bits { struct mlx5_ifc_eth_extended_cntrs_grp_data_layout_bits eth_extended_cntrs_grp_data_layout; struct mlx5_ifc_eth_per_prio_grp_data_layout_bits eth_per_prio_grp_data_layout; struct mlx5_ifc_eth_per_traffic_grp_data_layout_bits eth_per_traffic_grp_data_layout; + struct mlx5_ifc_ib_port_cntrs_grp_data_layout_bits ib_port_cntrs_grp_data_layout; struct mlx5_ifc_phys_layer_cntrs_bits phys_layer_cntrs; u8 reserved_at_0[0x7c0]; }; @@ -6955,6 +6986,7 @@ union mlx5_ifc_ports_control_registers_document_bits { struct mlx5_ifc_peir_reg_bits peir_reg; struct mlx5_ifc_pelc_reg_bits pelc_reg; struct mlx5_ifc_pfcc_reg_bits pfcc_reg; + struct mlx5_ifc_ib_port_cntrs_grp_data_layout_bits ib_port_cntrs_grp_data_layout; struct mlx5_ifc_phys_layer_cntrs_bits phys_layer_cntrs; struct mlx5_ifc_pifr_reg_bits pifr_reg; struct mlx5_ifc_pipg_reg_bits pipg_reg; -- cgit v1.2.3 From 3efd9a11212d500e36c2837db853178cdaa86d5a Mon Sep 17 00:00:00 2001 From: Meny Yossefi Date: Thu, 18 Feb 2016 18:15:01 +0200 Subject: IB/mlx5: Modify MAD reading counters method to use counter registers Modify mlx5_ib_process_mad to use PPCNT and query_vport commands instead of MAD_IFC, as MAD_IFC is deprecated on new firmware versions (and doesn't support RoCE anyway). Traffic counters exist in both 32-bit and 64-bit forms. Declaring support of extended counters results in traffic counters being read in their 64-bit form only, via the query_vport command. Error counters exist only in 32-bit form and are read via the PPCNT command. This commit also adds counters support in RoCE.
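A condensed view of the extended-counters path added in this patch (error handling trimmed; all names come from this series):

  int sz = MLX5_ST_SZ_BYTES(query_vport_counter_out);
  void *out = mlx5_vzalloc(sz);

  if (!out)
  	return IB_MAD_RESULT_FAILURE;

  /* one firmware query returns the full set of 64-bit traffic counters */
  if (!mlx5_core_query_vport_counter(dev->mdev, 0, port_num, out, sz))
  	pma_cnt_ext->port_unicast_xmit_packets =
  		MLX5_GET64_BE(query_vport_counter_out, out,
  			      transmitted_ib_unicast.packets);
  kvfree(out);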
Signed-off-by: Meny Yossefi Signed-off-by: Majd Dibbiny Reviewed-by: Matan Barak Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/mad.c | 166 ++++++++++++++++++++++++++++++++++++--- include/linux/mlx5/device.h | 23 ++++++ 2 files changed, 178 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/drivers/infiniband/hw/mlx5/mad.c b/drivers/infiniband/hw/mlx5/mad.c index b84d13a487cc..41d8a0036465 100644 --- a/drivers/infiniband/hw/mlx5/mad.c +++ b/drivers/infiniband/hw/mlx5/mad.c @@ -31,8 +31,10 @@ */ #include +#include #include #include +#include #include "mlx5_ib.h" enum { @@ -57,20 +59,12 @@ int mlx5_MAD_IFC(struct mlx5_ib_dev *dev, int ignore_mkey, int ignore_bkey, return mlx5_core_mad_ifc(dev->mdev, in_mad, response_mad, op_modifier, port); } -int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, - const struct ib_wc *in_wc, const struct ib_grh *in_grh, - const struct ib_mad_hdr *in, size_t in_mad_size, - struct ib_mad_hdr *out, size_t *out_mad_size, - u16 *out_mad_pkey_index) +static int process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, + const struct ib_wc *in_wc, const struct ib_grh *in_grh, + const struct ib_mad *in_mad, struct ib_mad *out_mad) { u16 slid; int err; - const struct ib_mad *in_mad = (const struct ib_mad *)in; - struct ib_mad *out_mad = (struct ib_mad *)out; - - if (WARN_ON_ONCE(in_mad_size != sizeof(*in_mad) || - *out_mad_size != sizeof(*out_mad))) - return IB_MAD_RESULT_FAILURE; slid = in_wc ? in_wc->slid : be16_to_cpu(IB_LID_PERMISSIVE); @@ -117,6 +111,156 @@ int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY; } +static void pma_cnt_ext_assign(struct ib_pma_portcounters_ext *pma_cnt_ext, + void *out) +{ +#define MLX5_SUM_CNT(p, cntr1, cntr2) \ + (MLX5_GET64(query_vport_counter_out, p, cntr1) + \ + MLX5_GET64(query_vport_counter_out, p, cntr2)) + + pma_cnt_ext->port_xmit_data = + cpu_to_be64(MLX5_SUM_CNT(out, transmitted_ib_unicast.octets, + transmitted_ib_multicast.octets) >> 2); + pma_cnt_ext->port_xmit_data = + cpu_to_be64(MLX5_SUM_CNT(out, received_ib_unicast.octets, + received_ib_multicast.octets) >> 2); + pma_cnt_ext->port_xmit_packets = + cpu_to_be64(MLX5_SUM_CNT(out, transmitted_ib_unicast.packets, + transmitted_ib_multicast.packets)); + pma_cnt_ext->port_rcv_packets = + cpu_to_be64(MLX5_SUM_CNT(out, received_ib_unicast.packets, + received_ib_multicast.packets)); + pma_cnt_ext->port_unicast_xmit_packets = + MLX5_GET64_BE(query_vport_counter_out, + out, transmitted_ib_unicast.packets); + pma_cnt_ext->port_unicast_rcv_packets = + MLX5_GET64_BE(query_vport_counter_out, + out, received_ib_unicast.packets); + pma_cnt_ext->port_multicast_xmit_packets = + MLX5_GET64_BE(query_vport_counter_out, + out, transmitted_ib_multicast.packets); + pma_cnt_ext->port_multicast_rcv_packets = + MLX5_GET64_BE(query_vport_counter_out, + out, received_ib_multicast.packets); +} + +static void pma_cnt_assign(struct ib_pma_portcounters *pma_cnt, + void *out) +{ + /* Traffic counters will be reported in + * their 64bit form via ib_pma_portcounters_ext by default. 
+ */ + void *out_pma = MLX5_ADDR_OF(ppcnt_reg, out, + counter_set); + +#define MLX5_ASSIGN_PMA_CNTR(counter_var, counter_name) { \ + counter_var = MLX5_GET_BE(typeof(counter_var), \ + ib_port_cntrs_grp_data_layout, \ + out_pma, counter_name); \ + } + + MLX5_ASSIGN_PMA_CNTR(pma_cnt->symbol_error_counter, + symbol_error_counter); + MLX5_ASSIGN_PMA_CNTR(pma_cnt->link_error_recovery_counter, + link_error_recovery_counter); + MLX5_ASSIGN_PMA_CNTR(pma_cnt->link_downed_counter, + link_downed_counter); + MLX5_ASSIGN_PMA_CNTR(pma_cnt->port_rcv_errors, + port_rcv_errors); + MLX5_ASSIGN_PMA_CNTR(pma_cnt->port_rcv_remphys_errors, + port_rcv_remote_physical_errors); + MLX5_ASSIGN_PMA_CNTR(pma_cnt->port_rcv_switch_relay_errors, + port_rcv_switch_relay_errors); + MLX5_ASSIGN_PMA_CNTR(pma_cnt->port_xmit_discards, + port_xmit_discards); + MLX5_ASSIGN_PMA_CNTR(pma_cnt->port_xmit_constraint_errors, + port_xmit_constraint_errors); + MLX5_ASSIGN_PMA_CNTR(pma_cnt->port_rcv_constraint_errors, + port_rcv_constraint_errors); + MLX5_ASSIGN_PMA_CNTR(pma_cnt->link_overrun_errors, + link_overrun_errors); + MLX5_ASSIGN_PMA_CNTR(pma_cnt->vl15_dropped, + vl_15_dropped); +} + +static int process_pma_cmd(struct ib_device *ibdev, u8 port_num, + const struct ib_mad *in_mad, struct ib_mad *out_mad) +{ + struct mlx5_ib_dev *dev = to_mdev(ibdev); + int err; + void *out_cnt; + + /* Decalring support of extended counters */ + if (in_mad->mad_hdr.attr_id == IB_PMA_CLASS_PORT_INFO) { + struct ib_class_port_info cpi = {}; + + cpi.capability_mask = IB_PMA_CLASS_CAP_EXT_WIDTH; + memcpy((out_mad->data + 40), &cpi, sizeof(cpi)); + return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY; + } + + if (in_mad->mad_hdr.attr_id == IB_PMA_PORT_COUNTERS_EXT) { + struct ib_pma_portcounters_ext *pma_cnt_ext = + (struct ib_pma_portcounters_ext *)(out_mad->data + 40); + int sz = MLX5_ST_SZ_BYTES(query_vport_counter_out); + + out_cnt = mlx5_vzalloc(sz); + if (!out_cnt) + return IB_MAD_RESULT_FAILURE; + + err = mlx5_core_query_vport_counter(dev->mdev, 0, + port_num, out_cnt, sz); + if (!err) + pma_cnt_ext_assign(pma_cnt_ext, out_cnt); + } else { + struct ib_pma_portcounters *pma_cnt = + (struct ib_pma_portcounters *)(out_mad->data + 40); + int sz = MLX5_ST_SZ_BYTES(ppcnt_reg); + + out_cnt = mlx5_vzalloc(sz); + if (!out_cnt) + return IB_MAD_RESULT_FAILURE; + + err = mlx5_core_query_ib_ppcnt(dev->mdev, port_num, + out_cnt, sz); + if (!err) + pma_cnt_assign(pma_cnt, out_cnt); + } + + kvfree(out_cnt); + if (err) + return IB_MAD_RESULT_FAILURE; + + return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY; +} + +int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, + const struct ib_wc *in_wc, const struct ib_grh *in_grh, + const struct ib_mad_hdr *in, size_t in_mad_size, + struct ib_mad_hdr *out, size_t *out_mad_size, + u16 *out_mad_pkey_index) +{ + struct mlx5_ib_dev *dev = to_mdev(ibdev); + struct mlx5_core_dev *mdev = dev->mdev; + const struct ib_mad *in_mad = (const struct ib_mad *)in; + struct ib_mad *out_mad = (struct ib_mad *)out; + + if (WARN_ON_ONCE(in_mad_size != sizeof(*in_mad) || + *out_mad_size != sizeof(*out_mad))) + return IB_MAD_RESULT_FAILURE; + + memset(out_mad->data, 0, sizeof(out_mad->data)); + + if (MLX5_CAP_GEN(mdev, vport_counters) && + in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_PERF_MGMT && + in_mad->mad_hdr.method == IB_MGMT_METHOD_GET) { + return process_pma_cmd(ibdev, port_num, in_mad, out_mad); + } else { + return process_mad(ibdev, mad_flags, port_num, in_wc, in_grh, + in_mad, out_mad); + } +} + int 
mlx5_query_ext_port_caps(struct mlx5_ib_dev *dev, u8 port) { struct ib_smp *in_mad = NULL; diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 99e2edc93d27..12079fd20413 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -105,6 +105,29 @@ __mlx5_mask(typ, fld)) ___t; \ }) +/* Big endian getters */ +#define MLX5_GET64_BE(typ, p, fld) (*((__be64 *)(p) +\ + __mlx5_64_off(typ, fld))) + +#define MLX5_GET_BE(type_t, typ, p, fld) ({ \ + type_t tmp; \ + switch (sizeof(tmp)) { \ + case sizeof(u8): \ + tmp = (__force type_t)MLX5_GET(typ, p, fld); \ + break; \ + case sizeof(u16): \ + tmp = (__force type_t)cpu_to_be16(MLX5_GET(typ, p, fld)); \ + break; \ + case sizeof(u32): \ + tmp = (__force type_t)cpu_to_be32(MLX5_GET(typ, p, fld)); \ + break; \ + case sizeof(u64): \ + tmp = (__force type_t)MLX5_GET64_BE(typ, p, fld); \ + break; \ + } \ + tmp; \ + }) + enum { MLX5_MAX_COMMANDS = 32, MLX5_CMD_DATA_BLOCK_SIZE = 512, -- cgit v1.2.3 From 1015c2e8ca2b94d8964f8ab30d925b6f678fd9d2 Mon Sep 17 00:00:00 2001 From: Erez Shitrit Date: Sun, 21 Feb 2016 16:27:16 +0200 Subject: IB/mlx5: Define interface bits for IPoIB offloads The HW can supply several offloads for UD QP, added offloads for checksumming for both TX and RX and LSO for TX. Two new bits were added in order to expose and enable these offloads: 1. HCA capability bit: declares the support for IPoIB basic offloads. 2. QPC bit which will be used in the QP creation flow, which set these abilities in the QP. Signed-off-by: Erez Shitrit Signed-off-by: Eran Ben Elisha Reviewed-by: Leon Romanovsky Signed-off-by: Doug Ledford --- include/linux/mlx5/mlx5_ifc.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 9f404de5f99b..711c9dc87f63 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -736,7 +736,9 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 cqe_version[0x4]; u8 compact_address_vector[0x1]; - u8 reserved_at_200[0xe]; + u8 reserved_at_200[0x3]; + u8 ipoib_basic_offloads[0x1]; + u8 reserved_at_204[0xa]; u8 drain_sigerr[0x1]; u8 cmdif_checksum[0x2]; u8 sigerr_cqe[0x1]; @@ -1810,7 +1812,7 @@ struct mlx5_ifc_qpc_bits { u8 log_sq_size[0x4]; u8 reserved_at_55[0x6]; u8 rlky[0x1]; - u8 reserved_at_5c[0x4]; + u8 ulp_stateless_offload_mode[0x4]; u8 counter_set_id[0x8]; u8 uar_page[0x18]; -- cgit v1.2.3 From b11a4f9cde1c06e0073662882b60c1fb95a1d597 Mon Sep 17 00:00:00 2001 From: Haggai Eran Date: Mon, 29 Feb 2016 15:45:03 +0200 Subject: IB/mlx5: Add support for setting source QP number In order to create multiple GSI QPs, we need to set the source QP number to one on all these QPs. Add the necessary definitions and infrastructure to do that. 
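A later GSI patch would then create such a QP roughly as follows (a sketch under that assumption; only mlx5_ib_create_qp_sqpn_qp1() is introduced here, the rest is the standard verbs API):

  struct ib_qp_init_attr init_attr = {};
  struct ib_qp *qp;

  init_attr.qp_type = IB_QPT_UD;
  init_attr.create_flags = mlx5_ib_create_qp_sqpn_qp1();
  /* CQs, capabilities and port setup elided */
  qp = ib_create_qp(pd, &init_attr);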
Reviewed-by: Leon Romanovsky Signed-off-by: Haggai Eran Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/mlx5_ib.h | 14 ++++++++++++++ drivers/infiniband/hw/mlx5/qp.c | 17 ++++++++++++++++- include/linux/mlx5/mlx5_ifc.h | 3 ++- include/linux/mlx5/qp.h | 3 ++- 4 files changed, 34 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 14396b0eac74..32699f92480a 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -165,6 +165,18 @@ struct mlx5_ib_flow_db { #define MLX5_IB_QPT_REG_UMR IB_QPT_RESERVED1 #define MLX5_IB_WR_UMR IB_WR_RESERVED1 +/* Private QP creation flags to be passed in ib_qp_init_attr.create_flags. + * + * These flags are intended for internal use by the mlx5_ib driver, and they + * rely on the range reserved for that use in the ib_qp_create_flags enum. + */ + +/* Create a UD QP whose source QP number is 1 */ +static inline enum ib_qp_create_flags mlx5_ib_create_qp_sqpn_qp1(void) +{ + return IB_QP_CREATE_RESERVED_START; +} + struct wr_list { u16 opcode; u16 next; @@ -331,6 +343,8 @@ enum mlx5_ib_qp_flags { MLX5_IB_QP_MANAGED_SEND = IB_QP_CREATE_MANAGED_SEND, MLX5_IB_QP_MANAGED_RECV = IB_QP_CREATE_MANAGED_RECV, MLX5_IB_QP_SIGNATURE_HANDLING = 1 << 5, + /* QP uses 1 as its source QP number */ + MLX5_IB_QP_SQPN_QP1 = 1 << 6, }; struct mlx5_umr_wr { diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index baa88084d89d..794e760a17a0 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -793,7 +793,8 @@ static int create_kernel_qp(struct mlx5_ib_dev *dev, uuari = &dev->mdev->priv.uuari; if (init_attr->create_flags & ~(IB_QP_CREATE_SIGNATURE_EN | IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK | - IB_QP_CREATE_IPOIB_UD_LSO)) + IB_QP_CREATE_IPOIB_UD_LSO | + mlx5_ib_create_qp_sqpn_qp1())) return -EINVAL; if (init_attr->qp_type == MLX5_IB_QPT_REG_UMR) @@ -838,6 +839,11 @@ static int create_kernel_qp(struct mlx5_ib_dev *dev, (*in)->ctx.params1 |= cpu_to_be32(1 << 11); (*in)->ctx.sq_crq_size |= cpu_to_be16(1 << 4); + if (init_attr->create_flags & mlx5_ib_create_qp_sqpn_qp1()) { + (*in)->ctx.deth_sqpn = cpu_to_be32(1); + qp->flags |= MLX5_IB_QP_SQPN_QP1; + } + mlx5_fill_page_array(&qp->buf, (*in)->pas); err = mlx5_db_alloc(dev->mdev, &qp->db); @@ -1289,6 +1295,11 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, ucmd.sq_wqe_count, max_wqes); return -EINVAL; } + if (init_attr->create_flags & + mlx5_ib_create_qp_sqpn_qp1()) { + mlx5_ib_dbg(dev, "user-space is not allowed to create UD QPs spoofing as QP1\n"); + return -EINVAL; + } err = create_user_qp(dev, pd, qp, udata, init_attr, &in, &resp, &inlen, base); if (err) @@ -2309,6 +2320,8 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, if (!ibqp->uobject && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) context->sq_crq_size |= cpu_to_be16(1 << 4); + if (qp->flags & MLX5_IB_QP_SQPN_QP1) + context->deth_sqpn = cpu_to_be32(1); mlx5_cur = to_mlx5_state(cur_state); mlx5_new = to_mlx5_state(new_state); @@ -3973,6 +3986,8 @@ int mlx5_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, qp_init_attr->create_flags |= IB_QP_CREATE_MANAGED_SEND; if (qp->flags & MLX5_IB_QP_MANAGED_RECV) qp_init_attr->create_flags |= IB_QP_CREATE_MANAGED_RECV; + if (qp->flags & MLX5_IB_QP_SQPN_QP1) + qp_init_attr->create_flags |= mlx5_ib_create_qp_sqpn_qp1(); qp_init_attr->sq_sig_type = qp->sq_signal_bits & MLX5_WQE_CTRL_CQ_UPDATE ? 
IB_SIGNAL_ALL_WR : IB_SIGNAL_REQ_WR; diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 711c9dc87f63..72bba5261766 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -772,7 +772,8 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 reserved_at_22e[0x7]; u8 qkv[0x1]; u8 pkv[0x1]; - u8 reserved_at_237[0x4]; + u8 set_deth_sqpn[0x1]; + u8 reserved_at_239[0x3]; u8 xrc[0x1]; u8 ud[0x1]; u8 uc[0x1]; diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h index 5b8c89ffaa58..e5bbcf06de95 100644 --- a/include/linux/mlx5/qp.h +++ b/include/linux/mlx5/qp.h @@ -499,7 +499,8 @@ struct mlx5_qp_context { u8 reserved2[4]; __be32 next_send_psn; __be32 cqn_send; - u8 reserved3[8]; + __be32 deth_sqpn; + u8 reserved3[4]; __be32 last_acked_psn; __be32 ssn; __be32 params2; -- cgit v1.2.3 From a606b0f6691daf861482f8b77326f672238ffbfd Mon Sep 17 00:00:00 2001 From: Matan Barak Date: Mon, 29 Feb 2016 18:05:28 +0200 Subject: net/mlx5: Refactor mlx5_core_mr to mkey Mlx5's mkey mechanism is also used for memory windows. The current code base uses MR (memory region) naming, which is inaccurate. Changing MR to mkey in order to represent its different usages more accurately. Signed-off-by: Matan Barak Reviewed-by: Yishai Hadas Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/cq.c | 16 +++---- drivers/infiniband/hw/mlx5/mlx5_ib.h | 6 +-- drivers/infiniband/hw/mlx5/mr.c | 58 +++++++++++------------ drivers/infiniband/hw/mlx5/odp.c | 10 ++-- drivers/net/ethernet/mellanox/mlx5/core/en.h | 2 +- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 12 ++--- drivers/net/ethernet/mellanox/mlx5/core/main.c | 6 +-- drivers/net/ethernet/mellanox/mlx5/core/mr.c | 54 +++++++++++---------- include/linux/mlx5/driver.h | 24 +++++----- include/linux/mlx5/qp.h | 4 +- 10 files changed, 98 insertions(+), 94 deletions(-) (limited to 'include/linux') diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c index 2a9ad8401750..a00ba4418de9 100644 --- a/drivers/infiniband/hw/mlx5/cq.c +++ b/drivers/infiniband/hw/mlx5/cq.c @@ -434,7 +434,7 @@ static int mlx5_poll_one(struct mlx5_ib_cq *cq, struct mlx5_core_qp *mqp; struct mlx5_ib_wq *wq; struct mlx5_sig_err_cqe *sig_err_cqe; - struct mlx5_core_mr *mmr; + struct mlx5_core_mkey *mmkey; struct mlx5_ib_mr *mr; uint8_t opcode; uint32_t qpn; @@ -539,17 +539,17 @@ repoll: case MLX5_CQE_SIG_ERR: sig_err_cqe = (struct mlx5_sig_err_cqe *)cqe64; - read_lock(&dev->mdev->priv.mr_table.lock); - mmr = __mlx5_mr_lookup(dev->mdev, - mlx5_base_mkey(be32_to_cpu(sig_err_cqe->mkey))); - if (unlikely(!mmr)) { - read_unlock(&dev->mdev->priv.mr_table.lock); + read_lock(&dev->mdev->priv.mkey_table.lock); + mmkey = __mlx5_mr_lookup(dev->mdev, + mlx5_base_mkey(be32_to_cpu(sig_err_cqe->mkey))); + if (unlikely(!mmkey)) { + read_unlock(&dev->mdev->priv.mkey_table.lock); mlx5_ib_warn(dev, "CQE@CQ %06x for unknown MR %6x\n", cq->mcq.cqn, be32_to_cpu(sig_err_cqe->mkey)); return -EINVAL; } - mr = to_mibmr(mmr); + mr = to_mibmr(mmkey); get_sig_err_item(sig_err_cqe, &mr->sig->err_item); mr->sig->sig_err_exists = true; mr->sig->sigerr_count++; @@ -561,7 +561,7 @@ repoll: mr->sig->err_item.expected, mr->sig->err_item.actual); - read_unlock(&dev->mdev->priv.mr_table.lock); + read_unlock(&dev->mdev->priv.mkey_table.lock); goto repoll; } diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index f84ec2b6425c..4167d67179ff 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h 
@@ -446,7 +446,7 @@ struct mlx5_ib_mr { int ndescs; int max_descs; int desc_size; - struct mlx5_core_mr mmr; + struct mlx5_core_mkey mmkey; struct ib_umem *umem; struct mlx5_shared_mr_info *smr_info; struct list_head list; @@ -603,9 +603,9 @@ static inline struct mlx5_ib_qp *to_mibqp(struct mlx5_core_qp *mqp) return container_of(mqp, struct mlx5_ib_qp_base, mqp)->container_mibqp; } -static inline struct mlx5_ib_mr *to_mibmr(struct mlx5_core_mr *mmr) +static inline struct mlx5_ib_mr *to_mibmr(struct mlx5_core_mkey *mmkey) { - return container_of(mmr, struct mlx5_ib_mr, mmr); + return container_of(mmkey, struct mlx5_ib_mr, mmkey); } static inline struct mlx5_ib_pd *to_mpd(struct ib_pd *ibpd) diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index cf26cd1f1fa2..399e2b5fb573 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -57,7 +57,7 @@ static int clean_mr(struct mlx5_ib_mr *mr); static int destroy_mkey(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) { - int err = mlx5_core_destroy_mkey(dev->mdev, &mr->mmr); + int err = mlx5_core_destroy_mkey(dev->mdev, &mr->mmkey); #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING /* Wait until all page fault handlers using the mr complete. */ @@ -120,7 +120,7 @@ static void reg_mr_callback(int status, void *context) struct mlx5_cache_ent *ent = &cache->ent[c]; u8 key; unsigned long flags; - struct mlx5_mr_table *table = &dev->mdev->priv.mr_table; + struct mlx5_mkey_table *table = &dev->mdev->priv.mkey_table; int err; spin_lock_irqsave(&ent->lock, flags); @@ -147,7 +147,7 @@ static void reg_mr_callback(int status, void *context) spin_lock_irqsave(&dev->mdev->priv.mkey_lock, flags); key = dev->mdev->priv.mkey_key++; spin_unlock_irqrestore(&dev->mdev->priv.mkey_lock, flags); - mr->mmr.key = mlx5_idx_to_mkey(be32_to_cpu(mr->out.mkey) & 0xffffff) | key; + mr->mmkey.key = mlx5_idx_to_mkey(be32_to_cpu(mr->out.mkey) & 0xffffff) | key; cache->last_add = jiffies; @@ -158,10 +158,10 @@ static void reg_mr_callback(int status, void *context) spin_unlock_irqrestore(&ent->lock, flags); write_lock_irqsave(&table->lock, flags); - err = radix_tree_insert(&table->tree, mlx5_base_mkey(mr->mmr.key), - &mr->mmr); + err = radix_tree_insert(&table->tree, mlx5_base_mkey(mr->mmkey.key), + &mr->mmkey); if (err) - pr_err("Error inserting to mr tree. 0x%x\n", -err); + pr_err("Error inserting to mkey tree. 
0x%x\n", -err); write_unlock_irqrestore(&table->lock, flags); } @@ -202,7 +202,7 @@ static int add_keys(struct mlx5_ib_dev *dev, int c, int num) spin_lock_irq(&ent->lock); ent->pending++; spin_unlock_irq(&ent->lock); - err = mlx5_core_create_mkey(dev->mdev, &mr->mmr, in, + err = mlx5_core_create_mkey(dev->mdev, &mr->mmkey, in, sizeof(*in), reg_mr_callback, mr, &mr->out); if (err) { @@ -691,14 +691,14 @@ struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc) seg->qpn_mkey7_0 = cpu_to_be32(0xffffff << 8); seg->start_addr = 0; - err = mlx5_core_create_mkey(mdev, &mr->mmr, in, sizeof(*in), NULL, NULL, + err = mlx5_core_create_mkey(mdev, &mr->mmkey, in, sizeof(*in), NULL, NULL, NULL); if (err) goto err_in; kfree(in); - mr->ibmr.lkey = mr->mmr.key; - mr->ibmr.rkey = mr->mmr.key; + mr->ibmr.lkey = mr->mmkey.key; + mr->ibmr.rkey = mr->mmkey.key; mr->umem = NULL; return &mr->ibmr; @@ -897,7 +897,7 @@ static struct mlx5_ib_mr *reg_umr(struct ib_pd *pd, struct ib_umem *umem, memset(&umrwr, 0, sizeof(umrwr)); umrwr.wr.wr_id = (u64)(unsigned long)&umr_context; - prep_umr_reg_wqe(pd, &umrwr.wr, &sg, dma, npages, mr->mmr.key, + prep_umr_reg_wqe(pd, &umrwr.wr, &sg, dma, npages, mr->mmkey.key, page_shift, virt_addr, len, access_flags); mlx5_ib_init_umr_context(&umr_context); @@ -914,9 +914,9 @@ static struct mlx5_ib_mr *reg_umr(struct ib_pd *pd, struct ib_umem *umem, } } - mr->mmr.iova = virt_addr; - mr->mmr.size = len; - mr->mmr.pd = to_mpd(pd)->pdn; + mr->mmkey.iova = virt_addr; + mr->mmkey.size = len; + mr->mmkey.pd = to_mpd(pd)->pdn; mr->live = 1; @@ -1027,7 +1027,7 @@ int mlx5_ib_update_mtt(struct mlx5_ib_mr *mr, u64 start_page_index, int npages, wr.wr.opcode = MLX5_IB_WR_UMR; wr.npages = sg.length / sizeof(u64); wr.page_shift = PAGE_SHIFT; - wr.mkey = mr->mmr.key; + wr.mkey = mr->mmkey.key; wr.target.offset = start_page_index; mlx5_ib_init_umr_context(&umr_context); @@ -1100,7 +1100,7 @@ static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd, in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8); in->xlat_oct_act_size = cpu_to_be32(get_octo_len(virt_addr, length, 1 << page_shift)); - err = mlx5_core_create_mkey(dev->mdev, &mr->mmr, in, inlen, NULL, + err = mlx5_core_create_mkey(dev->mdev, &mr->mmkey, in, inlen, NULL, NULL, NULL); if (err) { mlx5_ib_warn(dev, "create mkey failed\n"); @@ -1111,7 +1111,7 @@ static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd, mr->live = 1; kvfree(in); - mlx5_ib_dbg(dev, "mkey = 0x%x\n", mr->mmr.key); + mlx5_ib_dbg(dev, "mkey = 0x%x\n", mr->mmkey.key); return mr; @@ -1130,8 +1130,8 @@ static void set_mr_fileds(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr, { mr->npages = npages; atomic_add(npages, &dev->mdev->priv.reg_pages); - mr->ibmr.lkey = mr->mmr.key; - mr->ibmr.rkey = mr->mmr.key; + mr->ibmr.lkey = mr->mmkey.key; + mr->ibmr.rkey = mr->mmkey.key; mr->ibmr.length = length; mr->access_flags = access_flags; } @@ -1179,7 +1179,7 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, goto error; } - mlx5_ib_dbg(dev, "mkey 0x%x\n", mr->mmr.key); + mlx5_ib_dbg(dev, "mkey 0x%x\n", mr->mmkey.key); mr->umem = umem; set_mr_fileds(dev, mr, npages, length, access_flags); @@ -1205,7 +1205,7 @@ static int unreg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) memset(&umrwr.wr, 0, sizeof(umrwr)); umrwr.wr.wr_id = (u64)(unsigned long)&umr_context; - prep_umr_unreg_wqe(dev, &umrwr.wr, mr->mmr.key); + prep_umr_unreg_wqe(dev, &umrwr.wr, mr->mmkey.key); mlx5_ib_init_umr_context(&umr_context); down(&umrc->sem); @@ 
-1259,7 +1259,7 @@ static int rereg_umr(struct ib_pd *pd, struct mlx5_ib_mr *mr, u64 virt_addr, umrwr.wr.send_flags |= MLX5_IB_SEND_UMR_UPDATE_TRANSLATION; } - prep_umr_wqe_common(pd, &umrwr.wr, &sg, dma, npages, mr->mmr.key, + prep_umr_wqe_common(pd, &umrwr.wr, &sg, dma, npages, mr->mmkey.key, page_shift); if (flags & IB_MR_REREG_PD) { @@ -1371,7 +1371,7 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, if (flags & IB_MR_REREG_PD) { ib_mr->pd = pd; - mr->mmr.pd = to_mpd(pd)->pdn; + mr->mmkey.pd = to_mpd(pd)->pdn; } if (flags & IB_MR_REREG_ACCESS) @@ -1380,8 +1380,8 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, if (flags & IB_MR_REREG_TRANS) { atomic_sub(mr->npages, &dev->mdev->priv.reg_pages); set_mr_fileds(dev, mr, npages, len, access_flags); - mr->mmr.iova = addr; - mr->mmr.size = len; + mr->mmkey.iova = addr; + mr->mmkey.size = len; } #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING update_odp_mr(mr); @@ -1461,7 +1461,7 @@ static int clean_mr(struct mlx5_ib_mr *mr) err = destroy_mkey(dev, mr); if (err) { mlx5_ib_warn(dev, "failed to destroy mkey 0x%x (%d)\n", - mr->mmr.key, err); + mr->mmkey.key, err); return err; } } else { @@ -1587,13 +1587,13 @@ struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd, } in->seg.flags = MLX5_PERM_UMR_EN | access_mode; - err = mlx5_core_create_mkey(dev->mdev, &mr->mmr, in, sizeof(*in), + err = mlx5_core_create_mkey(dev->mdev, &mr->mmkey, in, sizeof(*in), NULL, NULL, NULL); if (err) goto err_destroy_psv; - mr->ibmr.lkey = mr->mmr.key; - mr->ibmr.rkey = mr->mmr.key; + mr->ibmr.lkey = mr->mmkey.key; + mr->ibmr.rkey = mr->mmkey.key; mr->umem = NULL; kfree(in); diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index b8d76361a48d..34e79e709c67 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -142,13 +142,13 @@ static struct mlx5_ib_mr *mlx5_ib_odp_find_mr_lkey(struct mlx5_ib_dev *dev, u32 key) { u32 base_key = mlx5_base_mkey(key); - struct mlx5_core_mr *mmr = __mlx5_mr_lookup(dev->mdev, base_key); - struct mlx5_ib_mr *mr = container_of(mmr, struct mlx5_ib_mr, mmr); + struct mlx5_core_mkey *mmkey = __mlx5_mr_lookup(dev->mdev, base_key); + struct mlx5_ib_mr *mr = container_of(mmkey, struct mlx5_ib_mr, mmkey); - if (!mmr || mmr->key != key || !mr->live) + if (!mmkey || mmkey->key != key || !mr->live) return NULL; - return container_of(mmr, struct mlx5_ib_mr, mmr); + return container_of(mmkey, struct mlx5_ib_mr, mmkey); } static void mlx5_ib_page_fault_resume(struct mlx5_ib_qp *qp, @@ -232,7 +232,7 @@ static int pagefault_single_data_segment(struct mlx5_ib_qp *qp, io_virt += pfault->mpfault.bytes_committed; bcnt -= pfault->mpfault.bytes_committed; - start_idx = (io_virt - (mr->mmr.iova & PAGE_MASK)) >> PAGE_SHIFT; + start_idx = (io_virt - (mr->mmkey.iova & PAGE_MASK)) >> PAGE_SHIFT; if (mr->umem->writable) access_mask |= ODP_WRITE_ALLOWED_BIT; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index aac071a7e830..6ef0bfded2ba 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -515,7 +515,7 @@ struct mlx5e_priv { struct mlx5_uar cq_uar; u32 pdn; u32 tdn; - struct mlx5_core_mr mr; + struct mlx5_core_mkey mkey; struct mlx5e_rq drop_rq; struct mlx5e_channel **channel; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index d4e1c3045200..43a148939557 100644 --- 
a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -982,7 +982,7 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, c->cpu = cpu; c->pdev = &priv->mdev->pdev->dev; c->netdev = priv->netdev; - c->mkey_be = cpu_to_be32(priv->mr.key); + c->mkey_be = cpu_to_be32(priv->mkey.key); c->num_tc = priv->params.num_tc; mlx5e_build_channeltc_to_txq_map(priv, ix); @@ -2194,7 +2194,7 @@ static void mlx5e_build_netdev(struct net_device *netdev) } static int mlx5e_create_mkey(struct mlx5e_priv *priv, u32 pdn, - struct mlx5_core_mr *mr) + struct mlx5_core_mkey *mkey) { struct mlx5_core_dev *mdev = priv->mdev; struct mlx5_create_mkey_mbox_in *in; @@ -2210,7 +2210,7 @@ static int mlx5e_create_mkey(struct mlx5e_priv *priv, u32 pdn, in->seg.flags_pd = cpu_to_be32(pdn | MLX5_MKEY_LEN64); in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8); - err = mlx5_core_create_mkey(mdev, mr, in, sizeof(*in), NULL, NULL, + err = mlx5_core_create_mkey(mdev, mkey, in, sizeof(*in), NULL, NULL, NULL); kvfree(in); @@ -2259,7 +2259,7 @@ static void *mlx5e_create_netdev(struct mlx5_core_dev *mdev) goto err_dealloc_pd; } - err = mlx5e_create_mkey(priv, priv->pdn, &priv->mr); + err = mlx5e_create_mkey(priv, priv->pdn, &priv->mkey); if (err) { mlx5_core_err(mdev, "create mkey failed, %d\n", err); goto err_dealloc_transport_domain; @@ -2333,7 +2333,7 @@ err_destroy_tises: mlx5e_destroy_tises(priv); err_destroy_mkey: - mlx5_core_destroy_mkey(mdev, &priv->mr); + mlx5_core_destroy_mkey(mdev, &priv->mkey); err_dealloc_transport_domain: mlx5_core_dealloc_transport_domain(mdev, priv->tdn); @@ -2367,7 +2367,7 @@ static void mlx5e_destroy_netdev(struct mlx5_core_dev *mdev, void *vpriv) mlx5e_destroy_rqt(priv, MLX5E_INDIRECTION_RQT); mlx5e_close_drop_rq(priv); mlx5e_destroy_tises(priv); - mlx5_core_destroy_mkey(priv->mdev, &priv->mr); + mlx5_core_destroy_mkey(priv->mdev, &priv->mkey); mlx5_core_dealloc_transport_domain(priv->mdev, priv->tdn); mlx5_core_dealloc_pd(priv->mdev, priv->pdn); mlx5_unmap_free_uar(priv->mdev, &priv->cq_uar); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 1545a944c309..0916bbc69269 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -1117,7 +1117,7 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv) mlx5_init_cq_table(dev); mlx5_init_qp_table(dev); mlx5_init_srq_table(dev); - mlx5_init_mr_table(dev); + mlx5_init_mkey_table(dev); err = mlx5_init_fs(dev); if (err) { @@ -1164,7 +1164,7 @@ err_sriov: err_reg_dev: mlx5_cleanup_fs(dev); err_fs: - mlx5_cleanup_mr_table(dev); + mlx5_cleanup_mkey_table(dev); mlx5_cleanup_srq_table(dev); mlx5_cleanup_qp_table(dev); mlx5_cleanup_cq_table(dev); @@ -1237,7 +1237,7 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv) #endif mlx5_cleanup_fs(dev); - mlx5_cleanup_mr_table(dev); + mlx5_cleanup_mkey_table(dev); mlx5_cleanup_srq_table(dev); mlx5_cleanup_qp_table(dev); mlx5_cleanup_cq_table(dev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mr.c b/drivers/net/ethernet/mellanox/mlx5/core/mr.c index 6fa22b51e460..77a7293921d5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mr.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/mr.c @@ -36,25 +36,26 @@ #include #include "mlx5_core.h" -void mlx5_init_mr_table(struct mlx5_core_dev *dev) +void mlx5_init_mkey_table(struct mlx5_core_dev *dev) { - struct mlx5_mr_table *table = 
&dev->priv.mr_table; + struct mlx5_mkey_table *table = &dev->priv.mkey_table; memset(table, 0, sizeof(*table)); rwlock_init(&table->lock); INIT_RADIX_TREE(&table->tree, GFP_ATOMIC); } -void mlx5_cleanup_mr_table(struct mlx5_core_dev *dev) +void mlx5_cleanup_mkey_table(struct mlx5_core_dev *dev) { } -int mlx5_core_create_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr, +int mlx5_core_create_mkey(struct mlx5_core_dev *dev, + struct mlx5_core_mkey *mkey, struct mlx5_create_mkey_mbox_in *in, int inlen, mlx5_cmd_cbk_t callback, void *context, struct mlx5_create_mkey_mbox_out *out) { - struct mlx5_mr_table *table = &dev->priv.mr_table; + struct mlx5_mkey_table *table = &dev->priv.mkey_table; struct mlx5_create_mkey_mbox_out lout; int err; u8 key; @@ -83,34 +84,35 @@ int mlx5_core_create_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr, return mlx5_cmd_status_to_err(&lout.hdr); } - mr->iova = be64_to_cpu(in->seg.start_addr); - mr->size = be64_to_cpu(in->seg.len); - mr->key = mlx5_idx_to_mkey(be32_to_cpu(lout.mkey) & 0xffffff) | key; - mr->pd = be32_to_cpu(in->seg.flags_pd) & 0xffffff; + mkey->iova = be64_to_cpu(in->seg.start_addr); + mkey->size = be64_to_cpu(in->seg.len); + mkey->key = mlx5_idx_to_mkey(be32_to_cpu(lout.mkey) & 0xffffff) | key; + mkey->pd = be32_to_cpu(in->seg.flags_pd) & 0xffffff; mlx5_core_dbg(dev, "out 0x%x, key 0x%x, mkey 0x%x\n", - be32_to_cpu(lout.mkey), key, mr->key); + be32_to_cpu(lout.mkey), key, mkey->key); - /* connect to MR tree */ + /* connect to mkey tree */ write_lock_irq(&table->lock); - err = radix_tree_insert(&table->tree, mlx5_base_mkey(mr->key), mr); + err = radix_tree_insert(&table->tree, mlx5_base_mkey(mkey->key), mkey); write_unlock_irq(&table->lock); if (err) { - mlx5_core_warn(dev, "failed radix tree insert of mr 0x%x, %d\n", - mlx5_base_mkey(mr->key), err); - mlx5_core_destroy_mkey(dev, mr); + mlx5_core_warn(dev, "failed radix tree insert of mkey 0x%x, %d\n", + mlx5_base_mkey(mkey->key), err); + mlx5_core_destroy_mkey(dev, mkey); } return err; } EXPORT_SYMBOL(mlx5_core_create_mkey); -int mlx5_core_destroy_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr) +int mlx5_core_destroy_mkey(struct mlx5_core_dev *dev, + struct mlx5_core_mkey *mkey) { - struct mlx5_mr_table *table = &dev->priv.mr_table; + struct mlx5_mkey_table *table = &dev->priv.mkey_table; struct mlx5_destroy_mkey_mbox_in in; struct mlx5_destroy_mkey_mbox_out out; - struct mlx5_core_mr *deleted_mr; + struct mlx5_core_mkey *deleted_mkey; unsigned long flags; int err; @@ -118,16 +120,16 @@ int mlx5_core_destroy_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr) memset(&out, 0, sizeof(out)); write_lock_irqsave(&table->lock, flags); - deleted_mr = radix_tree_delete(&table->tree, mlx5_base_mkey(mr->key)); + deleted_mkey = radix_tree_delete(&table->tree, mlx5_base_mkey(mkey->key)); write_unlock_irqrestore(&table->lock, flags); - if (!deleted_mr) { - mlx5_core_warn(dev, "failed radix tree delete of mr 0x%x\n", - mlx5_base_mkey(mr->key)); + if (!deleted_mkey) { + mlx5_core_warn(dev, "failed radix tree delete of mkey 0x%x\n", + mlx5_base_mkey(mkey->key)); return -ENOENT; } in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_DESTROY_MKEY); - in.mkey = cpu_to_be32(mlx5_mkey_to_idx(mr->key)); + in.mkey = cpu_to_be32(mlx5_mkey_to_idx(mkey->key)); err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out)); if (err) return err; @@ -139,7 +141,7 @@ int mlx5_core_destroy_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr) } EXPORT_SYMBOL(mlx5_core_destroy_mkey); -int 
mlx5_core_query_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr, +int mlx5_core_query_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mkey *mkey, struct mlx5_query_mkey_mbox_out *out, int outlen) { struct mlx5_query_mkey_mbox_in in; @@ -149,7 +151,7 @@ int mlx5_core_query_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr, memset(out, 0, outlen); in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_QUERY_MKEY); - in.mkey = cpu_to_be32(mlx5_mkey_to_idx(mr->key)); + in.mkey = cpu_to_be32(mlx5_mkey_to_idx(mkey->key)); err = mlx5_cmd_exec(dev, &in, sizeof(in), out, outlen); if (err) return err; @@ -161,7 +163,7 @@ int mlx5_core_query_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr, } EXPORT_SYMBOL(mlx5_core_query_mkey); -int mlx5_core_dump_fill_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr, +int mlx5_core_dump_fill_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mkey *_mkey, u32 *mkey) { struct mlx5_query_special_ctxs_mbox_in in; diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 8edcd08853dd..9108904a6a56 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -338,7 +338,7 @@ struct mlx5_core_sig_ctx { u32 sigerr_count; }; -struct mlx5_core_mr { +struct mlx5_core_mkey { u64 iova; u64 size; u32 key; @@ -426,7 +426,7 @@ struct mlx5_srq_table { struct radix_tree_root tree; }; -struct mlx5_mr_table { +struct mlx5_mkey_table { /* protect radix tree */ rwlock_t lock; @@ -484,9 +484,9 @@ struct mlx5_priv { struct mlx5_cq_table cq_table; /* end: cq staff */ - /* start: mr staff */ - struct mlx5_mr_table mr_table; - /* end: mr staff */ + /* start: mkey staff */ + struct mlx5_mkey_table mkey_table; + /* end: mkey staff */ /* start: alloc staff */ /* protect buffer alocation according to numa node */ @@ -739,16 +739,18 @@ int mlx5_core_query_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, struct mlx5_query_srq_mbox_out *out); int mlx5_core_arm_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, u16 lwm, int is_srq); -void mlx5_init_mr_table(struct mlx5_core_dev *dev); -void mlx5_cleanup_mr_table(struct mlx5_core_dev *dev); -int mlx5_core_create_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr, +void mlx5_init_mkey_table(struct mlx5_core_dev *dev); +void mlx5_cleanup_mkey_table(struct mlx5_core_dev *dev); +int mlx5_core_create_mkey(struct mlx5_core_dev *dev, + struct mlx5_core_mkey *mkey, struct mlx5_create_mkey_mbox_in *in, int inlen, mlx5_cmd_cbk_t callback, void *context, struct mlx5_create_mkey_mbox_out *out); -int mlx5_core_destroy_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr); -int mlx5_core_query_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr, +int mlx5_core_destroy_mkey(struct mlx5_core_dev *dev, + struct mlx5_core_mkey *mkey); +int mlx5_core_query_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mkey *mkey, struct mlx5_query_mkey_mbox_out *out, int outlen); -int mlx5_core_dump_fill_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr, +int mlx5_core_dump_fill_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mkey *_mkey, u32 *mkey); int mlx5_core_alloc_pd(struct mlx5_core_dev *dev, u32 *pdn); int mlx5_core_dealloc_pd(struct mlx5_core_dev *dev, u32 pdn); diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h index e5bbcf06de95..cf031a3f16c5 100644 --- a/include/linux/mlx5/qp.h +++ b/include/linux/mlx5/qp.h @@ -622,9 +622,9 @@ static inline struct mlx5_core_qp *__mlx5_qp_lookup(struct mlx5_core_dev *dev, u return radix_tree_lookup(&dev->priv.qp_table.tree, qpn); } -static 
inline struct mlx5_core_mr *__mlx5_mr_lookup(struct mlx5_core_dev *dev, u32 key) +static inline struct mlx5_core_mkey *__mlx5_mr_lookup(struct mlx5_core_dev *dev, u32 key) { - return radix_tree_lookup(&dev->priv.mr_table.tree, key); + return radix_tree_lookup(&dev->priv.mkey_table.tree, key); } struct mlx5_page_fault_resume_mbox_in { -- cgit v1.2.3 From d2370e0a573e5c5ea9c96373558727abb3ea71f7 Mon Sep 17 00:00:00 2001 From: Matan Barak Date: Mon, 29 Feb 2016 18:05:30 +0200 Subject: IB/mlx5: Add memory windows allocation support This patch adds user-space support for memory windows allocation and deallocation. It also exposes the supported types via query_device_caps verb. Signed-off-by: Matan Barak Reviewed-by: Yishai Hadas Tested-by: Max Gurtovoy Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/main.c | 13 ++++++ drivers/infiniband/hw/mlx5/mlx5_ib.h | 14 ++++++ drivers/infiniband/hw/mlx5/mr.c | 83 ++++++++++++++++++++++++++++++++++++ drivers/infiniband/hw/mlx5/user.h | 7 +++ include/linux/mlx5/mlx5_ifc.h | 4 +- 5 files changed, 120 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 16f7d0b41c04..4d9b7cc4ca73 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -487,6 +487,11 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, props->device_cap_flags |= IB_DEVICE_AUTO_PATH_MIG; if (MLX5_CAP_GEN(mdev, xrc)) props->device_cap_flags |= IB_DEVICE_XRC; + if (MLX5_CAP_GEN(mdev, imaicl)) { + props->device_cap_flags |= IB_DEVICE_MEM_WINDOW | + IB_DEVICE_MEM_WINDOW_TYPE_2B; + props->max_mw = 1 << MLX5_CAP_GEN(mdev, log_max_mkey); + } props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS; if (MLX5_CAP_GEN(mdev, sho)) { props->device_cap_flags |= IB_DEVICE_SIGNATURE_HANDOVER; @@ -2306,6 +2311,14 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev) mlx5_ib_internal_fill_odp_caps(dev); + if (MLX5_CAP_GEN(mdev, imaicl)) { + dev->ib_dev.alloc_mw = mlx5_ib_alloc_mw; + dev->ib_dev.dealloc_mw = mlx5_ib_dealloc_mw; + dev->ib_dev.uverbs_cmd_mask |= + (1ull << IB_USER_VERBS_CMD_ALLOC_MW) | + (1ull << IB_USER_VERBS_CMD_DEALLOC_MW); + } + if (MLX5_CAP_GEN(mdev, xrc)) { dev->ib_dev.alloc_xrcd = mlx5_ib_alloc_xrcd; dev->ib_dev.dealloc_xrcd = mlx5_ib_dealloc_xrcd; diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 4167d67179ff..648d2e2e445b 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -43,6 +43,7 @@ #include #include #include +#include #define mlx5_ib_dbg(dev, format, arg...) 
\ pr_debug("%s:%s:%d:(pid %d): " format, (dev)->ib_dev.name, __func__, \ @@ -461,6 +462,11 @@ struct mlx5_ib_mr { int access_flags; /* Needed for rereg MR */ }; +struct mlx5_ib_mw { + struct ib_mw ibmw; + struct mlx5_core_mkey mmkey; +}; + struct mlx5_ib_umr_context { enum ib_wc_status status; struct completion done; @@ -633,6 +639,11 @@ static inline struct mlx5_ib_mr *to_mmr(struct ib_mr *ibmr) return container_of(ibmr, struct mlx5_ib_mr, ibmr); } +static inline struct mlx5_ib_mw *to_mmw(struct ib_mw *ibmw) +{ + return container_of(ibmw, struct mlx5_ib_mw, ibmw); +} + struct mlx5_ib_ah { struct ib_ah ibah; struct mlx5_av av; @@ -693,6 +704,9 @@ struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc); struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u64 virt_addr, int access_flags, struct ib_udata *udata); +struct ib_mw *mlx5_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type, + struct ib_udata *udata); +int mlx5_ib_dealloc_mw(struct ib_mw *mw); int mlx5_ib_update_mtt(struct mlx5_ib_mr *mr, u64 start_page_index, int npages, int zap); int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 399e2b5fb573..70a047dde69e 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -40,6 +40,7 @@ #include #include #include "mlx5_ib.h" +#include "user.h" enum { MAX_PENDING_REG_MR = 8, @@ -1620,6 +1621,88 @@ err_free: return ERR_PTR(err); } +struct ib_mw *mlx5_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type, + struct ib_udata *udata) +{ + struct mlx5_ib_dev *dev = to_mdev(pd->device); + struct mlx5_create_mkey_mbox_in *in = NULL; + struct mlx5_ib_mw *mw = NULL; + int ndescs; + int err; + struct mlx5_ib_alloc_mw req = {}; + struct { + __u32 comp_mask; + __u32 response_length; + } resp = {}; + + err = ib_copy_from_udata(&req, udata, min(udata->inlen, sizeof(req))); + if (err) + return ERR_PTR(err); + + if (req.comp_mask || req.reserved1 || req.reserved2) + return ERR_PTR(-EOPNOTSUPP); + + if (udata->inlen > sizeof(req) && + !ib_is_udata_cleared(udata, sizeof(req), + udata->inlen - sizeof(req))) + return ERR_PTR(-EOPNOTSUPP); + + ndescs = req.num_klms ? 
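/*
 * (Aside: the request/response handling in mlx5_ib_alloc_mw() above is the
 * standard extensible-uverbs pattern; this comment only summarizes the
 * hunks, it adds no new mechanism.) The driver copies just
 * min(udata->inlen, sizeof(req)), so older, shorter requests still parse;
 * it rejects set comp_mask/reserved bits and any non-zero bytes beyond
 * sizeof(req) with -EOPNOTSUPP, so unknown extensions fail loudly instead
 * of being silently ignored; and on output it clamps the reply to
 * udata->outlen, reporting the bytes actually written back in
 * resp.response_length.
 */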
roundup(req.num_klms, 4) : roundup(1, 4); + + mw = kzalloc(sizeof(*mw), GFP_KERNEL); + in = kzalloc(sizeof(*in), GFP_KERNEL); + if (!mw || !in) { + err = -ENOMEM; + goto free; + } + + in->seg.status = MLX5_MKEY_STATUS_FREE; + in->seg.xlt_oct_size = cpu_to_be32(ndescs); + in->seg.flags_pd = cpu_to_be32(to_mpd(pd)->pdn); + in->seg.flags = MLX5_PERM_UMR_EN | MLX5_ACCESS_MODE_KLM | + MLX5_PERM_LOCAL_READ; + if (type == IB_MW_TYPE_2) + in->seg.flags_pd |= cpu_to_be32(MLX5_MKEY_REMOTE_INVAL); + in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8); + + err = mlx5_core_create_mkey(dev->mdev, &mw->mmkey, in, sizeof(*in), + NULL, NULL, NULL); + if (err) + goto free; + + mw->ibmw.rkey = mw->mmkey.key; + + resp.response_length = min(offsetof(typeof(resp), response_length) + + sizeof(resp.response_length), udata->outlen); + if (resp.response_length) { + err = ib_copy_to_udata(udata, &resp, resp.response_length); + if (err) { + mlx5_core_destroy_mkey(dev->mdev, &mw->mmkey); + goto free; + } + } + + kfree(in); + return &mw->ibmw; + +free: + kfree(mw); + kfree(in); + return ERR_PTR(err); +} + +int mlx5_ib_dealloc_mw(struct ib_mw *mw) +{ + struct mlx5_ib_mw *mmw = to_mmw(mw); + int err; + + err = mlx5_core_destroy_mkey((to_mdev(mw->device))->mdev, + &mmw->mmkey); + if (!err) + kfree(mmw); + return err; +} + int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask, struct ib_mr_status *mr_status) { diff --git a/drivers/infiniband/hw/mlx5/user.h b/drivers/infiniband/hw/mlx5/user.h index b94a55404a59..61bc308bb802 100644 --- a/drivers/infiniband/hw/mlx5/user.h +++ b/drivers/infiniband/hw/mlx5/user.h @@ -152,6 +152,13 @@ struct mlx5_ib_create_qp_resp { __u32 uuar_index; }; +struct mlx5_ib_alloc_mw { + __u32 comp_mask; + __u8 num_klms; + __u8 reserved1; + __u16 reserved2; +}; + static inline int get_qp_user_index(struct mlx5_ib_ucontext *ucontext, struct mlx5_ib_create_qp *ucmd, int inlen, diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 72bba5261766..3044cfa683f1 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -769,7 +769,9 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 cd[0x1]; u8 reserved_at_22c[0x1]; u8 apm[0x1]; - u8 reserved_at_22e[0x7]; + u8 reserved_at_22e[0x2]; + u8 imaicl[0x1]; + u8 reserved_at_231[0x4]; u8 qkv[0x1]; u8 pkv[0x1]; u8 set_deth_sqpn[0x1]; -- cgit v1.2.3 From 871b642adebe300be2e50aa5f65a418510f636ec Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Fri, 26 Feb 2016 10:45:37 +0100 Subject: netdev: introduce ndo_set_rx_headroom This method allows the controlling device (i.e. the bridge) to specify additional headroom to be allocated for skb head on frame reception. Signed-off-by: Paolo Abeni Signed-off-by: David S. Miller --- include/linux/netdevice.h | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index e52077ffe5ed..efe7cec111fa 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1093,6 +1093,12 @@ struct tc_to_netdev { * This function is used to get egress tunnel information for given skb. * This is useful for retrieving outer tunnel header parameters while * sampling packet. + * void (*ndo_set_rx_headroom)(struct net_device *dev, int needed_headroom); + * This function is used to specify the headroom that the skb must + * consider when allocation skb during packet reception. Setting + * appropriate rx headroom value allows avoiding skb head copy on + * forward. 
Setting a negative value reset the rx headroom to the + * default value. * */ struct net_device_ops { @@ -1278,6 +1284,8 @@ struct net_device_ops { bool proto_down); int (*ndo_fill_metadata_dst)(struct net_device *dev, struct sk_buff *skb); + void (*ndo_set_rx_headroom)(struct net_device *dev, + int needed_headroom); }; /** @@ -1315,6 +1323,8 @@ struct net_device_ops { * @IFF_L3MDEV_SLAVE: device is enslaved to an L3 master device * @IFF_TEAM: device is a team device * @IFF_RXFH_CONFIGURED: device has had Rx Flow indirection table configured + * @IFF_PHONY_HEADROOM: the headroom value is controlled by an external + * entity (i.e. the master device for bridged veth) */ enum netdev_priv_flags { IFF_802_1Q_VLAN = 1<<0, @@ -1343,6 +1353,7 @@ enum netdev_priv_flags { IFF_L3MDEV_SLAVE = 1<<23, IFF_TEAM = 1<<24, IFF_RXFH_CONFIGURED = 1<<25, + IFF_PHONY_HEADROOM = 1<<26, }; #define IFF_802_1Q_VLAN IFF_802_1Q_VLAN @@ -1937,6 +1948,26 @@ struct netdev_queue *netdev_pick_tx(struct net_device *dev, struct sk_buff *skb, void *accel_priv); +/* returns the headroom that the master device needs to take in account + * when forwarding to this dev + */ +static inline unsigned netdev_get_fwd_headroom(struct net_device *dev) +{ + return dev->priv_flags & IFF_PHONY_HEADROOM ? 0 : dev->needed_headroom; +} + +static inline void netdev_set_rx_headroom(struct net_device *dev, int new_hr) +{ + if (dev->netdev_ops->ndo_set_rx_headroom) + dev->netdev_ops->ndo_set_rx_headroom(dev, new_hr); +} + +/* set the device rx headroom to the dev's default */ +static inline void netdev_reset_rx_headroom(struct net_device *dev) +{ + netdev_set_rx_headroom(dev, -1); +} + /* * Net namespace inlines */ -- cgit v1.2.3 From f6763c29ab86c3ee27760a06e07bbeab47635b61 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 1 Mar 2016 13:05:54 -0500 Subject: svcrdma: Do not send Write chunk XDR pad with inline content The NFS server's XDR encoders adds an XDR pad for content in the xdr_buf page list at the beginning of the xdr_buf's tail buffer. On RDMA transports, Write chunks are sent separately and without an XDR pad. If a Write chunk is being sent, strip off the pad in the tail buffer so that inline content following the Write chunk remains XDR-aligned when it is sent to the client. BugLink: https://bugzilla.linux-nfs.org/show_bug.cgi?id=294 Signed-off-by: Chuck Lever Signed-off-by: J. 
Bruce Fields --- include/linux/sunrpc/svc_rdma.h | 2 +- net/sunrpc/xprtrdma/svc_rdma_backchannel.c | 2 +- net/sunrpc/xprtrdma/svc_rdma_sendto.c | 22 +++++++++++++++++----- 3 files changed, 19 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index 5322fea6fe4c..40b678584041 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -224,7 +224,7 @@ extern int rdma_read_chunk_frmr(struct svcxprt_rdma *, struct svc_rqst *, /* svc_rdma_sendto.c */ extern int svc_rdma_map_xdr(struct svcxprt_rdma *, struct xdr_buf *, - struct svc_rdma_req_map *); + struct svc_rdma_req_map *, bool); extern int svc_rdma_sendto(struct svc_rqst *); extern struct rpcrdma_read_chunk * svc_rdma_get_read_chunk(struct rpcrdma_msg *); diff --git a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c index 65a7c232a345..de3919624fac 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c +++ b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c @@ -107,7 +107,7 @@ static int svc_rdma_bc_sendto(struct svcxprt_rdma *rdma, int ret; vec = svc_rdma_get_req_map(rdma); - ret = svc_rdma_map_xdr(rdma, sndbuf, vec); + ret = svc_rdma_map_xdr(rdma, sndbuf, vec, false); if (ret) goto out_err; diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c index 86fea5c59125..a8fab9968891 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c +++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c @@ -57,7 +57,8 @@ static u32 xdr_padsize(u32 len) int svc_rdma_map_xdr(struct svcxprt_rdma *xprt, struct xdr_buf *xdr, - struct svc_rdma_req_map *vec) + struct svc_rdma_req_map *vec, + bool write_chunk_present) { int sge_no; u32 sge_bytes; @@ -97,9 +98,20 @@ int svc_rdma_map_xdr(struct svcxprt_rdma *xprt, /* Tail SGE */ if (xdr->tail[0].iov_len) { - vec->sge[sge_no].iov_base = xdr->tail[0].iov_base; - vec->sge[sge_no].iov_len = xdr->tail[0].iov_len; - sge_no++; + unsigned char *base = xdr->tail[0].iov_base; + size_t len = xdr->tail[0].iov_len; + u32 xdr_pad = xdr_padsize(xdr->page_len); + + if (write_chunk_present && xdr_pad) { + base += xdr_pad; + len -= xdr_pad; + } + + if (len) { + vec->sge[sge_no].iov_base = base; + vec->sge[sge_no].iov_len = len; + sge_no++; + } } dprintk("svcrdma: %s: sge_no %d page_no %d " @@ -594,7 +606,7 @@ int svc_rdma_sendto(struct svc_rqst *rqstp) ctxt = svc_rdma_get_context(rdma); ctxt->direction = DMA_TO_DEVICE; vec = svc_rdma_get_req_map(rdma); - ret = svc_rdma_map_xdr(rdma, &rqstp->rq_res, vec); + ret = svc_rdma_map_xdr(rdma, &rqstp->rq_res, vec, wr_ary != NULL); if (ret) goto err0; inline_bytes = rqstp->rq_res.len; -- cgit v1.2.3 From 4500632f60fa0d85e4101c374898cdf9b7b0cfac Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 1 Mar 2016 13:06:02 -0500 Subject: nfsd: Lower NFSv4.1 callback message size limit The maximum size of a backchannel message on RPC-over-RDMA depends on the connection's inline threshold. Today that threshold is typically 1024 bytes, making the maximum message size 996 bytes. The Linux server's CREATE_SESSION operation checks that the size of callback Calls can be as large as 1044 bytes, to accommodate RPCSEC_GSS. Thus CREATE_SESSION fails if a client advertises the true message size maximum of 996 bytes. But the server's backchannel currently does not support RPCSEC_GSS. The actual maximum size it needs is much smaller. It is safe to reduce the limit to enable NFSv4.1 on RDMA backchannel operation. 
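A rough expansion of the new limits, assuming the usual msg_prot.h values
(RPC_CALLHDRSIZE = 6 and RPC_REPHDRSIZE = 4 XDR words, RPC_MAX_AUTH_SIZE =
400) and UNX_MAXNODENAME = __NEW_UTS_LEN = 64:

	UNX_CALLSLACK                   = 21 + XDR_QUADLEN(64) = 37 words
	RPC_MAX_HEADER_WITH_AUTH_SYS    = 6 + 2 * (2 + 37)     = 84 words (336 bytes)
	RPC_MAX_REPHEADER_WITH_AUTH_SYS = 4 + (2 + 2)          =  8 words  (32 bytes)

The old RPCSEC_GSS-sized macros work out to 210 words (840 bytes) and 106
words (424 bytes) respectively; that 840-byte call header allowance is what
pushed the advertised CREATE_SESSION requirement to 1044 bytes. With the
AUTH_SYS-sized macros the callback Call requirement drops to roughly 540
bytes, comfortably under the 996-byte RPC-over-RDMA inline maximum.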
Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 21 +++++++++++++-------- include/linux/sunrpc/auth.h | 7 +++++++ net/sunrpc/auth_null.c | 4 ++-- net/sunrpc/auth_unix.c | 6 ++---- 4 files changed, 24 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index d4b0b789ea24..0462eeddfff9 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -2587,21 +2587,26 @@ static __be32 check_forechannel_attrs(struct nfsd4_channel_attrs *ca, struct nfs return nfs_ok; } +/* + * Server's NFSv4.1 backchannel support is AUTH_SYS-only for now. + * These are based on similar macros in linux/sunrpc/msg_prot.h . + */ +#define RPC_MAX_HEADER_WITH_AUTH_SYS \ + (RPC_CALLHDRSIZE + 2 * (2 + UNX_CALLSLACK)) + +#define RPC_MAX_REPHEADER_WITH_AUTH_SYS \ + (RPC_REPHDRSIZE + (2 + NUL_REPLYSLACK)) + #define NFSD_CB_MAX_REQ_SZ ((NFS4_enc_cb_recall_sz + \ - RPC_MAX_HEADER_WITH_AUTH) * sizeof(__be32)) + RPC_MAX_HEADER_WITH_AUTH_SYS) * sizeof(__be32)) #define NFSD_CB_MAX_RESP_SZ ((NFS4_dec_cb_recall_sz + \ - RPC_MAX_REPHEADER_WITH_AUTH) * sizeof(__be32)) + RPC_MAX_REPHEADER_WITH_AUTH_SYS) * \ + sizeof(__be32)) static __be32 check_backchannel_attrs(struct nfsd4_channel_attrs *ca) { ca->headerpadsz = 0; - /* - * These RPC_MAX_HEADER macros are overkill, especially since we - * don't even do gss on the backchannel yet. But this is still - * less than 1k. Tighten up this estimate in the unlikely event - * it turns out to be a problem for some client: - */ if (ca->maxreq_sz < NFSD_CB_MAX_REQ_SZ) return nfserr_toosmall; if (ca->maxresp_sz < NFSD_CB_MAX_RESP_SZ) diff --git a/include/linux/sunrpc/auth.h b/include/linux/sunrpc/auth.h index 1ecf13e148b8..6a241a277249 100644 --- a/include/linux/sunrpc/auth.h +++ b/include/linux/sunrpc/auth.h @@ -20,11 +20,18 @@ #include #include +/* + * Maximum size of AUTH_NONE authentication information, in XDR words. + */ +#define NUL_CALLSLACK (4) +#define NUL_REPLYSLACK (2) + /* * Size of the nodename buffer. RFC1831 specifies a hard limit of 255 bytes, * but Linux hostnames are actually limited to __NEW_UTS_LEN bytes. 
*/ #define UNX_MAXNODENAME __NEW_UTS_LEN +#define UNX_CALLSLACK (21 + XDR_QUADLEN(UNX_MAXNODENAME)) struct rpcsec_gss_info; diff --git a/net/sunrpc/auth_null.c b/net/sunrpc/auth_null.c index c2a2b584a056..8d9eb4d5ddd8 100644 --- a/net/sunrpc/auth_null.c +++ b/net/sunrpc/auth_null.c @@ -113,8 +113,8 @@ const struct rpc_authops authnull_ops = { static struct rpc_auth null_auth = { - .au_cslack = 4, - .au_rslack = 2, + .au_cslack = NUL_CALLSLACK, + .au_rslack = NUL_REPLYSLACK, .au_ops = &authnull_ops, .au_flavor = RPC_AUTH_NULL, .au_count = ATOMIC_INIT(0), diff --git a/net/sunrpc/auth_unix.c b/net/sunrpc/auth_unix.c index 548240dd15fc..0d3dd364c22f 100644 --- a/net/sunrpc/auth_unix.c +++ b/net/sunrpc/auth_unix.c @@ -23,8 +23,6 @@ struct unx_cred { }; #define uc_uid uc_base.cr_uid -#define UNX_WRITESLACK (21 + XDR_QUADLEN(UNX_MAXNODENAME)) - #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) # define RPCDBG_FACILITY RPCDBG_AUTH #endif @@ -228,8 +226,8 @@ const struct rpc_authops authunix_ops = { static struct rpc_auth unix_auth = { - .au_cslack = UNX_WRITESLACK, - .au_rslack = 2, /* assume AUTH_NULL verf */ + .au_cslack = UNX_CALLSLACK, + .au_rslack = NUL_REPLYSLACK, .au_ops = &authunix_ops, .au_flavor = RPC_AUTH_UNIX, .au_count = ATOMIC_INIT(0), -- cgit v1.2.3 From bf36387ad394ad4fc93ad85fdd4a95dfa583556a Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 1 Mar 2016 13:06:20 -0500 Subject: svcrdma: svc_rdma_post_recv() should close connection on error Clean up: Most svc_rdma_post_recv() call sites close the transport connection when a receive cannot be posted. Wrap that in a common helper. Signed-off-by: Chuck Lever Reviewed-by: Devesh Sharma Tested-by: Devesh Sharma Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/svc_rdma.h | 1 + net/sunrpc/xprtrdma/svc_rdma_backchannel.c | 11 ++--------- net/sunrpc/xprtrdma/svc_rdma_recvfrom.c | 10 +--------- net/sunrpc/xprtrdma/svc_rdma_sendto.c | 7 +------ net/sunrpc/xprtrdma/svc_rdma_transport.c | 15 +++++++++++++++ 5 files changed, 20 insertions(+), 24 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index 40b678584041..aef47dd2bd1a 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -234,6 +234,7 @@ extern int svc_rdma_send(struct svcxprt_rdma *, struct ib_send_wr *); extern void svc_rdma_send_error(struct svcxprt_rdma *, struct rpcrdma_msg *, enum rpcrdma_errcode); extern int svc_rdma_post_recv(struct svcxprt_rdma *, gfp_t); +extern int svc_rdma_repost_recv(struct svcxprt_rdma *, gfp_t); extern int svc_rdma_create_listen(struct svc_serv *, int, struct sockaddr *); extern struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *); extern void svc_rdma_put_context(struct svc_rdma_op_ctxt *, int); diff --git a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c index de3919624fac..254be8661981 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c +++ b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c @@ -111,16 +111,9 @@ static int svc_rdma_bc_sendto(struct svcxprt_rdma *rdma, if (ret) goto out_err; - /* Post a recv buffer to handle the reply for this request. 
*/ - ret = svc_rdma_post_recv(rdma, GFP_NOIO); - if (ret) { - pr_err("svcrdma: Failed to post bc receive buffer, err=%d.\n", - ret); - pr_err("svcrdma: closing transport %p.\n", rdma); - set_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags); - ret = -ENOTCONN; + ret = svc_rdma_repost_recv(rdma, GFP_NOIO); + if (ret) goto out_err; - } ctxt = svc_rdma_get_context(rdma); ctxt->pages[0] = virt_to_page(rqst->rq_buffer); diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c index c8b8a8b4181e..acf15b8bca70 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c +++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c @@ -711,13 +711,5 @@ defer: return 0; repost: - ret = svc_rdma_post_recv(rdma_xprt, GFP_KERNEL); - if (ret) { - pr_err("svcrdma: could not post a receive buffer, err=%d.\n", - ret); - pr_err("svcrdma: closing transport %p.\n", rdma_xprt); - set_bit(XPT_CLOSE, &rdma_xprt->sc_xprt.xpt_flags); - ret = -ENOTCONN; - } - return ret; + return svc_rdma_repost_recv(rdma_xprt, GFP_KERNEL); } diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c index 0f8798d4ed88..ace9efa7aa6c 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c +++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c @@ -475,13 +475,8 @@ static int send_reply(struct svcxprt_rdma *rdma, int pages; int ret; - /* Post a recv buffer to handle another request. */ - ret = svc_rdma_post_recv(rdma, GFP_KERNEL); + ret = svc_rdma_repost_recv(rdma, GFP_KERNEL); if (ret) { - printk(KERN_INFO - "svcrdma: could not post a receive buffer, err=%d." - "Closing transport %p.\n", ret, rdma); - set_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags); svc_rdma_put_context(ctxt, 0); return -ENOTCONN; } diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index 5763825d09bf..03fdfce45493 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -722,6 +722,21 @@ int svc_rdma_post_recv(struct svcxprt_rdma *xprt, gfp_t flags) return -ENOMEM; } +int svc_rdma_repost_recv(struct svcxprt_rdma *xprt, gfp_t flags) +{ + int ret = 0; + + ret = svc_rdma_post_recv(xprt, flags); + if (ret) { + pr_err("svcrdma: could not post a receive buffer, err=%d.\n", + ret); + pr_err("svcrdma: closing transport %p.\n", xprt); + set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags); + ret = -ENOTCONN; + } + return ret; +} + /* * This function handles the CONNECT_REQUEST event on a listening * endpoint. It is passed the cma_id for the _new_ connection. The context in -- cgit v1.2.3 From c6db03ea577846a72dc80638f4a70b392c21962f Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 1 Mar 2016 13:06:29 -0500 Subject: rpcrdma: Add RPCRDMA_HDRLEN_ERR Error headers are shorter than either RDMA_MSG or RDMA_NOMSG. Since HDRLEN_MIN is already used in several other places that would be annoying to change, add RPCRDMA_HDRLEN_ERR for the one or two spots where the shorter length is needed. Signed-off-by: Chuck Lever Reviewed-by: Devesh Sharma Signed-off-by: J. 
Bruce Fields --- include/linux/sunrpc/rpc_rdma.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/rpc_rdma.h b/include/linux/sunrpc/rpc_rdma.h index f33c5a4d6fe4..8c6d23cb0cae 100644 --- a/include/linux/sunrpc/rpc_rdma.h +++ b/include/linux/sunrpc/rpc_rdma.h @@ -102,6 +102,7 @@ struct rpcrdma_msg { * Smallest RPC/RDMA header: rm_xid through rm_type, then rm_nochunks */ #define RPCRDMA_HDRLEN_MIN (sizeof(__be32) * 7) +#define RPCRDMA_HDRLEN_ERR (sizeof(__be32) * 5) enum rpcrdma_errcode { ERR_VERS = 1, -- cgit v1.2.3 From a6081b82c533d78041acb76738716aa7dafb339a Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 1 Mar 2016 13:06:38 -0500 Subject: svcrdma: Make RDMA_ERROR messages work Fix several issues with svc_rdma_send_error(): - Post a receive buffer to replace the one that was consumed by the incoming request - Posting a send should use DMA_TO_DEVICE, not DMA_FROM_DEVICE - No need to put_page _and_ free pages in svc_rdma_put_context - Make sure the sge is set up completely in case the error path goes through svc_rdma_unmap_dma() - Replace the use of ENOSYS, which has a reserved meaning Related fixes in svc_rdma_recvfrom(): - Don't leak the ctxt associated with the incoming request - Don't close the connection after sending an error reply - Let svc_rdma_send_error() figure out the right header error code As a last clean up, move svc_rdma_send_error() to svc_rdma_sendto.c with other similar functions. There is some common logic in these functions that could someday be combined to reduce code duplication. Signed-off-by: Chuck Lever Reviewed-by: Devesh Sharma Tested-by: Devesh Sharma Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/svc_rdma.h | 4 +-- net/sunrpc/xprtrdma/svc_rdma_marshal.c | 2 +- net/sunrpc/xprtrdma/svc_rdma_recvfrom.c | 19 +++++----- net/sunrpc/xprtrdma/svc_rdma_sendto.c | 62 ++++++++++++++++++++++++++++++++ net/sunrpc/xprtrdma/svc_rdma_transport.c | 54 ---------------------------- 5 files changed, 74 insertions(+), 67 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index aef47dd2bd1a..42e852230a03 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -228,11 +228,11 @@ extern int svc_rdma_map_xdr(struct svcxprt_rdma *, struct xdr_buf *, extern int svc_rdma_sendto(struct svc_rqst *); extern struct rpcrdma_read_chunk * svc_rdma_get_read_chunk(struct rpcrdma_msg *); +extern void svc_rdma_send_error(struct svcxprt_rdma *, struct rpcrdma_msg *, + int); /* svc_rdma_transport.c */ extern int svc_rdma_send(struct svcxprt_rdma *, struct ib_send_wr *); -extern void svc_rdma_send_error(struct svcxprt_rdma *, struct rpcrdma_msg *, - enum rpcrdma_errcode); extern int svc_rdma_post_recv(struct svcxprt_rdma *, gfp_t); extern int svc_rdma_repost_recv(struct svcxprt_rdma *, gfp_t); extern int svc_rdma_create_listen(struct svc_serv *, int, struct sockaddr *); diff --git a/net/sunrpc/xprtrdma/svc_rdma_marshal.c b/net/sunrpc/xprtrdma/svc_rdma_marshal.c index e2fca7617242..f74fc523d95b 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_marshal.c +++ b/net/sunrpc/xprtrdma/svc_rdma_marshal.c @@ -162,7 +162,7 @@ int svc_rdma_xdr_decode_req(struct rpcrdma_msg **rdma_req, } if (rmsgp->rm_vers != rpcrdma_version) - return -ENOSYS; + return -EPROTONOSUPPORT; /* Pull in the extra for the padded case and bump our pointer */ if (rmsgp->rm_type == rdma_msgp) { diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c 
b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c index acf15b8bca70..0f09052110a7 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c +++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c @@ -612,7 +612,6 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp) struct svc_rdma_op_ctxt *ctxt = NULL; struct rpcrdma_msg *rmsgp; int ret = 0; - int len; dprintk("svcrdma: rqstp=%p\n", rqstp); @@ -654,15 +653,10 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp) rdma_build_arg_xdr(rqstp, ctxt, ctxt->byte_len); /* Decode the RDMA header. */ - len = svc_rdma_xdr_decode_req(&rmsgp, rqstp); - rqstp->rq_xprt_hlen = len; - - /* If the request is invalid, reply with an error */ - if (len < 0) { - if (len == -ENOSYS) - svc_rdma_send_error(rdma_xprt, rmsgp, ERR_VERS); - goto close_out; - } + ret = svc_rdma_xdr_decode_req(&rmsgp, rqstp); + if (ret < 0) + goto out_err; + rqstp->rq_xprt_hlen = ret; if (svc_rdma_is_backchannel_reply(xprt, rmsgp)) { ret = svc_rdma_handle_bc_reply(xprt->xpt_bc_xprt, rmsgp, @@ -698,6 +692,11 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp) svc_xprt_copy_addrs(rqstp, xprt); return ret; +out_err: + svc_rdma_send_error(rdma_xprt, rmsgp, ret); + svc_rdma_put_context(ctxt, 0); + return 0; + close_out: if (ctxt) svc_rdma_put_context(ctxt, 1); diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c index ace9efa7aa6c..a26ca569f257 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c +++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c @@ -652,3 +652,65 @@ int svc_rdma_sendto(struct svc_rqst *rqstp) set_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags); return -ENOTCONN; } + +void svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp, + int status) +{ + struct ib_send_wr err_wr; + struct page *p; + struct svc_rdma_op_ctxt *ctxt; + enum rpcrdma_errcode err; + __be32 *va; + int length; + int ret; + + ret = svc_rdma_repost_recv(xprt, GFP_KERNEL); + if (ret) + return; + + p = alloc_page(GFP_KERNEL); + if (!p) + return; + va = page_address(p); + + /* XDR encode an error reply */ + err = ERR_CHUNK; + if (status == -EPROTONOSUPPORT) + err = ERR_VERS; + length = svc_rdma_xdr_encode_error(xprt, rmsgp, err, va); + + ctxt = svc_rdma_get_context(xprt); + ctxt->direction = DMA_TO_DEVICE; + ctxt->count = 1; + ctxt->pages[0] = p; + + /* Prepare SGE for local address */ + ctxt->sge[0].lkey = xprt->sc_pd->local_dma_lkey; + ctxt->sge[0].length = length; + ctxt->sge[0].addr = ib_dma_map_page(xprt->sc_cm_id->device, + p, 0, length, DMA_TO_DEVICE); + if (ib_dma_mapping_error(xprt->sc_cm_id->device, ctxt->sge[0].addr)) { + dprintk("svcrdma: Error mapping buffer for protocol error\n"); + svc_rdma_put_context(ctxt, 1); + return; + } + atomic_inc(&xprt->sc_dma_used); + + /* Prepare SEND WR */ + memset(&err_wr, 0, sizeof(err_wr)); + ctxt->wr_op = IB_WR_SEND; + err_wr.wr_id = (unsigned long)ctxt; + err_wr.sg_list = ctxt->sge; + err_wr.num_sge = 1; + err_wr.opcode = IB_WR_SEND; + err_wr.send_flags = IB_SEND_SIGNALED; + + /* Post It */ + ret = svc_rdma_send(xprt, &err_wr); + if (ret) { + dprintk("svcrdma: Error %d posting send for protocol error\n", + ret); + svc_rdma_unmap_dma(ctxt); + svc_rdma_put_context(ctxt, 1); + } +} diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index 03fdfce45493..15c8fa3ee794 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -1433,57 +1433,3 @@ int svc_rdma_send(struct svcxprt_rdma *xprt, struct ib_send_wr *wr) } return ret; } - -void svc_rdma_send_error(struct svcxprt_rdma 
*xprt, struct rpcrdma_msg *rmsgp, - enum rpcrdma_errcode err) -{ - struct ib_send_wr err_wr; - struct page *p; - struct svc_rdma_op_ctxt *ctxt; - __be32 *va; - int length; - int ret; - - p = alloc_page(GFP_KERNEL); - if (!p) - return; - va = page_address(p); - - /* XDR encode error */ - length = svc_rdma_xdr_encode_error(xprt, rmsgp, err, va); - - ctxt = svc_rdma_get_context(xprt); - ctxt->direction = DMA_FROM_DEVICE; - ctxt->count = 1; - ctxt->pages[0] = p; - - /* Prepare SGE for local address */ - ctxt->sge[0].addr = ib_dma_map_page(xprt->sc_cm_id->device, - p, 0, length, DMA_FROM_DEVICE); - if (ib_dma_mapping_error(xprt->sc_cm_id->device, ctxt->sge[0].addr)) { - put_page(p); - svc_rdma_put_context(ctxt, 1); - return; - } - atomic_inc(&xprt->sc_dma_used); - ctxt->sge[0].lkey = xprt->sc_pd->local_dma_lkey; - ctxt->sge[0].length = length; - - /* Prepare SEND WR */ - memset(&err_wr, 0, sizeof err_wr); - ctxt->wr_op = IB_WR_SEND; - err_wr.wr_id = (unsigned long)ctxt; - err_wr.sg_list = ctxt->sge; - err_wr.num_sge = 1; - err_wr.opcode = IB_WR_SEND; - err_wr.send_flags = IB_SEND_SIGNALED; - - /* Post It */ - ret = svc_rdma_send(xprt, &err_wr); - if (ret) { - dprintk("svcrdma: Error %d posting send for protocol error\n", - ret); - svc_rdma_unmap_dma(ctxt); - svc_rdma_put_context(ctxt, 1); - } -} -- cgit v1.2.3 From f3ea53fb3bc3908b6e9ef39e53a75b55df7f78f8 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 1 Mar 2016 13:06:47 -0500 Subject: svcrdma: Use correct XID in error replies When constructing an error reply, svc_rdma_xdr_encode_error() needs to view the client's request message so it can get the failing request's XID. svc_rdma_xdr_decode_req() is supposed to return a pointer to the client's request header. But if it fails to decode the client's message (and thus an error reply is needed) it does not return the pointer. The server then sends a bogus XID in the error reply. Instead, unconditionally generate the pointer to the client's header in svc_rdma_recvfrom(), and pass that pointer to both functions. Signed-off-by: Chuck Lever Reviewed-by: Devesh Sharma Tested-by: Devesh Sharma Signed-off-by: J. 
Bruce Fields --- include/linux/sunrpc/svc_rdma.h | 2 +- net/sunrpc/xprtrdma/svc_rdma_marshal.c | 7 +------ net/sunrpc/xprtrdma/svc_rdma_recvfrom.c | 3 ++- 3 files changed, 4 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index 42e852230a03..c2b0d95602d8 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -199,7 +199,7 @@ extern int svc_rdma_handle_bc_reply(struct rpc_xprt *xprt, struct xdr_buf *rcvbuf); /* svc_rdma_marshal.c */ -extern int svc_rdma_xdr_decode_req(struct rpcrdma_msg **, struct svc_rqst *); +extern int svc_rdma_xdr_decode_req(struct rpcrdma_msg *, struct svc_rqst *); extern int svc_rdma_xdr_encode_error(struct svcxprt_rdma *, struct rpcrdma_msg *, enum rpcrdma_errcode, __be32 *); diff --git a/net/sunrpc/xprtrdma/svc_rdma_marshal.c b/net/sunrpc/xprtrdma/svc_rdma_marshal.c index f74fc523d95b..b9ce01f6af90 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_marshal.c +++ b/net/sunrpc/xprtrdma/svc_rdma_marshal.c @@ -145,15 +145,11 @@ static __be32 *decode_reply_array(__be32 *va, __be32 *vaend) return (__be32 *)&ary->wc_array[nchunks]; } -int svc_rdma_xdr_decode_req(struct rpcrdma_msg **rdma_req, - struct svc_rqst *rqstp) +int svc_rdma_xdr_decode_req(struct rpcrdma_msg *rmsgp, struct svc_rqst *rqstp) { - struct rpcrdma_msg *rmsgp = NULL; __be32 *va, *vaend; u32 hdr_len; - rmsgp = (struct rpcrdma_msg *)rqstp->rq_arg.head[0].iov_base; - /* Verify that there's enough bytes for header + something */ if (rqstp->rq_arg.len <= RPCRDMA_HDRLEN_MIN) { dprintk("svcrdma: header too short = %d\n", @@ -201,7 +197,6 @@ int svc_rdma_xdr_decode_req(struct rpcrdma_msg **rdma_req, hdr_len = (unsigned long)va - (unsigned long)rmsgp; rqstp->rq_arg.head[0].iov_len -= hdr_len; - *rdma_req = rmsgp; return hdr_len; } diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c index 0f09052110a7..8f68cb6d89fe 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c +++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c @@ -653,7 +653,8 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp) rdma_build_arg_xdr(rqstp, ctxt, ctxt->byte_len); /* Decode the RDMA header. */ - ret = svc_rdma_xdr_decode_req(&rmsgp, rqstp); + rmsgp = (struct rpcrdma_msg *)rqstp->rq_arg.head[0].iov_base; + ret = svc_rdma_xdr_decode_req(rmsgp, rqstp); if (ret < 0) goto out_err; rqstp->rq_xprt_hlen = ret; -- cgit v1.2.3 From 8bd5ba86d9ba7169e137fc4f32c553080c056a02 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 1 Mar 2016 13:07:13 -0500 Subject: svcrdma: Use new CQ API for RPC-over-RDMA server receive CQs Calling ib_poll_cq() to sort through WCs during a completion is a common pattern amongst RDMA consumers. Since commit 14d3a3b2498e ("IB: add a proper completion queue abstraction"), WC sorting can be handled by the IB core. By converting to this new API, svcrdma is made a better neighbor to other RDMA consumers, as it allows the core to schedule the delivery of completions more fairly amongst all active consumers. Because each ib_cqe carries a pointer to a completion method, the core can now post operations on a consumer's QP, and handle the completions itself. svcrdma receive completions no longer use the dto_tasklet. Each polled Receive WC is now handled individually in soft IRQ context. The server transport's rdma_stat_rq_poll and rdma_stat_rq_prod metrics are no longer updated. Signed-off-by: Chuck Lever Signed-off-by: J. 
Bruce Fields --- include/linux/sunrpc/svc_rdma.h | 2 +- net/sunrpc/xprtrdma/svc_rdma_transport.c | 129 ++++++++++--------------------- 2 files changed, 40 insertions(+), 91 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index c2b0d95602d8..cf79ab86d3d4 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -75,6 +75,7 @@ struct svc_rdma_op_ctxt { struct svc_rdma_fastreg_mr *frmr; int hdr_count; struct xdr_buf arg; + struct ib_cqe cqe; struct list_head dto_q; enum ib_wr_opcode wr_op; enum ib_wc_status wc_status; @@ -174,7 +175,6 @@ struct svcxprt_rdma { struct work_struct sc_work; }; /* sc_flags */ -#define RDMAXPRT_RQ_PENDING 1 #define RDMAXPRT_SQ_PENDING 2 #define RDMAXPRT_CONN_PENDING 3 diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index 15c8fa3ee794..5dfa1b6bf0c2 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -68,7 +68,6 @@ static void svc_rdma_detach(struct svc_xprt *xprt); static void svc_rdma_free(struct svc_xprt *xprt); static int svc_rdma_has_wspace(struct svc_xprt *xprt); static int svc_rdma_secure_port(struct svc_rqst *); -static void rq_cq_reap(struct svcxprt_rdma *xprt); static void sq_cq_reap(struct svcxprt_rdma *xprt); static DECLARE_TASKLET(dto_tasklet, dto_tasklet_func, 0UL); @@ -413,7 +412,6 @@ static void dto_tasklet_func(unsigned long data) list_del_init(&xprt->sc_dto_q); spin_unlock_irqrestore(&dto_lock, flags); - rq_cq_reap(xprt); sq_cq_reap(xprt); svc_xprt_put(&xprt->sc_xprt); @@ -422,93 +420,48 @@ static void dto_tasklet_func(unsigned long data) spin_unlock_irqrestore(&dto_lock, flags); } -/* - * Receive Queue Completion Handler - * - * Since an RQ completion handler is called on interrupt context, we - * need to defer the handling of the I/O to a tasklet - */ -static void rq_comp_handler(struct ib_cq *cq, void *cq_context) -{ - struct svcxprt_rdma *xprt = cq_context; - unsigned long flags; - - /* Guard against unconditional flush call for destroyed QP */ - if (atomic_read(&xprt->sc_xprt.xpt_ref.refcount)==0) - return; - - /* - * Set the bit regardless of whether or not it's on the list - * because it may be on the list already due to an SQ - * completion. - */ - set_bit(RDMAXPRT_RQ_PENDING, &xprt->sc_flags); - - /* - * If this transport is not already on the DTO transport queue, - * add it - */ - spin_lock_irqsave(&dto_lock, flags); - if (list_empty(&xprt->sc_dto_q)) { - svc_xprt_get(&xprt->sc_xprt); - list_add_tail(&xprt->sc_dto_q, &dto_xprt_q); - } - spin_unlock_irqrestore(&dto_lock, flags); - - /* Tasklet does all the work to avoid irqsave locks. */ - tasklet_schedule(&dto_tasklet); -} - -/* - * rq_cq_reap - Process the RQ CQ. +/** + * svc_rdma_wc_receive - Invoked by RDMA provider for each polled Receive WC + * @cq: completion queue + * @wc: completed WR * - * Take all completing WC off the CQE and enqueue the associated DTO - * context on the dto_q for the transport. - * - * Note that caller must hold a transport reference. 
*/ -static void rq_cq_reap(struct svcxprt_rdma *xprt) +static void svc_rdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc) { - int ret; - struct ib_wc wc; - struct svc_rdma_op_ctxt *ctxt = NULL; - - if (!test_and_clear_bit(RDMAXPRT_RQ_PENDING, &xprt->sc_flags)) - return; + struct svcxprt_rdma *xprt = cq->cq_context; + struct ib_cqe *cqe = wc->wr_cqe; + struct svc_rdma_op_ctxt *ctxt; - ib_req_notify_cq(xprt->sc_rq_cq, IB_CQ_NEXT_COMP); - atomic_inc(&rdma_stat_rq_poll); + /* WARNING: Only wc->wr_cqe and wc->status are reliable */ + ctxt = container_of(cqe, struct svc_rdma_op_ctxt, cqe); + ctxt->wc_status = wc->status; + svc_rdma_unmap_dma(ctxt); - while ((ret = ib_poll_cq(xprt->sc_rq_cq, 1, &wc)) > 0) { - ctxt = (struct svc_rdma_op_ctxt *)(unsigned long)wc.wr_id; - ctxt->wc_status = wc.status; - ctxt->byte_len = wc.byte_len; - svc_rdma_unmap_dma(ctxt); - if (wc.status != IB_WC_SUCCESS) { - /* Close the transport */ - dprintk("svcrdma: transport closing putting ctxt %p\n", ctxt); - set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags); - svc_rdma_put_context(ctxt, 1); - svc_xprt_put(&xprt->sc_xprt); - continue; - } - spin_lock_bh(&xprt->sc_rq_dto_lock); - list_add_tail(&ctxt->dto_q, &xprt->sc_rq_dto_q); - spin_unlock_bh(&xprt->sc_rq_dto_lock); - svc_xprt_put(&xprt->sc_xprt); - } + if (wc->status != IB_WC_SUCCESS) + goto flushed; - if (ctxt) - atomic_inc(&rdma_stat_rq_prod); + /* All wc fields are now known to be valid */ + ctxt->byte_len = wc->byte_len; + spin_lock(&xprt->sc_rq_dto_lock); + list_add_tail(&ctxt->dto_q, &xprt->sc_rq_dto_q); + spin_unlock(&xprt->sc_rq_dto_lock); set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags); - /* - * If data arrived before established event, - * don't enqueue. This defers RPC I/O until the - * RDMA connection is complete. - */ - if (!test_bit(RDMAXPRT_CONN_PENDING, &xprt->sc_flags)) - svc_xprt_enqueue(&xprt->sc_xprt); + if (test_bit(RDMAXPRT_CONN_PENDING, &xprt->sc_flags)) + goto out; + svc_xprt_enqueue(&xprt->sc_xprt); + goto out; + +flushed: + if (wc->status != IB_WC_WR_FLUSH_ERR) + pr_warn("svcrdma: receive: %s (%u/0x%x)\n", + ib_wc_status_msg(wc->status), + wc->status, wc->vendor_err); + set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags); + svc_rdma_put_context(ctxt, 1); + +out: + svc_xprt_put(&xprt->sc_xprt); } /* @@ -681,6 +634,7 @@ int svc_rdma_post_recv(struct svcxprt_rdma *xprt, gfp_t flags) ctxt = svc_rdma_get_context(xprt); buflen = 0; ctxt->direction = DMA_FROM_DEVICE; + ctxt->cqe.done = svc_rdma_wc_receive; for (sge_no = 0; buflen < xprt->sc_max_req_size; sge_no++) { if (sge_no >= xprt->sc_max_sge) { pr_err("svcrdma: Too many sges (%d)\n", sge_no); @@ -705,7 +659,7 @@ int svc_rdma_post_recv(struct svcxprt_rdma *xprt, gfp_t flags) recv_wr.next = NULL; recv_wr.sg_list = &ctxt->sge[0]; recv_wr.num_sge = ctxt->count; - recv_wr.wr_id = (u64)(unsigned long)ctxt; + recv_wr.wr_cqe = &ctxt->cqe; svc_xprt_get(&xprt->sc_xprt); ret = ib_post_recv(xprt->sc_qp, &recv_wr, &bad_recv_wr); @@ -1094,12 +1048,8 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) dprintk("svcrdma: error creating SQ CQ for connect request\n"); goto errout; } - cq_attr.cqe = newxprt->sc_rq_depth; - newxprt->sc_rq_cq = ib_create_cq(dev, - rq_comp_handler, - cq_event_handler, - newxprt, - &cq_attr); + newxprt->sc_rq_cq = ib_alloc_cq(dev, newxprt, newxprt->sc_rq_depth, + 0, IB_POLL_SOFTIRQ); if (IS_ERR(newxprt->sc_rq_cq)) { dprintk("svcrdma: error creating RQ CQ for connect request\n"); goto errout; @@ -1193,7 +1143,6 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) * miss the 
first message */ ib_req_notify_cq(newxprt->sc_sq_cq, IB_CQ_NEXT_COMP); - ib_req_notify_cq(newxprt->sc_rq_cq, IB_CQ_NEXT_COMP); /* Accept Connection */ set_bit(RDMAXPRT_CONN_PENDING, &newxprt->sc_flags); @@ -1337,7 +1286,7 @@ static void __svc_rdma_free(struct work_struct *work) ib_destroy_cq(rdma->sc_sq_cq); if (rdma->sc_rq_cq && !IS_ERR(rdma->sc_rq_cq)) - ib_destroy_cq(rdma->sc_rq_cq); + ib_free_cq(rdma->sc_rq_cq); if (rdma->sc_pd && !IS_ERR(rdma->sc_pd)) ib_dealloc_pd(rdma->sc_pd); -- cgit v1.2.3 From be99bb11400ce02552c35a6d3bf054de393ce30e Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 1 Mar 2016 13:07:22 -0500 Subject: svcrdma: Use new CQ API for RPC-over-RDMA server send CQs Calling ib_poll_cq() to sort through WCs during a completion is a common pattern amongst RDMA consumers. Since commit 14d3a3b2498e ("IB: add a proper completion queue abstraction"), WC sorting can be handled by the IB core. By converting to this new API, svcrdma is made a better neighbor to other RDMA consumers, as it allows the core to schedule the delivery of completions more fairly amongst all active consumers. This new API also aims each completion at a function that is specific to the WR's opcode. Thus the ctxt->wr_op field and the switch in process_context are replaced by a set of methods that handle each completion type. Because each ib_cqe carries a pointer to a completion method, the core can now post operations on a consumer's QP, and handle the completions itself. The server's rdma_stat_sq_poll and rdma_stat_sq_prod metrics are no longer updated. As a cleanup, the cq_event_handler, the dto_tasklet, and all associated locking are removed, as they are no longer referenced or used. Signed-off-by: Chuck Lever Tested-by: Steve Wise Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/svc_rdma.h | 9 +- net/sunrpc/xprtrdma/svc_rdma_backchannel.c | 4 +- net/sunrpc/xprtrdma/svc_rdma_recvfrom.c | 14 +- net/sunrpc/xprtrdma/svc_rdma_sendto.c | 12 +- net/sunrpc/xprtrdma/svc_rdma_transport.c | 259 +++++++++++------------------ 5 files changed, 121 insertions(+), 177 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index cf79ab86d3d4..3081339968c3 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -76,8 +76,9 @@ struct svc_rdma_op_ctxt { int hdr_count; struct xdr_buf arg; struct ib_cqe cqe; + struct ib_cqe reg_cqe; + struct ib_cqe inv_cqe; struct list_head dto_q; - enum ib_wr_opcode wr_op; enum ib_wc_status wc_status; u32 byte_len; u32 position; @@ -175,7 +176,6 @@ struct svcxprt_rdma { struct work_struct sc_work; }; /* sc_flags */ -#define RDMAXPRT_SQ_PENDING 2 #define RDMAXPRT_CONN_PENDING 3 #define RPCRDMA_LISTEN_BACKLOG 10 @@ -232,6 +232,11 @@ extern void svc_rdma_send_error(struct svcxprt_rdma *, struct rpcrdma_msg *, int); /* svc_rdma_transport.c */ +extern void svc_rdma_wc_send(struct ib_cq *, struct ib_wc *); +extern void svc_rdma_wc_write(struct ib_cq *, struct ib_wc *); +extern void svc_rdma_wc_reg(struct ib_cq *, struct ib_wc *); +extern void svc_rdma_wc_read(struct ib_cq *, struct ib_wc *); +extern void svc_rdma_wc_inv(struct ib_cq *, struct ib_wc *); extern int svc_rdma_send(struct svcxprt_rdma *, struct ib_send_wr *); extern int svc_rdma_post_recv(struct svcxprt_rdma *, gfp_t); extern int svc_rdma_repost_recv(struct svcxprt_rdma *, gfp_t); diff --git a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c index 254be8661981..a2a7519b0f23 100644 ---
a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c +++ b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c @@ -119,7 +119,6 @@ static int svc_rdma_bc_sendto(struct svcxprt_rdma *rdma, ctxt->pages[0] = virt_to_page(rqst->rq_buffer); ctxt->count = 1; - ctxt->wr_op = IB_WR_SEND; ctxt->direction = DMA_TO_DEVICE; ctxt->sge[0].lkey = rdma->sc_pd->local_dma_lkey; ctxt->sge[0].length = sndbuf->len; @@ -133,7 +132,8 @@ static int svc_rdma_bc_sendto(struct svcxprt_rdma *rdma, atomic_inc(&rdma->sc_dma_used); memset(&send_wr, 0, sizeof(send_wr)); - send_wr.wr_id = (unsigned long)ctxt; + ctxt->cqe.done = svc_rdma_wc_send; + send_wr.wr_cqe = &ctxt->cqe; send_wr.sg_list = ctxt->sge; send_wr.num_sge = 1; send_wr.opcode = IB_WR_SEND; diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c index d3718e94c169..3b24a646eb46 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c +++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c @@ -180,9 +180,9 @@ int rdma_read_chunk_lcl(struct svcxprt_rdma *xprt, clear_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags); memset(&read_wr, 0, sizeof(read_wr)); - read_wr.wr.wr_id = (unsigned long)ctxt; + ctxt->cqe.done = svc_rdma_wc_read; + read_wr.wr.wr_cqe = &ctxt->cqe; read_wr.wr.opcode = IB_WR_RDMA_READ; - ctxt->wr_op = read_wr.wr.opcode; read_wr.wr.send_flags = IB_SEND_SIGNALED; read_wr.rkey = rs_handle; read_wr.remote_addr = rs_offset; @@ -299,8 +299,9 @@ int rdma_read_chunk_frmr(struct svcxprt_rdma *xprt, ctxt->read_hdr = head; /* Prepare REG WR */ + ctxt->reg_cqe.done = svc_rdma_wc_reg; + reg_wr.wr.wr_cqe = &ctxt->reg_cqe; reg_wr.wr.opcode = IB_WR_REG_MR; - reg_wr.wr.wr_id = 0; reg_wr.wr.send_flags = IB_SEND_SIGNALED; reg_wr.wr.num_sge = 0; reg_wr.mr = frmr->mr; @@ -310,6 +311,8 @@ int rdma_read_chunk_frmr(struct svcxprt_rdma *xprt, /* Prepare RDMA_READ */ memset(&read_wr, 0, sizeof(read_wr)); + ctxt->cqe.done = svc_rdma_wc_read; + read_wr.wr.wr_cqe = &ctxt->cqe; read_wr.wr.send_flags = IB_SEND_SIGNALED; read_wr.rkey = rs_handle; read_wr.remote_addr = rs_offset; @@ -317,19 +320,18 @@ int rdma_read_chunk_frmr(struct svcxprt_rdma *xprt, read_wr.wr.num_sge = 1; if (xprt->sc_dev_caps & SVCRDMA_DEVCAP_READ_W_INV) { read_wr.wr.opcode = IB_WR_RDMA_READ_WITH_INV; - read_wr.wr.wr_id = (unsigned long)ctxt; read_wr.wr.ex.invalidate_rkey = ctxt->frmr->mr->lkey; } else { read_wr.wr.opcode = IB_WR_RDMA_READ; read_wr.wr.next = &inv_wr; /* Prepare invalidate */ memset(&inv_wr, 0, sizeof(inv_wr)); - inv_wr.wr_id = (unsigned long)ctxt; + ctxt->inv_cqe.done = svc_rdma_wc_inv; + inv_wr.wr_cqe = &ctxt->inv_cqe; inv_wr.opcode = IB_WR_LOCAL_INV; inv_wr.send_flags = IB_SEND_SIGNALED | IB_SEND_FENCE; inv_wr.ex.invalidate_rkey = frmr->mr->lkey; } - ctxt->wr_op = read_wr.wr.opcode; /* Post the chain */ ret = svc_rdma_send(xprt, ®_wr.wr); diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c index a26ca569f257..4f1b1c4f45f9 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c +++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c @@ -297,8 +297,8 @@ static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp, /* Prepare WRITE WR */ memset(&write_wr, 0, sizeof write_wr); - ctxt->wr_op = IB_WR_RDMA_WRITE; - write_wr.wr.wr_id = (unsigned long)ctxt; + ctxt->cqe.done = svc_rdma_wc_write; + write_wr.wr.wr_cqe = &ctxt->cqe; write_wr.wr.sg_list = &sge[0]; write_wr.wr.num_sge = sge_no; write_wr.wr.opcode = IB_WR_RDMA_WRITE; @@ -549,8 +549,8 @@ static int send_reply(struct svcxprt_rdma *rdma, goto err; } memset(&send_wr, 0, sizeof send_wr); - ctxt->wr_op = 
IB_WR_SEND; - send_wr.wr_id = (unsigned long)ctxt; + ctxt->cqe.done = svc_rdma_wc_send; + send_wr.wr_cqe = &ctxt->cqe; send_wr.sg_list = ctxt->sge; send_wr.num_sge = sge_no; send_wr.opcode = IB_WR_SEND; @@ -698,8 +698,8 @@ void svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp, /* Prepare SEND WR */ memset(&err_wr, 0, sizeof(err_wr)); - ctxt->wr_op = IB_WR_SEND; - err_wr.wr_id = (unsigned long)ctxt; + ctxt->cqe.done = svc_rdma_wc_send; + err_wr.wr_cqe = &ctxt->cqe; err_wr.sg_list = ctxt->sge; err_wr.num_sge = 1; err_wr.opcode = IB_WR_SEND; diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index 5dfa1b6bf0c2..90668969d559 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -63,16 +63,10 @@ static struct svc_xprt *svc_rdma_create(struct svc_serv *serv, int flags); static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt); static void svc_rdma_release_rqst(struct svc_rqst *); -static void dto_tasklet_func(unsigned long data); static void svc_rdma_detach(struct svc_xprt *xprt); static void svc_rdma_free(struct svc_xprt *xprt); static int svc_rdma_has_wspace(struct svc_xprt *xprt); static int svc_rdma_secure_port(struct svc_rqst *); -static void sq_cq_reap(struct svcxprt_rdma *xprt); - -static DECLARE_TASKLET(dto_tasklet, dto_tasklet_func, 0UL); -static DEFINE_SPINLOCK(dto_lock); -static LIST_HEAD(dto_xprt_q); static struct svc_xprt_ops svc_rdma_ops = { .xpo_create = svc_rdma_create, @@ -351,15 +345,6 @@ static void svc_rdma_destroy_maps(struct svcxprt_rdma *xprt) } } -/* ib_cq event handler */ -static void cq_event_handler(struct ib_event *event, void *context) -{ - struct svc_xprt *xprt = context; - dprintk("svcrdma: received CQ event %s (%d), context=%p\n", - ib_event_msg(event->event), event->event, context); - set_bit(XPT_CLOSE, &xprt->xpt_flags); -} - /* QP event handler */ static void qp_event_handler(struct ib_event *event, void *context) { @@ -391,35 +376,6 @@ static void qp_event_handler(struct ib_event *event, void *context) } } -/* - * Data Transfer Operation Tasklet - * - * Walks a list of transports with I/O pending, removing entries as - * they are added to the server's I/O pending list. Two bits indicate - * if SQ, RQ, or both have I/O pending. The dto_lock is an irqsave - * spinlock that serializes access to the transport list with the RQ - * and SQ interrupt handlers. 
- */ -static void dto_tasklet_func(unsigned long data) -{ - struct svcxprt_rdma *xprt; - unsigned long flags; - - spin_lock_irqsave(&dto_lock, flags); - while (!list_empty(&dto_xprt_q)) { - xprt = list_entry(dto_xprt_q.next, - struct svcxprt_rdma, sc_dto_q); - list_del_init(&xprt->sc_dto_q); - spin_unlock_irqrestore(&dto_lock, flags); - - sq_cq_reap(xprt); - - svc_xprt_put(&xprt->sc_xprt); - spin_lock_irqsave(&dto_lock, flags); - } - spin_unlock_irqrestore(&dto_lock, flags); -} - /** * svc_rdma_wc_receive - Invoked by RDMA provider for each polled Receive WC * @cq: completion queue @@ -464,132 +420,127 @@ out: svc_xprt_put(&xprt->sc_xprt); } -/* - * Process a completion context - */ -static void process_context(struct svcxprt_rdma *xprt, - struct svc_rdma_op_ctxt *ctxt) +static void svc_rdma_send_wc_common(struct svcxprt_rdma *xprt, + struct ib_wc *wc, + const char *opname) { - struct svc_rdma_op_ctxt *read_hdr; - int free_pages = 0; - - svc_rdma_unmap_dma(ctxt); - - switch (ctxt->wr_op) { - case IB_WR_SEND: - free_pages = 1; - break; + if (wc->status != IB_WC_SUCCESS) + goto err; - case IB_WR_RDMA_WRITE: - break; +out: + atomic_dec(&xprt->sc_sq_count); + wake_up(&xprt->sc_send_wait); + return; - case IB_WR_RDMA_READ: - case IB_WR_RDMA_READ_WITH_INV: - svc_rdma_put_frmr(xprt, ctxt->frmr); +err: + set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags); + if (wc->status != IB_WC_WR_FLUSH_ERR) + pr_err("svcrdma: %s: %s (%u/0x%x)\n", + opname, ib_wc_status_msg(wc->status), + wc->status, wc->vendor_err); + goto out; +} - if (!test_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags)) - break; +static void svc_rdma_send_wc_common_put(struct ib_cq *cq, struct ib_wc *wc, + const char *opname) +{ + struct svcxprt_rdma *xprt = cq->cq_context; - read_hdr = ctxt->read_hdr; - svc_rdma_put_context(ctxt, 0); + svc_rdma_send_wc_common(xprt, wc, opname); + svc_xprt_put(&xprt->sc_xprt); +} - spin_lock_bh(&xprt->sc_rq_dto_lock); - set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags); - list_add_tail(&read_hdr->dto_q, - &xprt->sc_read_complete_q); - spin_unlock_bh(&xprt->sc_rq_dto_lock); - svc_xprt_enqueue(&xprt->sc_xprt); - return; +/** + * svc_rdma_wc_send - Invoked by RDMA provider for each polled Send WC + * @cq: completion queue + * @wc: completed WR + * + */ +void svc_rdma_wc_send(struct ib_cq *cq, struct ib_wc *wc) +{ + struct ib_cqe *cqe = wc->wr_cqe; + struct svc_rdma_op_ctxt *ctxt; - default: - dprintk("svcrdma: unexpected completion opcode=%d\n", - ctxt->wr_op); - break; - } + svc_rdma_send_wc_common_put(cq, wc, "send"); - svc_rdma_put_context(ctxt, free_pages); + ctxt = container_of(cqe, struct svc_rdma_op_ctxt, cqe); + svc_rdma_unmap_dma(ctxt); + svc_rdma_put_context(ctxt, 1); } -/* - * Send Queue Completion Handler - potentially called on interrupt context. +/** + * svc_rdma_wc_write - Invoked by RDMA provider for each polled Write WC + * @cq: completion queue + * @wc: completed WR * - * Note that caller must hold a transport reference. 
*/ -static void sq_cq_reap(struct svcxprt_rdma *xprt) +void svc_rdma_wc_write(struct ib_cq *cq, struct ib_wc *wc) { - struct svc_rdma_op_ctxt *ctxt = NULL; - struct ib_wc wc_a[6]; - struct ib_wc *wc; - struct ib_cq *cq = xprt->sc_sq_cq; - int ret; + struct ib_cqe *cqe = wc->wr_cqe; + struct svc_rdma_op_ctxt *ctxt; - memset(wc_a, 0, sizeof(wc_a)); + svc_rdma_send_wc_common_put(cq, wc, "write"); - if (!test_and_clear_bit(RDMAXPRT_SQ_PENDING, &xprt->sc_flags)) - return; + ctxt = container_of(cqe, struct svc_rdma_op_ctxt, cqe); + svc_rdma_unmap_dma(ctxt); + svc_rdma_put_context(ctxt, 0); +} - ib_req_notify_cq(xprt->sc_sq_cq, IB_CQ_NEXT_COMP); - atomic_inc(&rdma_stat_sq_poll); - while ((ret = ib_poll_cq(cq, ARRAY_SIZE(wc_a), wc_a)) > 0) { - int i; +/** + * svc_rdma_wc_reg - Invoked by RDMA provider for each polled FASTREG WC + * @cq: completion queue + * @wc: completed WR + * + */ +void svc_rdma_wc_reg(struct ib_cq *cq, struct ib_wc *wc) +{ + svc_rdma_send_wc_common_put(cq, wc, "fastreg"); +} - for (i = 0; i < ret; i++) { - wc = &wc_a[i]; - if (wc->status != IB_WC_SUCCESS) { - dprintk("svcrdma: sq wc err status %s (%d)\n", - ib_wc_status_msg(wc->status), - wc->status); +/** + * svc_rdma_wc_read - Invoked by RDMA provider for each polled Read WC + * @cq: completion queue + * @wc: completed WR + * + */ +void svc_rdma_wc_read(struct ib_cq *cq, struct ib_wc *wc) +{ + struct svcxprt_rdma *xprt = cq->cq_context; + struct ib_cqe *cqe = wc->wr_cqe; + struct svc_rdma_op_ctxt *ctxt; - /* Close the transport */ - set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags); - } + svc_rdma_send_wc_common(xprt, wc, "read"); - /* Decrement used SQ WR count */ - atomic_dec(&xprt->sc_sq_count); - wake_up(&xprt->sc_send_wait); + ctxt = container_of(cqe, struct svc_rdma_op_ctxt, cqe); + svc_rdma_unmap_dma(ctxt); + svc_rdma_put_frmr(xprt, ctxt->frmr); - ctxt = (struct svc_rdma_op_ctxt *) - (unsigned long)wc->wr_id; - if (ctxt) - process_context(xprt, ctxt); + if (test_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags)) { + struct svc_rdma_op_ctxt *read_hdr; - svc_xprt_put(&xprt->sc_xprt); - } + read_hdr = ctxt->read_hdr; + spin_lock(&xprt->sc_rq_dto_lock); + list_add_tail(&read_hdr->dto_q, + &xprt->sc_read_complete_q); + spin_unlock(&xprt->sc_rq_dto_lock); + + set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags); + svc_xprt_enqueue(&xprt->sc_xprt); } - if (ctxt) - atomic_inc(&rdma_stat_sq_prod); + svc_rdma_put_context(ctxt, 0); + svc_xprt_put(&xprt->sc_xprt); } -static void sq_comp_handler(struct ib_cq *cq, void *cq_context) +/** + * svc_rdma_wc_inv - Invoked by RDMA provider for each polled LOCAL_INV WC + * @cq: completion queue + * @wc: completed WR + * + */ +void svc_rdma_wc_inv(struct ib_cq *cq, struct ib_wc *wc) { - struct svcxprt_rdma *xprt = cq_context; - unsigned long flags; - - /* Guard against unconditional flush call for destroyed QP */ - if (atomic_read(&xprt->sc_xprt.xpt_ref.refcount)==0) - return; - - /* - * Set the bit regardless of whether or not it's on the list - * because it may be on the list already due to an RQ - * completion. - */ - set_bit(RDMAXPRT_SQ_PENDING, &xprt->sc_flags); - - /* - * If this transport is not already on the DTO transport queue, - * add it - */ - spin_lock_irqsave(&dto_lock, flags); - if (list_empty(&xprt->sc_dto_q)) { - svc_xprt_get(&xprt->sc_xprt); - list_add_tail(&xprt->sc_dto_q, &dto_xprt_q); - } - spin_unlock_irqrestore(&dto_lock, flags); - - /* Tasklet does all the work to avoid irqsave locks. 
*/ - tasklet_schedule(&dto_tasklet); + svc_rdma_send_wc_common_put(cq, wc, "localInv"); } static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *serv, @@ -980,7 +931,6 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) struct svcxprt_rdma *listen_rdma; struct svcxprt_rdma *newxprt = NULL; struct rdma_conn_param conn_param; - struct ib_cq_init_attr cq_attr = {}; struct ib_qp_init_attr qp_attr; struct ib_device *dev; unsigned int i; @@ -1038,12 +988,8 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) dprintk("svcrdma: error creating PD for connect request\n"); goto errout; } - cq_attr.cqe = newxprt->sc_sq_depth; - newxprt->sc_sq_cq = ib_create_cq(dev, - sq_comp_handler, - cq_event_handler, - newxprt, - &cq_attr); + newxprt->sc_sq_cq = ib_alloc_cq(dev, newxprt, newxprt->sc_sq_depth, + 0, IB_POLL_SOFTIRQ); if (IS_ERR(newxprt->sc_sq_cq)) { dprintk("svcrdma: error creating SQ CQ for connect request\n"); goto errout; @@ -1138,12 +1084,6 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) /* Swap out the handler */ newxprt->sc_cm_id->event_handler = rdma_cma_handler; - /* - * Arm the CQs for the SQ and RQ before accepting so we can't - * miss the first message - */ - ib_req_notify_cq(newxprt->sc_sq_cq, IB_CQ_NEXT_COMP); - /* Accept Connection */ set_bit(RDMAXPRT_CONN_PENDING, &newxprt->sc_flags); memset(&conn_param, 0, sizeof conn_param); @@ -1283,7 +1223,7 @@ static void __svc_rdma_free(struct work_struct *work) ib_destroy_qp(rdma->sc_qp); if (rdma->sc_sq_cq && !IS_ERR(rdma->sc_sq_cq)) - ib_destroy_cq(rdma->sc_sq_cq); + ib_free_cq(rdma->sc_sq_cq); if (rdma->sc_rq_cq && !IS_ERR(rdma->sc_rq_cq)) ib_free_cq(rdma->sc_rq_cq); @@ -1347,9 +1287,6 @@ int svc_rdma_send(struct svcxprt_rdma *xprt, struct ib_send_wr *wr) spin_unlock_bh(&xprt->sc_lock); atomic_inc(&rdma_stat_sq_starve); - /* See if we can opportunistically reap SQ WR to make room */ - sq_cq_reap(xprt); - /* Wait until SQ WR available if SQ still full */ wait_event(xprt->sc_send_wait, atomic_read(&xprt->sc_sq_count) < -- cgit v1.2.3 From 09d4d087cd4869859fcc5dfc692f0830550a1b48 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Fri, 26 Feb 2016 17:32:24 +0100 Subject: mlx4: Implement devlink interface Implement newly introduced devlink interface. Add devlink port instances for every port and set the port types accordingly. Signed-off-by: Jiri Pirko v2->v3: -add dev param to devlink_register (api change) Signed-off-by: David S. 
Miller --- drivers/infiniband/hw/mlx4/main.c | 7 ++++ drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 8 ++++- drivers/net/ethernet/mellanox/mlx4/intf.c | 9 ++++++ drivers/net/ethernet/mellanox/mlx4/main.c | 44 +++++++++++++++++++------- drivers/net/ethernet/mellanox/mlx4/mlx4.h | 2 ++ include/linux/mlx4/driver.h | 3 ++ 6 files changed, 60 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 1c7ab6cabbb8..a15a7b37d386 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -41,6 +41,7 @@ #include #include #include +#include #include #include @@ -2519,6 +2520,9 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) } ibdev->ib_active = true; + mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB) + devlink_port_type_ib_set(mlx4_get_devlink_port(dev, i), + &ibdev->ib_dev); if (mlx4_is_mfunc(ibdev->dev)) init_pkeys(ibdev); @@ -2643,7 +2647,10 @@ static void mlx4_ib_remove(struct mlx4_dev *dev, void *ibdev_ptr) { struct mlx4_ib_dev *ibdev = ibdev_ptr; int p; + int i; + mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB) + devlink_port_type_clear(mlx4_get_devlink_port(dev, i)); ibdev->ib_active = false; flush_workqueue(wq); diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index 96d95cb36c52..e26b110e27da 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -40,6 +40,7 @@ #include #include #include +#include #include #include @@ -2033,8 +2034,11 @@ void mlx4_en_destroy_netdev(struct net_device *dev) en_dbg(DRV, priv, "Destroying netdev on port:%d\n", priv->port); /* Unregister device - this will close the port if it was up */ - if (priv->registered) + if (priv->registered) { + devlink_port_type_clear(mlx4_get_devlink_port(mdev->dev, + priv->port)); unregister_netdev(dev); + } if (priv->allocated) mlx4_free_hwq_res(mdev->dev, &priv->res, MLX4_EN_PAGE_SIZE); @@ -3051,6 +3055,8 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port, } priv->registered = 1; + devlink_port_type_eth_set(mlx4_get_devlink_port(mdev->dev, priv->port), + dev); return 0; diff --git a/drivers/net/ethernet/mellanox/mlx4/intf.c b/drivers/net/ethernet/mellanox/mlx4/intf.c index 0472941af820..dec77d6f0ac9 100644 --- a/drivers/net/ethernet/mellanox/mlx4/intf.c +++ b/drivers/net/ethernet/mellanox/mlx4/intf.c @@ -34,6 +34,7 @@ #include #include #include +#include #include "mlx4.h" @@ -249,3 +250,11 @@ void *mlx4_get_protocol_dev(struct mlx4_dev *dev, enum mlx4_protocol proto, int return result; } EXPORT_SYMBOL_GPL(mlx4_get_protocol_dev); + +struct devlink_port *mlx4_get_devlink_port(struct mlx4_dev *dev, int port) +{ + struct mlx4_port_info *info = &mlx4_priv(dev)->port[port]; + + return &info->devlink_port; +} +EXPORT_SYMBOL_GPL(mlx4_get_devlink_port); diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index 2cc3c626c3fe..4f5cfe4989ce 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -42,6 +42,7 @@ #include #include #include +#include #include #include @@ -2881,8 +2882,13 @@ no_msi: static int mlx4_init_port_info(struct mlx4_dev *dev, int port) { + struct devlink *devlink = priv_to_devlink(mlx4_priv(dev)); struct mlx4_port_info *info = &mlx4_priv(dev)->port[port]; - int err = 0; + int err; + + err = devlink_port_register(devlink, &info->devlink_port, port); + if (err) + return err; info->dev = 
dev; info->port = port; @@ -2907,6 +2913,7 @@ static int mlx4_init_port_info(struct mlx4_dev *dev, int port) err = device_create_file(&dev->persist->pdev->dev, &info->port_attr); if (err) { mlx4_err(dev, "Failed to create file for port %d\n", port); + devlink_port_unregister(&info->devlink_port); info->port = -1; } @@ -3680,21 +3687,23 @@ err_disable_pdev: static int mlx4_init_one(struct pci_dev *pdev, const struct pci_device_id *id) { + struct devlink *devlink; struct mlx4_priv *priv; struct mlx4_dev *dev; int ret; printk_once(KERN_INFO "%s", mlx4_version); - priv = kzalloc(sizeof(*priv), GFP_KERNEL); - if (!priv) + devlink = devlink_alloc(NULL, sizeof(*priv)); + if (!devlink) return -ENOMEM; + priv = devlink_priv(devlink); dev = &priv->dev; dev->persist = kzalloc(sizeof(*dev->persist), GFP_KERNEL); if (!dev->persist) { - kfree(priv); - return -ENOMEM; + ret = -ENOMEM; + goto err_devlink_free; } dev->persist->pdev = pdev; dev->persist->dev = dev; @@ -3703,14 +3712,23 @@ static int mlx4_init_one(struct pci_dev *pdev, const struct pci_device_id *id) mutex_init(&dev->persist->device_state_mutex); mutex_init(&dev->persist->interface_state_mutex); + ret = devlink_register(devlink, &pdev->dev); + if (ret) + goto err_persist_free; + ret = __mlx4_init_one(pdev, id->driver_data, priv); - if (ret) { - kfree(dev->persist); - kfree(priv); - } else { - pci_save_state(pdev); - } + if (ret) + goto err_devlink_unregister; + pci_save_state(pdev); + return 0; + +err_devlink_unregister: + devlink_unregister(devlink); +err_persist_free: + kfree(dev->persist); +err_devlink_free: + devlink_free(devlink); return ret; } @@ -3811,6 +3829,7 @@ static void mlx4_remove_one(struct pci_dev *pdev) struct mlx4_dev_persistent *persist = pci_get_drvdata(pdev); struct mlx4_dev *dev = persist->dev; struct mlx4_priv *priv = mlx4_priv(dev); + struct devlink *devlink = priv_to_devlink(priv); int active_vfs = 0; mutex_lock(&persist->interface_state_mutex); @@ -3841,8 +3860,9 @@ static void mlx4_remove_one(struct pci_dev *pdev) pci_release_regions(pdev); pci_disable_device(pdev); + devlink_unregister(devlink); kfree(dev->persist); - kfree(priv); + devlink_free(devlink); pci_set_drvdata(pdev, NULL); } diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h index 7baef52db6b7..ef9683101ead 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h @@ -45,6 +45,7 @@ #include #include #include +#include #include #include @@ -828,6 +829,7 @@ struct mlx4_port_info { struct mlx4_roce_gid_table gid_table; int base_qpn; struct cpu_rmap *rmap; + struct devlink_port devlink_port; }; struct mlx4_sense { diff --git a/include/linux/mlx4/driver.h b/include/linux/mlx4/driver.h index 2e8af001c5da..bd0e7075ea6d 100644 --- a/include/linux/mlx4/driver.h +++ b/include/linux/mlx4/driver.h @@ -33,6 +33,7 @@ #ifndef MLX4_DRIVER_H #define MLX4_DRIVER_H +#include #include struct mlx4_dev; @@ -89,6 +90,8 @@ int mlx4_port_map_set(struct mlx4_dev *dev, struct mlx4_port_map *v2p); void *mlx4_get_protocol_dev(struct mlx4_dev *dev, enum mlx4_protocol proto, int port); +struct devlink_port *mlx4_get_devlink_port(struct mlx4_dev *dev, int port); + static inline u64 mlx4_mac_to_u64(u8 *addr) { u64 mac = 0; -- cgit v1.2.3 From 6b6c07bdcdc97ccac2596063bfc32a5faddfe884 Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Wed, 2 Mar 2016 00:13:39 +0200 Subject: net/mlx5: Make command timeout way shorter The command timeout is terribly long: a whole two hours.
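For scale, the constants in the hunk below work out as follows: the old MLX5_CMD_TIMEOUT_MSEC = 7200 * 1000 ms = 7200 s = 2 hours, while the new value is 60 * 1000 ms = 60 s.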
Make it 60s so that if things do go wrong, the user gets feedback in a relatively short time, so they can take corrective actions and/or investigate using tools and such. Fixes: e126ba97dba9 ('mlx5: Add driver for Mellanox Connect-IB adapters') Signed-off-by: Or Gerlitz Signed-off-by: Leon Romanovsky Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- include/linux/mlx5/driver.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index a815da92d4eb..3388a43b78f6 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -54,7 +54,7 @@ enum { /* one minute for the sake of bringup. Generally, commands must always * complete and we may need to increase this timeout value */ - MLX5_CMD_TIMEOUT_MSEC = 7200 * 1000, + MLX5_CMD_TIMEOUT_MSEC = 60 * 1000, MLX5_CMD_WQ_MAX_NAME = 32, }; -- cgit v1.2.3 From 0ba422410bbf7081c3c7d7b2dcc10e9eb5cb46f7 Mon Sep 17 00:00:00 2001 From: Moshe Lazer Date: Wed, 2 Mar 2016 00:13:40 +0200 Subject: net/mlx5: Fix global UAR mapping Avoid double mapping of io mapped memory: a device page may be mapped to non-cached (NC) or to write-combining (WC). The code before this fix tries to map it both to WC and NC, contrary to what is stated in Intel's software developer manual. Here we remove the global WC mapping of all UARs, "dev->priv.bf_mapping", since UAR mapping should be decided per UAR (e.g. we want different mappings for EQs, CQs vs. QPs). The caller will now have to choose whether to map via the write-combining API or not. mlx5e SQs will choose write-combining in order to perform BlueFlame writes. Fixes: 88a85f99e51f ('TX latency optimization to save DMA reads') Signed-off-by: Moshe Lazer Reviewed-by: Achiad Shochat Signed-off-by: Saeed Mahameed Signed-off-by: David S.
Miller --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 16 +++++-------- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 12 ++++++---- drivers/net/ethernet/mellanox/mlx5/core/en_tx.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/main.c | 28 +--------------------- drivers/net/ethernet/mellanox/mlx5/core/uar.c | 29 ++++++++++++++--------- include/linux/mlx5/driver.h | 5 ++-- 6 files changed, 36 insertions(+), 56 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index b289660568cf..9c0e80e64b43 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -388,6 +388,7 @@ struct mlx5e_sq_dma { enum { MLX5E_SQ_STATE_WAKE_TXQ_ENABLE, + MLX5E_SQ_STATE_BF_ENABLE, }; struct mlx5e_sq { @@ -416,7 +417,6 @@ struct mlx5e_sq { struct mlx5_wq_cyc wq; u32 dma_fifo_mask; void __iomem *uar_map; - void __iomem *uar_bf_map; struct netdev_queue *txq; u32 sqn; u16 bf_buf_size; @@ -664,16 +664,12 @@ static inline void mlx5e_tx_notify_hw(struct mlx5e_sq *sq, * doorbell */ wmb(); - - if (bf_sz) { - __iowrite64_copy(sq->uar_bf_map + ofst, &wqe->ctrl, bf_sz); - - /* flush the write-combining mapped buffer */ - wmb(); - - } else { + if (bf_sz) + __iowrite64_copy(sq->uar_map + ofst, &wqe->ctrl, bf_sz); + else mlx5_write64((__be32 *)&wqe->ctrl, sq->uar_map + ofst, NULL); - } + /* flush the write-combining mapped buffer */ + wmb(); sq->bf_offset ^= sq->bf_buf_size; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index b20a35bd1d4f..5063c0e0f8ac 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -548,7 +548,7 @@ static int mlx5e_create_sq(struct mlx5e_channel *c, int txq_ix; int err; - err = mlx5_alloc_map_uar(mdev, &sq->uar); + err = mlx5_alloc_map_uar(mdev, &sq->uar, true); if (err) return err; @@ -560,8 +560,12 @@ static int mlx5e_create_sq(struct mlx5e_channel *c, goto err_unmap_free_uar; sq->wq.db = &sq->wq.db[MLX5_SND_DBR]; - sq->uar_map = sq->uar.map; - sq->uar_bf_map = sq->uar.bf_map; + if (sq->uar.bf_map) { + set_bit(MLX5E_SQ_STATE_BF_ENABLE, &sq->state); + sq->uar_map = sq->uar.bf_map; + } else { + sq->uar_map = sq->uar.map; + } sq->bf_buf_size = (1 << MLX5_CAP_GEN(mdev, log_bf_reg_size)) / 2; sq->max_inline = param->max_inline; @@ -2418,7 +2422,7 @@ static void *mlx5e_create_netdev(struct mlx5_core_dev *mdev) priv = netdev_priv(netdev); - err = mlx5_alloc_map_uar(mdev, &priv->cq_uar); + err = mlx5_alloc_map_uar(mdev, &priv->cq_uar, false); if (err) { mlx5_core_err(mdev, "alloc_map uar failed, %d\n", err); goto err_free_netdev; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c index a05c070cbc2f..c34f4f3e9537 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c @@ -303,7 +303,7 @@ static netdev_tx_t mlx5e_sq_xmit(struct mlx5e_sq *sq, struct sk_buff *skb) if (!skb->xmit_more || netif_xmit_stopped(sq->txq)) { int bf_sz = 0; - if (bf && sq->uar_bf_map) + if (bf && test_bit(MLX5E_SQ_STATE_BF_ENABLE, &sq->state)) bf_sz = wi->num_wqebbs << 3; cseg->fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 1545a944c309..8b7133de498e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ 
b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -767,22 +767,6 @@ static int mlx5_core_set_issi(struct mlx5_core_dev *dev) return -ENOTSUPP; } -static int map_bf_area(struct mlx5_core_dev *dev) -{ - resource_size_t bf_start = pci_resource_start(dev->pdev, 0); - resource_size_t bf_len = pci_resource_len(dev->pdev, 0); - - dev->priv.bf_mapping = io_mapping_create_wc(bf_start, bf_len); - - return dev->priv.bf_mapping ? 0 : -ENOMEM; -} - -static void unmap_bf_area(struct mlx5_core_dev *dev) -{ - if (dev->priv.bf_mapping) - io_mapping_free(dev->priv.bf_mapping); -} - static void mlx5_add_device(struct mlx5_interface *intf, struct mlx5_priv *priv) { struct mlx5_device_context *dev_ctx; @@ -1103,14 +1087,9 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv) goto err_stop_eqs; } - if (map_bf_area(dev)) - dev_err(&pdev->dev, "Failed to map blue flame area\n"); - err = mlx5_irq_set_affinity_hints(dev); - if (err) { + if (err) dev_err(&pdev->dev, "Failed to alloc affinity hint cpumask\n"); - goto err_unmap_bf_area; - } MLX5_INIT_DOORBELL_LOCK(&priv->cq_uar_lock); @@ -1169,10 +1148,6 @@ err_fs: mlx5_cleanup_qp_table(dev); mlx5_cleanup_cq_table(dev); mlx5_irq_clear_affinity_hints(dev); - -err_unmap_bf_area: - unmap_bf_area(dev); - free_comp_eqs(dev); err_stop_eqs: @@ -1242,7 +1217,6 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv) mlx5_cleanup_qp_table(dev); mlx5_cleanup_cq_table(dev); mlx5_irq_clear_affinity_hints(dev); - unmap_bf_area(dev); free_comp_eqs(dev); mlx5_stop_eqs(dev); mlx5_free_uuars(dev, &priv->uuari); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/uar.c b/drivers/net/ethernet/mellanox/mlx5/core/uar.c index eb05c845ece9..8ba080e441a1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/uar.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/uar.c @@ -226,7 +226,8 @@ int mlx5_free_uuars(struct mlx5_core_dev *dev, struct mlx5_uuar_info *uuari) return 0; } -int mlx5_alloc_map_uar(struct mlx5_core_dev *mdev, struct mlx5_uar *uar) +int mlx5_alloc_map_uar(struct mlx5_core_dev *mdev, struct mlx5_uar *uar, + bool map_wc) { phys_addr_t pfn; phys_addr_t uar_bar_start; @@ -240,20 +241,26 @@ int mlx5_alloc_map_uar(struct mlx5_core_dev *mdev, struct mlx5_uar *uar) uar_bar_start = pci_resource_start(mdev->pdev, 0); pfn = (uar_bar_start >> PAGE_SHIFT) + uar->index; - uar->map = ioremap(pfn << PAGE_SHIFT, PAGE_SIZE); - if (!uar->map) { - mlx5_core_warn(mdev, "ioremap() failed, %d\n", err); - err = -ENOMEM; - goto err_free_uar; - } - if (mdev->priv.bf_mapping) - uar->bf_map = io_mapping_map_wc(mdev->priv.bf_mapping, - uar->index << PAGE_SHIFT); + if (map_wc) { + uar->bf_map = ioremap_wc(pfn << PAGE_SHIFT, PAGE_SIZE); + if (!uar->bf_map) { + mlx5_core_warn(mdev, "ioremap_wc() failed\n"); + uar->map = ioremap(pfn << PAGE_SHIFT, PAGE_SIZE); + if (!uar->map) + goto err_free_uar; + } + } else { + uar->map = ioremap(pfn << PAGE_SHIFT, PAGE_SIZE); + if (!uar->map) + goto err_free_uar; + } return 0; err_free_uar: + mlx5_core_warn(mdev, "ioremap() failed\n"); + err = -ENOMEM; mlx5_cmd_free_uar(mdev, uar->index); return err; @@ -262,8 +269,8 @@ EXPORT_SYMBOL(mlx5_alloc_map_uar); void mlx5_unmap_free_uar(struct mlx5_core_dev *mdev, struct mlx5_uar *uar) { - io_mapping_unmap(uar->bf_map); iounmap(uar->map); + iounmap(uar->bf_map); mlx5_cmd_free_uar(mdev, uar->index); } EXPORT_SYMBOL(mlx5_unmap_free_uar); diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 3388a43b78f6..bb1a880a5bc5 100644 --- a/include/linux/mlx5/driver.h +++ 
b/include/linux/mlx5/driver.h @@ -460,8 +460,6 @@ struct mlx5_priv { struct mlx5_uuar_info uuari; MLX5_DECLARE_DOORBELL_LOCK(cq_uar_lock); - struct io_mapping *bf_mapping; - /* pages stuff */ struct workqueue_struct *pg_wq; struct rb_root page_root; @@ -719,7 +717,8 @@ int mlx5_cmd_alloc_uar(struct mlx5_core_dev *dev, u32 *uarn); int mlx5_cmd_free_uar(struct mlx5_core_dev *dev, u32 uarn); int mlx5_alloc_uuars(struct mlx5_core_dev *dev, struct mlx5_uuar_info *uuari); int mlx5_free_uuars(struct mlx5_core_dev *dev, struct mlx5_uuar_info *uuari); -int mlx5_alloc_map_uar(struct mlx5_core_dev *mdev, struct mlx5_uar *uar); +int mlx5_alloc_map_uar(struct mlx5_core_dev *mdev, struct mlx5_uar *uar, + bool map_wc); void mlx5_unmap_free_uar(struct mlx5_core_dev *mdev, struct mlx5_uar *uar); void mlx5_health_cleanup(struct mlx5_core_dev *dev); int mlx5_health_init(struct mlx5_core_dev *dev); -- cgit v1.2.3 From 64d4e3431e686dc37ce388ba531c4c4e866fb141 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Sat, 27 Feb 2016 20:19:54 -0800 Subject: net: remove skb_sender_cpu_clear() After commit 52bd2d62ce67 ("net: better skb->sender_cpu and skb->napi_id cohabitation") skb_sender_cpu_clear() becomes empty and can be removed. Cc: Eric Dumazet Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- include/linux/skbuff.h | 4 ---- net/bridge/br_forward.c | 1 - net/core/filter.c | 2 -- net/core/skbuff.c | 1 - net/ipv4/ip_forward.c | 1 - net/ipv6/ip6_output.c | 1 - net/netfilter/ipvs/ip_vs_xmit.c | 6 ------ net/netfilter/nf_dup_netdev.c | 1 - net/sched/act_mirred.c | 1 - 9 files changed, 18 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index eab4f8fbed58..797cefb888fb 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1161,10 +1161,6 @@ static inline void skb_copy_hash(struct sk_buff *to, const struct sk_buff *from) to->l4_hash = from->l4_hash; }; -static inline void skb_sender_cpu_clear(struct sk_buff *skb) -{ -} - #ifdef NET_SKBUFF_DATA_USES_OFFSET static inline unsigned char *skb_end_pointer(const struct sk_buff *skb) { diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c index fcdb86dd5a23..f47759f05b6d 100644 --- a/net/bridge/br_forward.c +++ b/net/bridge/br_forward.c @@ -44,7 +44,6 @@ int br_dev_queue_push_xmit(struct net *net, struct sock *sk, struct sk_buff *skb skb_push(skb, ETH_HLEN); br_drop_fake_rtable(skb); - skb_sender_cpu_clear(skb); if (skb->ip_summed == CHECKSUM_PARTIAL && (skb->protocol == htons(ETH_P_8021Q) || diff --git a/net/core/filter.c b/net/core/filter.c index a3aba15a8025..5e2a3b5e5196 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -1597,7 +1597,6 @@ static u64 bpf_clone_redirect(u64 r1, u64 ifindex, u64 flags, u64 r4, u64 r5) } skb2->dev = dev; - skb_sender_cpu_clear(skb2); return dev_queue_xmit(skb2); } @@ -1650,7 +1649,6 @@ int skb_do_redirect(struct sk_buff *skb) } skb->dev = dev; - skb_sender_cpu_clear(skb); return dev_queue_xmit(skb); } diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 488566b09c6d..7af7ec635d90 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -4302,7 +4302,6 @@ void skb_scrub_packet(struct sk_buff *skb, bool xnet) skb->skb_iif = 0; skb->ignore_df = 0; skb_dst_drop(skb); - skb_sender_cpu_clear(skb); secpath_reset(skb); nf_reset(skb); nf_reset_trace(skb); diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c index da0d7ce85844..af18f1e4889e 100644 --- a/net/ipv4/ip_forward.c +++ b/net/ipv4/ip_forward.c @@ -71,7 +71,6 @@ static int 
ip_forward_finish(struct net *net, struct sock *sk, struct sk_buff *s if (unlikely(opt->optlen)) ip_forward_options(skb); - skb_sender_cpu_clear(skb); return dst_output(net, sk, skb); } diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index a163102f1803..9428345d3a07 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -332,7 +332,6 @@ static int ip6_forward_proxy_check(struct sk_buff *skb) static inline int ip6_forward_finish(struct net *net, struct sock *sk, struct sk_buff *skb) { - skb_sender_cpu_clear(skb); return dst_output(net, sk, skb); } diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index a3f5cd9b3c4c..dc196a0f501d 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -531,8 +531,6 @@ static inline int ip_vs_tunnel_xmit_prepare(struct sk_buff *skb, if (ret == NF_ACCEPT) { nf_reset(skb); skb_forward_csum(skb); - if (!skb->sk) - skb_sender_cpu_clear(skb); } return ret; } @@ -573,8 +571,6 @@ static inline int ip_vs_nat_send_or_cont(int pf, struct sk_buff *skb, if (!local) { skb_forward_csum(skb); - if (!skb->sk) - skb_sender_cpu_clear(skb); NF_HOOK(pf, NF_INET_LOCAL_OUT, cp->ipvs->net, NULL, skb, NULL, skb_dst(skb)->dev, dst_output); } else @@ -595,8 +591,6 @@ static inline int ip_vs_send_or_cont(int pf, struct sk_buff *skb, if (!local) { ip_vs_drop_early_demux_sk(skb); skb_forward_csum(skb); - if (!skb->sk) - skb_sender_cpu_clear(skb); NF_HOOK(pf, NF_INET_LOCAL_OUT, cp->ipvs->net, NULL, skb, NULL, skb_dst(skb)->dev, dst_output); } else diff --git a/net/netfilter/nf_dup_netdev.c b/net/netfilter/nf_dup_netdev.c index 8414ee1a0319..7ec69723940f 100644 --- a/net/netfilter/nf_dup_netdev.c +++ b/net/netfilter/nf_dup_netdev.c @@ -31,7 +31,6 @@ void nf_dup_netdev_egress(const struct nft_pktinfo *pkt, int oif) skb_push(skb, skb->mac_len); skb->dev = dev; - skb_sender_cpu_clear(skb); dev_queue_xmit(skb); } EXPORT_SYMBOL_GPL(nf_dup_netdev_egress); diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 6b284d991e0b..e8a760cf7775 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -182,7 +182,6 @@ static int tcf_mirred(struct sk_buff *skb, const struct tc_action *a, skb2->skb_iif = skb->dev->ifindex; skb2->dev = dev; - skb_sender_cpu_clear(skb2); err = dev_queue_xmit(skb2); if (err) { -- cgit v1.2.3 From 4ac801b77e6f06e6b12c069fd29216a4102065fb Mon Sep 17 00:00:00 2001 From: Yuval Mintz Date: Sun, 28 Feb 2016 12:26:52 +0200 Subject: qed: Semantic refactoring of interrupt code Signed-off-by: Yuval Mintz Signed-off-by: David S. 
Miller --- drivers/net/ethernet/qlogic/qed/qed_dev.c | 6 +- drivers/net/ethernet/qlogic/qed/qed_int.c | 155 ++++++++++++++++------------- drivers/net/ethernet/qlogic/qed/qed_int.h | 6 +- drivers/net/ethernet/qlogic/qed/qed_main.c | 15 +-- include/linux/qed/qed_if.h | 6 ++ 5 files changed, 111 insertions(+), 77 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev.c b/drivers/net/ethernet/qlogic/qed/qed_dev.c index acfe7be49a58..d9a5175ebd04 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_dev.c +++ b/drivers/net/ethernet/qlogic/qed/qed_dev.c @@ -1011,13 +1011,17 @@ static void qed_hw_get_resc(struct qed_hwfn *p_hwfn) { u32 *resc_start = p_hwfn->hw_info.resc_start; u32 *resc_num = p_hwfn->hw_info.resc_num; + struct qed_sb_cnt_info sb_cnt_info; int num_funcs, i; num_funcs = MAX_NUM_PFS_BB; + memset(&sb_cnt_info, 0, sizeof(sb_cnt_info)); + qed_int_get_num_sbs(p_hwfn, &sb_cnt_info); + resc_num[QED_SB] = min_t(u32, (MAX_SB_PER_PATH_BB / num_funcs), - qed_int_get_num_sbs(p_hwfn, NULL)); + sb_cnt_info.sb_cnt); resc_num[QED_L2_QUEUE] = MAX_NUM_L2_QUEUES_BB / num_funcs; resc_num[QED_VPORT] = MAX_NUM_VPORTS_BB / num_funcs; resc_num[QED_RSS_ENG] = ETH_RSS_ENGINE_NUM_BB / num_funcs; diff --git a/drivers/net/ethernet/qlogic/qed/qed_int.c b/drivers/net/ethernet/qlogic/qed/qed_int.c index fa73daa94655..7fd1be61de5c 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_int.c +++ b/drivers/net/ethernet/qlogic/qed/qed_int.c @@ -343,17 +343,17 @@ void qed_int_sp_dpc(unsigned long hwfn_cookie) static void qed_int_sb_attn_free(struct qed_hwfn *p_hwfn) { - struct qed_dev *cdev = p_hwfn->cdev; - struct qed_sb_attn_info *p_sb = p_hwfn->p_sb_attn; - - if (p_sb) { - if (p_sb->sb_attn) - dma_free_coherent(&cdev->pdev->dev, - SB_ATTN_ALIGNED_SIZE(p_hwfn), - p_sb->sb_attn, - p_sb->sb_phys); - kfree(p_sb); - } + struct qed_sb_attn_info *p_sb = p_hwfn->p_sb_attn; + + if (!p_sb) + return; + + if (p_sb->sb_attn) + dma_free_coherent(&p_hwfn->cdev->pdev->dev, + SB_ATTN_ALIGNED_SIZE(p_hwfn), + p_sb->sb_attn, + p_sb->sb_phys); + kfree(p_sb); } static void qed_int_sb_attn_setup(struct qed_hwfn *p_hwfn, @@ -433,6 +433,7 @@ void qed_init_cau_sb_entry(struct qed_hwfn *p_hwfn, u16 vf_number, u8 vf_valid) { + struct qed_dev *cdev = p_hwfn->cdev; u32 cau_state; memset(p_sb_entry, 0, sizeof(*p_sb_entry)); @@ -451,14 +452,12 @@ void qed_init_cau_sb_entry(struct qed_hwfn *p_hwfn, cau_state = CAU_HC_DISABLE_STATE; - if (p_hwfn->cdev->int_coalescing_mode == QED_COAL_MODE_ENABLE) { + if (cdev->int_coalescing_mode == QED_COAL_MODE_ENABLE) { cau_state = CAU_HC_ENABLE_STATE; - if (!p_hwfn->cdev->rx_coalesce_usecs) - p_hwfn->cdev->rx_coalesce_usecs = - QED_CAU_DEF_RX_USECS; - if (!p_hwfn->cdev->tx_coalesce_usecs) - p_hwfn->cdev->tx_coalesce_usecs = - QED_CAU_DEF_TX_USECS; + if (!cdev->rx_coalesce_usecs) + cdev->rx_coalesce_usecs = QED_CAU_DEF_RX_USECS; + if (!cdev->tx_coalesce_usecs) + cdev->tx_coalesce_usecs = QED_CAU_DEF_TX_USECS; } SET_FIELD(p_sb_entry->data, CAU_SB_ENTRY_STATE0, cau_state); @@ -638,8 +637,10 @@ int qed_int_sb_release(struct qed_hwfn *p_hwfn, sb_info->sb_ack = 0; memset(sb_info->sb_virt, 0, sizeof(*sb_info->sb_virt)); - p_hwfn->sbs_info[sb_id] = NULL; - p_hwfn->num_sbs--; + if (p_hwfn->sbs_info[sb_id] != NULL) { + p_hwfn->sbs_info[sb_id] = NULL; + p_hwfn->num_sbs--; + } return 0; } @@ -648,14 +649,15 @@ static void qed_int_sp_sb_free(struct qed_hwfn *p_hwfn) { struct qed_sb_sp_info *p_sb = p_hwfn->p_sp_sb; - if (p_sb) { - if (p_sb->sb_info.sb_virt) - 
dma_free_coherent(&p_hwfn->cdev->pdev->dev, - SB_ALIGNED_SIZE(p_hwfn), - p_sb->sb_info.sb_virt, - p_sb->sb_info.sb_phys); - kfree(p_sb); - } + if (!p_sb) + return; + + if (p_sb->sb_info.sb_virt) + dma_free_coherent(&p_hwfn->cdev->pdev->dev, + SB_ALIGNED_SIZE(p_hwfn), + p_sb->sb_info.sb_virt, + p_sb->sb_info.sb_phys); + kfree(p_sb); } static int qed_int_sp_sb_alloc(struct qed_hwfn *p_hwfn, @@ -718,36 +720,36 @@ int qed_int_register_cb(struct qed_hwfn *p_hwfn, __le16 **p_fw_cons) { struct qed_sb_sp_info *p_sp_sb = p_hwfn->p_sp_sb; - int qed_status = -ENOMEM; + int rc = -ENOMEM; u8 pi; /* Look for a free index */ for (pi = 0; pi < ARRAY_SIZE(p_sp_sb->pi_info_arr); pi++) { - if (!p_sp_sb->pi_info_arr[pi].comp_cb) { - p_sp_sb->pi_info_arr[pi].comp_cb = comp_cb; - p_sp_sb->pi_info_arr[pi].cookie = cookie; - *sb_idx = pi; - *p_fw_cons = &p_sp_sb->sb_info.sb_virt->pi_array[pi]; - qed_status = 0; - break; - } + if (p_sp_sb->pi_info_arr[pi].comp_cb) + continue; + + p_sp_sb->pi_info_arr[pi].comp_cb = comp_cb; + p_sp_sb->pi_info_arr[pi].cookie = cookie; + *sb_idx = pi; + *p_fw_cons = &p_sp_sb->sb_info.sb_virt->pi_array[pi]; + rc = 0; + break; } - return qed_status; + return rc; } int qed_int_unregister_cb(struct qed_hwfn *p_hwfn, u8 pi) { struct qed_sb_sp_info *p_sp_sb = p_hwfn->p_sp_sb; - int qed_status = -ENOMEM; - if (p_sp_sb->pi_info_arr[pi].comp_cb) { - p_sp_sb->pi_info_arr[pi].comp_cb = NULL; - p_sp_sb->pi_info_arr[pi].cookie = NULL; - qed_status = 0; - } + if (p_sp_sb->pi_info_arr[pi].comp_cb == NULL) + return -ENOMEM; - return qed_status; + p_sp_sb->pi_info_arr[pi].comp_cb = NULL; + p_sp_sb->pi_info_arr[pi].cookie = NULL; + + return 0; } u16 qed_int_get_sp_sb_id(struct qed_hwfn *p_hwfn) @@ -937,6 +939,39 @@ void qed_int_igu_init_pure_rt(struct qed_hwfn *p_hwfn, } } +static u32 qed_int_igu_read_cam_block(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u16 sb_id) +{ + u32 val = qed_rd(p_hwfn, p_ptt, + IGU_REG_MAPPING_MEMORY + + sizeof(u32) * sb_id); + struct qed_igu_block *p_block; + + p_block = &p_hwfn->hw_info.p_igu_info->igu_map.igu_blocks[sb_id]; + + /* stop scanning when hit first invalid PF entry */ + if (!GET_FIELD(val, IGU_MAPPING_LINE_VALID) && + GET_FIELD(val, IGU_MAPPING_LINE_PF_VALID)) + goto out; + + /* Fill the block information */ + p_block->status = QED_IGU_STATUS_VALID; + p_block->function_id = GET_FIELD(val, + IGU_MAPPING_LINE_FUNCTION_NUMBER); + p_block->is_pf = GET_FIELD(val, IGU_MAPPING_LINE_PF_VALID); + p_block->vector_number = GET_FIELD(val, + IGU_MAPPING_LINE_VECTOR_NUMBER); + + DP_VERBOSE(p_hwfn, NETIF_MSG_INTR, + "IGU_BLOCK: [SB 0x%04x, Value in CAM 0x%08x] func_id = %d is_pf = %d vector_num = 0x%x\n", + sb_id, val, p_block->function_id, + p_block->is_pf, p_block->vector_number); + +out: + return val; +} + int qed_int_igu_read_cam(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) { @@ -963,26 +998,13 @@ int qed_int_igu_read_cam(struct qed_hwfn *p_hwfn, sb_id++) { blk = &p_igu_info->igu_map.igu_blocks[sb_id]; - val = qed_rd(p_hwfn, p_ptt, - IGU_REG_MAPPING_MEMORY + sizeof(u32) * sb_id); + val = qed_int_igu_read_cam_block(p_hwfn, p_ptt, sb_id); /* stop scanning when hit first invalid PF entry */ if (!GET_FIELD(val, IGU_MAPPING_LINE_VALID) && GET_FIELD(val, IGU_MAPPING_LINE_PF_VALID)) break; - blk->status = QED_IGU_STATUS_VALID; - blk->function_id = GET_FIELD(val, - IGU_MAPPING_LINE_FUNCTION_NUMBER); - blk->is_pf = GET_FIELD(val, IGU_MAPPING_LINE_PF_VALID); - blk->vector_number = GET_FIELD(val, - IGU_MAPPING_LINE_VECTOR_NUMBER); - - DP_VERBOSE(p_hwfn, NETIF_MSG_INTR, - 
"IGU_BLOCK[sb_id]:%x:func_id = %d is_pf = %d vector_num = 0x%x\n", - val, blk->function_id, blk->is_pf, - blk->vector_number); - if (blk->is_pf) { if (blk->function_id == p_hwfn->rel_pf_id) { blk->status |= QED_IGU_STATUS_PF; @@ -1121,18 +1143,17 @@ void qed_int_setup(struct qed_hwfn *p_hwfn, qed_int_sp_dpc_setup(p_hwfn); } -int qed_int_get_num_sbs(struct qed_hwfn *p_hwfn, - int *p_iov_blks) +void qed_int_get_num_sbs(struct qed_hwfn *p_hwfn, + struct qed_sb_cnt_info *p_sb_cnt_info) { struct qed_igu_info *info = p_hwfn->hw_info.p_igu_info; - if (!info) - return 0; - - if (p_iov_blks) - *p_iov_blks = info->free_blks; + if (!info || !p_sb_cnt_info) + return; - return info->igu_sb_cnt; + p_sb_cnt_info->sb_cnt = info->igu_sb_cnt; + p_sb_cnt_info->sb_iov_cnt = info->igu_sb_cnt_iov; + p_sb_cnt_info->sb_free_blk = info->free_blks; } void qed_int_disable_post_isr_release(struct qed_dev *cdev) diff --git a/drivers/net/ethernet/qlogic/qed/qed_int.h b/drivers/net/ethernet/qlogic/qed/qed_int.h index 51e0b09a7f47..c57f2e680770 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_int.h +++ b/drivers/net/ethernet/qlogic/qed/qed_int.h @@ -161,12 +161,12 @@ void qed_int_sp_dpc(unsigned long hwfn_cookie); * blocks configured for this funciton in the igu. * * @param p_hwfn - * @param p_iov_blks - configured free blks for vfs + * @param p_sb_cnt_info * * @return int - number of status blocks configured */ -int qed_int_get_num_sbs(struct qed_hwfn *p_hwfn, - int *p_iov_blks); +void qed_int_get_num_sbs(struct qed_hwfn *p_hwfn, + struct qed_sb_cnt_info *p_sb_cnt_info); /** * @brief qed_int_disable_post_isr_release - performs the cleanup post ISR diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c index 25d6e91335ea..caa689e6575c 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_main.c +++ b/drivers/net/ethernet/qlogic/qed/qed_main.c @@ -634,15 +634,18 @@ static int qed_get_int_fp(struct qed_dev *cdev, struct qed_int_info *info) static int qed_slowpath_setup_int(struct qed_dev *cdev, enum qed_int_mode int_mode) { - int rc, i; - u8 num_vectors = 0; - + struct qed_sb_cnt_info sb_cnt_info; + int rc; + int i; memset(&cdev->int_params, 0, sizeof(struct qed_int_params)); cdev->int_params.in.int_mode = int_mode; - for_each_hwfn(cdev, i) - num_vectors += qed_int_get_num_sbs(&cdev->hwfns[i], NULL) + 1; - cdev->int_params.in.num_vectors = num_vectors; + for_each_hwfn(cdev, i) { + memset(&sb_cnt_info, 0, sizeof(sb_cnt_info)); + qed_int_get_num_sbs(&cdev->hwfns[i], &sb_cnt_info); + cdev->int_params.in.num_vectors += sb_cnt_info.sb_cnt; + cdev->int_params.in.num_vectors++; /* slowpath */ + } /* We want a minimum of one slowpath and one fastpath vector per hwfn */ cdev->int_params.in.min_msix_cnt = cdev->num_hwfns * 2; diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h index 3d43c1d4ecef..1f7599c77cd4 100644 --- a/include/linux/qed/qed_if.h +++ b/include/linux/qed/qed_if.h @@ -446,6 +446,12 @@ struct qed_eth_stats { #define RX_PI 0 #define TX_PI(tc) (RX_PI + 1 + tc) +struct qed_sb_cnt_info { + int sb_cnt; + int sb_iov_cnt; + int sb_free_blk; +}; + static inline u16 qed_sb_update_sb_idx(struct qed_sb_info *sb_info) { u32 prod = 0; -- cgit v1.2.3 From 811b0d6538b9f26f3eb0f90fe4e6118f2480ec6f Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Fri, 26 Feb 2016 20:59:18 +0100 Subject: nvmem: Add flag to export NVMEM to root only Legacy AT24, AT25 EEPROMs are exported in sys so that only root can read the contents. The EEPROMs may contain sensitive information. 
Add a flag so the provider can indicate that NVMEM should also restrict access to root only. Signed-off-by: Andrew Lunn Acked-by: Srinivas Kandagatla Signed-off-by: Greg Kroah-Hartman ---
drivers/nvmem/core.c | 57 ++++++++++++++++++++++++++++++++++++++++--
include/linux/nvmem-provider.h | 1 +
2 files changed, 56 insertions(+), 2 deletions(-) (limited to 'include/linux')
diff --git a/drivers/nvmem/core.c b/drivers/nvmem/core.c index de14fae6f7f6..b03690bc8f09 100644 --- a/drivers/nvmem/core.c +++ b/drivers/nvmem/core.c @@ -161,6 +161,53 @@ static const struct attribute_group *nvmem_ro_dev_groups[] = { NULL, };
+/* default read/write permissions, root only */
+static struct bin_attribute bin_attr_rw_root_nvmem = {
+	.attr	= {
+		.name	= "nvmem",
+		.mode	= S_IWUSR | S_IRUSR,
+	},
+	.read	= bin_attr_nvmem_read,
+	.write	= bin_attr_nvmem_write,
+};
+
+static struct bin_attribute *nvmem_bin_rw_root_attributes[] = {
+	&bin_attr_rw_root_nvmem,
+	NULL,
+};
+
+static const struct attribute_group nvmem_bin_rw_root_group = {
+	.bin_attrs	= nvmem_bin_rw_root_attributes,
+};
+
+static const struct attribute_group *nvmem_rw_root_dev_groups[] = {
+	&nvmem_bin_rw_root_group,
+	NULL,
+};
+
+/* read only permission, root only */
+static struct bin_attribute bin_attr_ro_root_nvmem = {
+	.attr	= {
+		.name	= "nvmem",
+		.mode	= S_IRUSR,
+	},
+	.read	= bin_attr_nvmem_read,
+};
+
+static struct bin_attribute *nvmem_bin_ro_root_attributes[] = {
+	&bin_attr_ro_root_nvmem,
+	NULL,
+};
+
+static const struct attribute_group nvmem_bin_ro_root_group = {
+	.bin_attrs	= nvmem_bin_ro_root_attributes,
+};
+
+static const struct attribute_group *nvmem_ro_root_dev_groups[] = {
+	&nvmem_bin_ro_root_group,
+	NULL,
+};
+
static void nvmem_release(struct device *dev) { struct nvmem_device *nvmem = to_nvmem_device(dev); @@ -355,8 +402,14 @@ struct nvmem_device *nvmem_register(const struct nvmem_config *config) nvmem->read_only = of_property_read_bool(np, "read-only") | config->read_only;
-	nvmem->dev.groups = nvmem->read_only ? nvmem_ro_dev_groups :
-			    nvmem_rw_dev_groups;
+	if (config->root_only)
+		nvmem->dev.groups = nvmem->read_only ?
+			nvmem_ro_root_dev_groups :
+			nvmem_rw_root_dev_groups;
+	else
+		nvmem->dev.groups = nvmem->read_only ?
+			nvmem_ro_dev_groups :
+			nvmem_rw_dev_groups;
	device_initialize(&nvmem->dev);
diff --git a/include/linux/nvmem-provider.h b/include/linux/nvmem-provider.h index 0b68caff1b3c..d24fefa0c11d 100644 --- a/include/linux/nvmem-provider.h +++ b/include/linux/nvmem-provider.h @@ -23,6 +23,7 @@ struct nvmem_config { const struct nvmem_cell_info *cells; int ncells; bool read_only;
+	bool root_only;
}; #if IS_ENABLED(CONFIG_NVMEM) -- cgit v1.2.3
From b6c217ab9be6895384cf0b284ace84ad79e5c53b Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Fri, 26 Feb 2016 20:59:19 +0100 Subject: nvmem: Add backwards compatibility support for older EEPROM drivers. Older drivers made an 'eeprom' file available in the /sys device directory. Have the NVMEM core provide this to retain backwards compatibility.
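A hedged sketch of how a converted EEPROM driver might opt into this layer: the compat and base_dev fields match the nvmem_config additions in the diff below, everything else here is hypothetical.

#include <linux/i2c.h>
#include <linux/nvmem-provider.h>

static int my_eeprom_probe(struct i2c_client *client)
{
	struct nvmem_config cfg = {
		.name      = "my-eeprom",	/* hypothetical */
		.read_only = true,
		.compat    = true,		/* also create the legacy file */
		.base_dev  = &client->dev,	/* where "eeprom" should appear */
	};
	struct nvmem_device *nvmem;

	/* Registers /sys/bus/nvmem/devices/<name>/nvmem as usual and,
	 * via nvmem_setup_compat(), an "eeprom" attribute on base_dev.
	 */
	nvmem = nvmem_register(&cfg);
	return PTR_ERR_OR_ZERO(nvmem);
}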
Signed-off-by: Andrew Lunn Acked-by: Srinivas Kandagatla Signed-off-by: Greg Kroah-Hartman --- drivers/nvmem/core.c | 84 ++++++++++++++++++++++++++++++++++++++---- include/linux/nvmem-provider.h | 4 +- 2 files changed, 79 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/drivers/nvmem/core.c b/drivers/nvmem/core.c index b03690bc8f09..0de3d878c439 100644 --- a/drivers/nvmem/core.c +++ b/drivers/nvmem/core.c @@ -38,8 +38,13 @@ struct nvmem_device { int users; size_t size; bool read_only; + int flags; + struct bin_attribute eeprom; + struct device *base_dev; }; +#define FLAG_COMPAT BIT(0) + struct nvmem_cell { const char *name; int offset; @@ -56,16 +61,26 @@ static DEFINE_IDA(nvmem_ida); static LIST_HEAD(nvmem_cells); static DEFINE_MUTEX(nvmem_cells_mutex); +#ifdef CONFIG_DEBUG_LOCK_ALLOC +static struct lock_class_key eeprom_lock_key; +#endif + #define to_nvmem_device(d) container_of(d, struct nvmem_device, dev) static ssize_t bin_attr_nvmem_read(struct file *filp, struct kobject *kobj, struct bin_attribute *attr, char *buf, loff_t pos, size_t count) { - struct device *dev = container_of(kobj, struct device, kobj); - struct nvmem_device *nvmem = to_nvmem_device(dev); + struct device *dev; + struct nvmem_device *nvmem; int rc; + if (attr->private) + dev = attr->private; + else + dev = container_of(kobj, struct device, kobj); + nvmem = to_nvmem_device(dev); + /* Stop the user from reading */ if (pos >= nvmem->size) return 0; @@ -90,10 +105,16 @@ static ssize_t bin_attr_nvmem_write(struct file *filp, struct kobject *kobj, struct bin_attribute *attr, char *buf, loff_t pos, size_t count) { - struct device *dev = container_of(kobj, struct device, kobj); - struct nvmem_device *nvmem = to_nvmem_device(dev); + struct device *dev; + struct nvmem_device *nvmem; int rc; + if (attr->private) + dev = attr->private; + else + dev = container_of(kobj, struct device, kobj); + nvmem = to_nvmem_device(dev); + /* Stop the user from writing */ if (pos >= nvmem->size) return 0; @@ -349,6 +370,43 @@ err: return rval; } +/* + * nvmem_setup_compat() - Create an additional binary entry in + * drivers sys directory, to be backwards compatible with the older + * drivers/misc/eeprom drivers. + */ +static int nvmem_setup_compat(struct nvmem_device *nvmem, + const struct nvmem_config *config) +{ + int rval; + + if (!config->base_dev) + return -EINVAL; + + if (nvmem->read_only) + nvmem->eeprom = bin_attr_ro_root_nvmem; + else + nvmem->eeprom = bin_attr_rw_root_nvmem; + nvmem->eeprom.attr.name = "eeprom"; + nvmem->eeprom.size = nvmem->size; +#ifdef CONFIG_DEBUG_LOCK_ALLOC + nvmem->eeprom.attr.key = &eeprom_lock_key; +#endif + nvmem->eeprom.private = &nvmem->dev; + nvmem->base_dev = config->base_dev; + + rval = device_create_bin_file(nvmem->base_dev, &nvmem->eeprom); + if (rval) { + dev_err(&nvmem->dev, + "Failed to create eeprom binary file %d\n", rval); + return rval; + } + + nvmem->flags |= FLAG_COMPAT; + + return 0; +} + /** * nvmem_register() - Register a nvmem device for given nvmem_config. 
* Also creates an binary entry in /sys/bus/nvmem/devices/dev-name/nvmem @@ -416,16 +474,23 @@ struct nvmem_device *nvmem_register(const struct nvmem_config *config) dev_dbg(&nvmem->dev, "Registering nvmem device %s\n", config->name); rval = device_add(&nvmem->dev); - if (rval) { - ida_simple_remove(&nvmem_ida, nvmem->id); - kfree(nvmem); - return ERR_PTR(rval); + if (rval) + goto out; + + if (config->compat) { + rval = nvmem_setup_compat(nvmem, config); + if (rval) + goto out; } if (config->cells) nvmem_add_cells(nvmem, config); return nvmem; +out: + ida_simple_remove(&nvmem_ida, nvmem->id); + kfree(nvmem); + return ERR_PTR(rval); } EXPORT_SYMBOL_GPL(nvmem_register); @@ -445,6 +510,9 @@ int nvmem_unregister(struct nvmem_device *nvmem) } mutex_unlock(&nvmem_mutex); + if (nvmem->flags & FLAG_COMPAT) + device_remove_bin_file(nvmem->base_dev, &nvmem->eeprom); + nvmem_device_remove_all_cells(nvmem); device_del(&nvmem->dev); diff --git a/include/linux/nvmem-provider.h b/include/linux/nvmem-provider.h index d24fefa0c11d..a4fcc90b0f20 100644 --- a/include/linux/nvmem-provider.h +++ b/include/linux/nvmem-provider.h @@ -24,6 +24,9 @@ struct nvmem_config { int ncells; bool read_only; bool root_only; + /* To be only used by old driver/misc/eeprom drivers */ + bool compat; + struct device *base_dev; }; #if IS_ENABLED(CONFIG_NVMEM) @@ -44,5 +47,4 @@ static inline int nvmem_unregister(struct nvmem_device *nvmem) } #endif /* CONFIG_NVMEM */ - #endif /* ifndef _LINUX_NVMEM_PROVIDER_H */ -- cgit v1.2.3 From 3ccea0e1fdf896645f8cccddcfcf60cb289fdf76 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Fri, 26 Feb 2016 20:59:21 +0100 Subject: eeprom: at25: Remove in kernel API for accessing the EEPROM The setup() callback is not used by any in kernel code. Remove it. Any new code which requires access to the eeprom can use the NVMEM API. Signed-off-by: Andrew Lunn Acked-by: Srinivas Kandagatla Acked-by: Wolfram Sang Signed-off-by: Greg Kroah-Hartman --- drivers/misc/eeprom/at25.c | 26 -------------------------- include/linux/spi/eeprom.h | 2 -- 2 files changed, 28 deletions(-) (limited to 'include/linux') diff --git a/drivers/misc/eeprom/at25.c b/drivers/misc/eeprom/at25.c index 3e9e5a28acaa..00f859cc0955 100644 --- a/drivers/misc/eeprom/at25.c +++ b/drivers/misc/eeprom/at25.c @@ -29,7 +29,6 @@ struct at25_data { struct spi_device *spi; - struct memory_accessor mem; struct mutex lock; struct spi_eeprom chip; struct bin_attribute bin; @@ -281,26 +280,6 @@ at25_bin_write(struct file *filp, struct kobject *kobj, /*-------------------------------------------------------------------------*/ -/* Let in-kernel code access the eeprom data. 
*/ - -static ssize_t at25_mem_read(struct memory_accessor *mem, char *buf, - off_t offset, size_t count) -{ - struct at25_data *at25 = container_of(mem, struct at25_data, mem); - - return at25_ee_read(at25, buf, offset, count); -} - -static ssize_t at25_mem_write(struct memory_accessor *mem, const char *buf, - off_t offset, size_t count) -{ - struct at25_data *at25 = container_of(mem, struct at25_data, mem); - - return at25_ee_write(at25, buf, offset, count); -} - -/*-------------------------------------------------------------------------*/ - static int at25_fw_to_chip(struct device *dev, struct spi_eeprom *chip) { u32 val; @@ -415,22 +394,17 @@ static int at25_probe(struct spi_device *spi) at25->bin.attr.name = "eeprom"; at25->bin.attr.mode = S_IRUSR; at25->bin.read = at25_bin_read; - at25->mem.read = at25_mem_read; at25->bin.size = at25->chip.byte_len; if (!(chip.flags & EE_READONLY)) { at25->bin.write = at25_bin_write; at25->bin.attr.mode |= S_IWUSR; - at25->mem.write = at25_mem_write; } err = sysfs_create_bin_file(&spi->dev.kobj, &at25->bin); if (err) return err; - if (chip.setup) - chip.setup(&at25->mem, chip.context); - dev_info(&spi->dev, "%Zd %s %s eeprom%s, pagesize %u\n", (at25->bin.size < 1024) ? at25->bin.size diff --git a/include/linux/spi/eeprom.h b/include/linux/spi/eeprom.h index 403e007aef68..e34e169f9dcb 100644 --- a/include/linux/spi/eeprom.h +++ b/include/linux/spi/eeprom.h @@ -30,8 +30,6 @@ struct spi_eeprom { */ #define EE_INSTR_BIT3_IS_ADDR 0x0010 - /* for exporting this chip's data to other kernel code */ - void (*setup)(struct memory_accessor *mem, void *context); void *context; }; -- cgit v1.2.3 From bec3c11bad0e7ac05fb90f204d0ab6f79945822b Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Fri, 26 Feb 2016 20:59:24 +0100 Subject: misc: at24: replace memory_accessor with nvmem_device_read Now that the AT24 uses the NVMEM framework, replace the memory_accessor in the setup() callback with nvmem API calls. 
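The shape of the conversion in one place, since the argument order is easy to trip over: nvmem_device_read() takes offset and length before the buffer, whereas the removed accessor took the buffer first. Both return the number of bytes transferred, so size checks carry over unchanged. A minimal sketch:

#include <linux/errno.h>
#include <linux/nvmem-consumer.h>

static int read_board_data(struct nvmem_device *nvmem, void *buf,
			   unsigned int offset, size_t count)
{
	/* Old world: ret = macc->read(macc, buf, offset, count);
	 * New world: offset and count come before the buffer.
	 */
	int ret = nvmem_device_read(nvmem, offset, count, buf);

	if (ret < 0)
		return ret;
	return ((size_t)ret == count) ? 0 : -EIO;
}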
Signed-off-by: Andrew Lunn Acked-by: Srinivas Kandagatla Tested-by: Sekhar Nori Acked-by: Wolfram Sang Signed-off-by: Greg Kroah-Hartman --- arch/arm/mach-davinci/board-mityomapl138.c | 5 +++-- arch/arm/mach-davinci/common.c | 4 ++-- drivers/misc/eeprom/at24.c | 31 +----------------------------- include/linux/davinci_emac.h | 4 ++-- include/linux/memory.h | 11 ----------- include/linux/platform_data/at24.h | 10 +++++----- 6 files changed, 13 insertions(+), 52 deletions(-) (limited to 'include/linux') diff --git a/arch/arm/mach-davinci/board-mityomapl138.c b/arch/arm/mach-davinci/board-mityomapl138.c index de1316bf643a..62ebac51bab9 100644 --- a/arch/arm/mach-davinci/board-mityomapl138.c +++ b/arch/arm/mach-davinci/board-mityomapl138.c @@ -115,13 +115,14 @@ static void mityomapl138_cpufreq_init(const char *partnum) static void mityomapl138_cpufreq_init(const char *partnum) { } #endif -static void read_factory_config(struct memory_accessor *a, void *context) +static void read_factory_config(struct nvmem_device *nvmem, void *context) { int ret; const char *partnum = NULL; struct davinci_soc_info *soc_info = &davinci_soc_info; - ret = a->read(a, (char *)&factory_config, 0, sizeof(factory_config)); + ret = nvmem_device_read(nvmem, 0, sizeof(factory_config), + &factory_config); if (ret != sizeof(struct factory_config)) { pr_warn("Read Factory Config Failed: %d\n", ret); goto bad_config; diff --git a/arch/arm/mach-davinci/common.c b/arch/arm/mach-davinci/common.c index a794f6d9d444..f55ef2ef2f92 100644 --- a/arch/arm/mach-davinci/common.c +++ b/arch/arm/mach-davinci/common.c @@ -28,13 +28,13 @@ EXPORT_SYMBOL(davinci_soc_info); void __iomem *davinci_intc_base; int davinci_intc_type; -void davinci_get_mac_addr(struct memory_accessor *mem_acc, void *context) +void davinci_get_mac_addr(struct nvmem_device *nvmem, void *context) { char *mac_addr = davinci_soc_info.emac_pdata->mac_addr; off_t offset = (off_t)context; /* Read MAC addr from EEPROM */ - if (mem_acc->read(mem_acc, mac_addr, offset, ETH_ALEN) == ETH_ALEN) + if (nvmem_device_read(nvmem, offset, ETH_ALEN, mac_addr) == ETH_ALEN) pr_info("Read MAC addr from EEPROM: %pM\n", mac_addr); } diff --git a/drivers/misc/eeprom/at24.c b/drivers/misc/eeprom/at24.c index f15cda93fc4c..089d6943f68a 100644 --- a/drivers/misc/eeprom/at24.c +++ b/drivers/misc/eeprom/at24.c @@ -56,7 +56,6 @@ struct at24_data { struct at24_platform_data chip; - struct memory_accessor macc; int use_smbus; int use_smbus_write; @@ -409,30 +408,6 @@ static ssize_t at24_write(struct at24_data *at24, const char *buf, loff_t off, /*-------------------------------------------------------------------------*/ -/* - * This lets other kernel code access the eeprom data. For example, it - * might hold a board's Ethernet address, or board-specific calibration - * data generated on the manufacturing floor. 
- */ - -static ssize_t at24_macc_read(struct memory_accessor *macc, char *buf, - off_t offset, size_t count) -{ - struct at24_data *at24 = container_of(macc, struct at24_data, macc); - - return at24_read(at24, buf, offset, count); -} - -static ssize_t at24_macc_write(struct memory_accessor *macc, const char *buf, - off_t offset, size_t count) -{ - struct at24_data *at24 = container_of(macc, struct at24_data, macc); - - return at24_write(at24, buf, offset, count); -} - -/*-------------------------------------------------------------------------*/ - /* * Provide a regmap interface, which is registered with the NVMEM * framework @@ -600,16 +575,12 @@ static int at24_probe(struct i2c_client *client, const struct i2c_device_id *id) at24->chip = chip; at24->num_addresses = num_addresses; - at24->macc.read = at24_macc_read; - writable = !(chip.flags & AT24_FLAG_READONLY); if (writable) { if (!use_smbus || use_smbus_write) { unsigned write_max = chip.page_size; - at24->macc.write = at24_macc_write; - if (write_max > io_limit) write_max = io_limit; if (use_smbus && write_max > I2C_SMBUS_BLOCK_MAX) @@ -683,7 +654,7 @@ static int at24_probe(struct i2c_client *client, const struct i2c_device_id *id) /* export data to kernel code */ if (chip.setup) - chip.setup(&at24->macc, chip.context); + chip.setup(at24->nvmem, chip.context); return 0; diff --git a/include/linux/davinci_emac.h b/include/linux/davinci_emac.h index 542888504994..05b97144d342 100644 --- a/include/linux/davinci_emac.h +++ b/include/linux/davinci_emac.h @@ -12,7 +12,7 @@ #define _LINUX_DAVINCI_EMAC_H #include -#include +#include struct mdio_platform_data { unsigned long bus_freq; @@ -46,5 +46,5 @@ enum { EMAC_VERSION_2, /* DM646x */ }; -void davinci_get_mac_addr(struct memory_accessor *mem_acc, void *context); +void davinci_get_mac_addr(struct nvmem_device *nvmem, void *context); #endif diff --git a/include/linux/memory.h b/include/linux/memory.h index 8b8d8d12348e..b723a686fc10 100644 --- a/include/linux/memory.h +++ b/include/linux/memory.h @@ -136,17 +136,6 @@ extern struct memory_block *find_memory_block(struct mem_section *); #define unregister_hotmemory_notifier(nb) ({ (void)(nb); }) #endif -/* - * 'struct memory_accessor' is a generic interface to provide - * in-kernel access to persistent memory such as i2c or SPI EEPROMs - */ -struct memory_accessor { - ssize_t (*read)(struct memory_accessor *, char *buf, off_t offset, - size_t count); - ssize_t (*write)(struct memory_accessor *, const char *buf, - off_t offset, size_t count); -}; - /* * Kernel text modification mutex, used for code patching. Users of this lock * can sleep. diff --git a/include/linux/platform_data/at24.h b/include/linux/platform_data/at24.h index c42aa89d34ee..dc9a13e5acda 100644 --- a/include/linux/platform_data/at24.h +++ b/include/linux/platform_data/at24.h @@ -9,7 +9,7 @@ #define _LINUX_AT24_H #include -#include +#include /** * struct at24_platform_data - data to set up at24 (generic eeprom) driver @@ -17,7 +17,7 @@ * @page_size: number of byte which can be written in one go * @flags: tunable options, check AT24_FLAG_* defines * @setup: an optional callback invoked after eeprom is probed; enables kernel - code to access eeprom via memory_accessor, see example + code to access eeprom via nvmem, see example * @context: optional parameter passed to setup() * * If you set up a custom eeprom type, please double-check the parameters. 
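For board files, only the prototype of the callback changes; a sketch of the wiring under the new API, assuming the usual at24_platform_data fields (the offset value is hypothetical):

#include <linux/platform_data/at24.h>

static struct at24_platform_data my_board_eeprom = {
	.byte_len  = 32768,
	.page_size = 64,
	/* New prototype: void (*setup)(struct nvmem_device *, void *) */
	.setup     = davinci_get_mac_addr,
	.context   = (void *)0x7f00,	/* hypothetical MAC offset */
};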
@@ -26,13 +26,13 @@ * * An example in pseudo code for a setup() callback: * - * void get_mac_addr(struct memory_accessor *mem_acc, void *context) + * void get_mac_addr(struct mvmem_device *nvmem, void *context) * { * u8 *mac_addr = ethernet_pdata->mac_addr; * off_t offset = context; * * // Read MAC addr from EEPROM - * if (mem_acc->read(mem_acc, mac_addr, offset, ETH_ALEN) == ETH_ALEN) + * if (nvmem_device_read(nvmem, offset, ETH_ALEN, mac_addr) == ETH_ALEN) * pr_info("Read MAC addr from EEPROM: %pM\n", mac_addr); * } * @@ -48,7 +48,7 @@ struct at24_platform_data { #define AT24_FLAG_IRUGO 0x20 /* sysfs-entry will be world-readable */ #define AT24_FLAG_TAKE8ADDR 0x10 /* take always 8 addresses (24c00) */ - void (*setup)(struct memory_accessor *, void *context); + void (*setup)(struct nvmem_device *nvmem, void *context); void *context; }; -- cgit v1.2.3 From b9830d120cbe155863399f25eaef6aa8353e767f Mon Sep 17 00:00:00 2001 From: "K. Y. Srinivasan" Date: Fri, 26 Feb 2016 15:13:19 -0800 Subject: Drivers: hv: util: Pass the channel information during the init call Pass the channel information to the util drivers that need to defer reading the channel while they are processing a request. This would address the following issue reported by Vitaly: Commit 3cace4a61610 ("Drivers: hv: utils: run polling callback always in interrupt context") removed direct *_transaction.state = HVUTIL_READY assignments from *_handle_handshake() functions introducing the following race: if a userspace daemon connects before we get first non-negotiation request from the server hv_poll_channel() won't set transaction state to HVUTIL_READY as (!channel) condition will fail, we set it to non-NULL on the first real request from the server. Signed-off-by: K. Y. Srinivasan Reported-by: Vitaly Kuznetsov Signed-off-by: Greg Kroah-Hartman --- drivers/hv/hv_fcopy.c | 2 +- drivers/hv/hv_kvp.c | 2 +- drivers/hv/hv_snapshot.c | 2 +- drivers/hv/hv_util.c | 1 + include/linux/hyperv.h | 1 + 5 files changed, 5 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/hv/hv_fcopy.c b/drivers/hv/hv_fcopy.c index c37a71e13de0..23c70799ad8a 100644 --- a/drivers/hv/hv_fcopy.c +++ b/drivers/hv/hv_fcopy.c @@ -251,7 +251,6 @@ void hv_fcopy_onchannelcallback(void *context) */ fcopy_transaction.recv_len = recvlen; - fcopy_transaction.recv_channel = channel; fcopy_transaction.recv_req_id = requestid; fcopy_transaction.fcopy_msg = fcopy_msg; @@ -317,6 +316,7 @@ static void fcopy_on_reset(void) int hv_fcopy_init(struct hv_util_service *srv) { recv_buffer = srv->recv_buffer; + fcopy_transaction.recv_channel = srv->channel; /* * When this driver loads, the user level daemon that diff --git a/drivers/hv/hv_kvp.c b/drivers/hv/hv_kvp.c index d4ab81bcd515..9b9b370fe22a 100644 --- a/drivers/hv/hv_kvp.c +++ b/drivers/hv/hv_kvp.c @@ -639,7 +639,6 @@ void hv_kvp_onchannelcallback(void *context) */ kvp_transaction.recv_len = recvlen; - kvp_transaction.recv_channel = channel; kvp_transaction.recv_req_id = requestid; kvp_transaction.kvp_msg = kvp_msg; @@ -688,6 +687,7 @@ int hv_kvp_init(struct hv_util_service *srv) { recv_buffer = srv->recv_buffer; + kvp_transaction.recv_channel = srv->channel; /* * When this driver loads, the user level daemon that diff --git a/drivers/hv/hv_snapshot.c b/drivers/hv/hv_snapshot.c index 67def4a831c8..3fba14e88f03 100644 --- a/drivers/hv/hv_snapshot.c +++ b/drivers/hv/hv_snapshot.c @@ -263,7 +263,6 @@ void hv_vss_onchannelcallback(void *context) */ vss_transaction.recv_len = recvlen; - 
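The race being fixed, reduced to a standalone model (hypothetical types; the real code uses struct vmbus_channel and hv_poll_channel()): the poll helper gives up on a NULL channel, so the channel must be recorded at init time rather than on the first request from the host.

struct util_transaction {
	void *recv_channel;	/* stand-in for struct vmbus_channel * */
};

static void poll_channel(struct util_transaction *t, void (*cb)(void *))
{
	if (!t->recv_channel)
		return;	/* daemon connected before first request: cb lost */
	cb(t->recv_channel);
}

static void util_init(struct util_transaction *t, void *channel)
{
	t->recv_channel = channel;	/* the fix: taken from srv->channel */
}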
vss_transaction.recv_channel = channel; vss_transaction.recv_req_id = requestid; vss_transaction.msg = (struct hv_vss_msg *)vss_msg; @@ -337,6 +336,7 @@ hv_vss_init(struct hv_util_service *srv) return -ENOTSUPP; } recv_buffer = srv->recv_buffer; + vss_transaction.recv_channel = srv->channel; /* * When this driver loads, the user level daemon that diff --git a/drivers/hv/hv_util.c b/drivers/hv/hv_util.c index 7994ec2e4151..d5acaa2d8e61 100644 --- a/drivers/hv/hv_util.c +++ b/drivers/hv/hv_util.c @@ -322,6 +322,7 @@ static int util_probe(struct hv_device *dev, srv->recv_buffer = kmalloc(PAGE_SIZE * 4, GFP_KERNEL); if (!srv->recv_buffer) return -ENOMEM; + srv->channel = dev->channel; if (srv->util_init) { ret = srv->util_init(srv); if (ret) { diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index d23dab0d770b..aa0fadce9308 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -1251,6 +1251,7 @@ u64 hv_do_hypercall(u64 control, void *input, void *output); struct hv_util_service { u8 *recv_buffer; + void *channel; void (*util_cb)(void *); int (*util_init)(struct hv_util_service *); void (*util_deinit)(void); -- cgit v1.2.3 From 670666b9e0aff40c65d8061a2f53e79eee238685 Mon Sep 17 00:00:00 2001 From: Laxman Dewangan Date: Wed, 2 Mar 2016 16:24:46 +0530 Subject: regulator: core: Add support for active-discharge configuration Add support to enable/disable active discharge of regulator via machine constraints. This configuration is done when setting machine constraint during regulator register and if regulator driver implemented the callback ops. Signed-off-by: Laxman Dewangan Signed-off-by: Mark Brown --- drivers/regulator/core.c | 11 +++++++++++ drivers/regulator/of_regulator.c | 6 ++++++ include/linux/regulator/driver.h | 3 +++ include/linux/regulator/machine.h | 12 ++++++++++++ 4 files changed, 32 insertions(+) (limited to 'include/linux') diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c index 744c9889f88d..7ebb7c899158 100644 --- a/drivers/regulator/core.c +++ b/drivers/regulator/core.c @@ -1140,6 +1140,17 @@ static int set_machine_constraints(struct regulator_dev *rdev, } } + if (rdev->constraints->active_discharge && ops->set_active_discharge) { + bool ad_state = (rdev->constraints->active_discharge == + REGULATOR_ACTIVE_DISCHARGE_ENABLE) ? true : false; + + ret = ops->set_active_discharge(rdev, ad_state); + if (ret < 0) { + rdev_err(rdev, "failed to set active discharge\n"); + return ret; + } + } + print_constraints(rdev); return 0; out: diff --git a/drivers/regulator/of_regulator.c b/drivers/regulator/of_regulator.c index 499e437c7e91..fe2e33441dae 100644 --- a/drivers/regulator/of_regulator.c +++ b/drivers/regulator/of_regulator.c @@ -93,6 +93,12 @@ static void of_get_regulation_constraints(struct device_node *np, constraints->soft_start = of_property_read_bool(np, "regulator-soft-start"); + ret = of_property_read_u32(np, "regulator-active-discharge", &pval); + if (!ret) { + constraints->active_discharge = + (pval) ? REGULATOR_ACTIVE_DISCHARGE_ENABLE : + REGULATOR_ACTIVE_DISCHARGE_DISABLE; + } if (!of_property_read_u32(np, "regulator-initial-mode", &pval)) { if (desc && desc->of_map_mode) { diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h index 16ac9e108806..59dbaf78a25c 100644 --- a/include/linux/regulator/driver.h +++ b/include/linux/regulator/driver.h @@ -93,6 +93,8 @@ struct regulator_linear_range { * @get_current_limit: Get the configured limit for a current-limited regulator. 
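What a driver-side implementation of the new callback can look like for hardware that is not behind regmap; everything mypmic_* below, including the register/bit values and the mypmic_update_bits() helper, is hypothetical.

#include <linux/regulator/driver.h>

#define MYPMIC_DISCHG_REG	0x21	/* hypothetical register */
#define MYPMIC_DISCHG_MASK	BIT(3)	/* hypothetical bit */

static int mypmic_set_active_discharge(struct regulator_dev *rdev,
				       bool enable)
{
	struct mypmic *chip = rdev_get_drvdata(rdev);

	/* Pull the output down through the bleed resistor when disabled. */
	return mypmic_update_bits(chip, MYPMIC_DISCHG_REG,
				  MYPMIC_DISCHG_MASK,
				  enable ? MYPMIC_DISCHG_MASK : 0);
}

static const struct regulator_ops mypmic_ops = {
	.set_active_discharge = mypmic_set_active_discharge,
};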
* @set_input_current_limit: Configure an input limit. * + * @set_active_discharge: Set active discharge enable/disable of regulators. + * * @set_mode: Set the configured operating mode for the regulator. * @get_mode: Get the configured operating mode for the regulator. * @get_status: Return actual (not as-configured) status of regulator, as a @@ -149,6 +151,7 @@ struct regulator_ops { int (*set_input_current_limit) (struct regulator_dev *, int lim_uA); int (*set_over_current_protection) (struct regulator_dev *); + int (*set_active_discharge) (struct regulator_dev *, bool enable); /* enable/disable regulator */ int (*enable) (struct regulator_dev *); diff --git a/include/linux/regulator/machine.h b/include/linux/regulator/machine.h index a1067d0b3991..5d627c83a630 100644 --- a/include/linux/regulator/machine.h +++ b/include/linux/regulator/machine.h @@ -42,6 +42,13 @@ struct regulator; #define REGULATOR_CHANGE_DRMS 0x10 #define REGULATOR_CHANGE_BYPASS 0x20 +/* Regulator active discharge flags */ +enum regulator_active_discharge { + REGULATOR_ACTIVE_DISCHARGE_DEFAULT, + REGULATOR_ACTIVE_DISCHARGE_DISABLE, + REGULATOR_ACTIVE_DISCHARGE_ENABLE, +}; + /** * struct regulator_state - regulator state during low power system states * @@ -100,6 +107,9 @@ struct regulator_state { * @initial_state: Suspend state to set by default. * @initial_mode: Mode to set at startup. * @ramp_delay: Time to settle down after voltage change (unit: uV/us) + * @active_discharge: Enable/disable active discharge. The enum + * regulator_active_discharge values are used for + * initialisation. * @enable_time: Turn-on time of the rails (unit: microseconds) */ struct regulation_constraints { @@ -140,6 +150,8 @@ struct regulation_constraints { unsigned int ramp_delay; unsigned int enable_time; + unsigned int active_discharge; + /* constraint flags */ unsigned always_on:1; /* regulator never off when system is on */ unsigned boot_on:1; /* bootloader/firmware enabled regulator */ -- cgit v1.2.3 From 354794dacc213da7596cefea4dbcd8c094368807 Mon Sep 17 00:00:00 2001 From: Laxman Dewangan Date: Wed, 2 Mar 2016 16:24:47 +0530 Subject: regulator: helper: Add helper to configure active-discharge using regmap Add helper function to set the state of active-discharge of regulator using regmap. The HW regulator driver can directly use this by providing the necessary information in the regulator descriptor. Signed-off-by: Laxman Dewangan Signed-off-by: Mark Brown --- drivers/regulator/helpers.c | 23 +++++++++++++++++++++++ include/linux/regulator/driver.h | 14 ++++++++++++++ 2 files changed, 37 insertions(+) (limited to 'include/linux') diff --git a/drivers/regulator/helpers.c b/drivers/regulator/helpers.c index 3bbb32680a94..b1e32e7482e9 100644 --- a/drivers/regulator/helpers.c +++ b/drivers/regulator/helpers.c @@ -465,3 +465,26 @@ int regulator_get_bypass_regmap(struct regulator_dev *rdev, bool *enable) return 0; } EXPORT_SYMBOL_GPL(regulator_get_bypass_regmap); + +/** + * regulator_set_active_discharge_regmap - Default set_active_discharge() + * using regmap + * + * @rdev: device to operate on. + * @enable: state to set, 0 to disable and 1 to enable. 
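With regmap-backed hardware, no bespoke callback is needed at all: the descriptor carries the register layout and the ops point at this helper. A sketch with hypothetical register values; the four active_discharge_* fields are the ones added to struct regulator_desc in this patch.

static const struct regulator_ops mypmic_ldo_ops = {
	.set_active_discharge	= regulator_set_active_discharge_regmap,
};

static const struct regulator_desc mypmic_ldo_desc = {
	.name			= "mypmic-ldo1",	/* hypothetical */
	.ops			= &mypmic_ldo_ops,
	.active_discharge_reg	= 0x44,			/* hypothetical */
	.active_discharge_mask	= BIT(0),
	.active_discharge_on	= BIT(0),
	.active_discharge_off	= 0,
};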
+ */
+int regulator_set_active_discharge_regmap(struct regulator_dev *rdev,
+					   bool enable)
+{
+	unsigned int val;
+
+	if (enable)
+		val = rdev->desc->active_discharge_on;
+	else
+		val = rdev->desc->active_discharge_off;
+
+	return regmap_update_bits(rdev->regmap,
+				  rdev->desc->active_discharge_reg,
+				  rdev->desc->active_discharge_mask, val);
+}
+EXPORT_SYMBOL_GPL(regulator_set_active_discharge_regmap);
diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h index 59dbaf78a25c..cd271e89a7e6 100644 --- a/include/linux/regulator/driver.h +++ b/include/linux/regulator/driver.h @@ -269,6 +269,14 @@ enum regulator_type { * @bypass_mask: Mask for control when using regmap set_bypass * @bypass_val_on: Enabling value for control when using regmap set_bypass * @bypass_val_off: Disabling value for control when using regmap set_bypass
+ * @active_discharge_off: Disabling value for control when using regmap
+ *			  set_active_discharge
+ * @active_discharge_on: Enabling value for control when using regmap
+ *			 set_active_discharge
+ * @active_discharge_mask: Mask for control when using regmap
+ *			   set_active_discharge
+ * @active_discharge_reg: Register for control when using regmap
+ *			  set_active_discharge
* * @enable_time: Time taken for initial enable of regulator (in uS). * @off_on_delay: guard time (in uS), before re-enabling a regulator @@ -318,6 +326,10 @@ struct regulator_desc { unsigned int bypass_mask; unsigned int bypass_val_on; unsigned int bypass_val_off;
+	unsigned int active_discharge_on;
+	unsigned int active_discharge_off;
+	unsigned int active_discharge_mask;
+	unsigned int active_discharge_reg;
unsigned int enable_time; @@ -450,6 +462,8 @@ int regulator_set_voltage_time_sel(struct regulator_dev *rdev, int regulator_set_bypass_regmap(struct regulator_dev *rdev, bool enable); int regulator_get_bypass_regmap(struct regulator_dev *rdev, bool *enable);
+int regulator_set_active_discharge_regmap(struct regulator_dev *rdev,
+					  bool enable);
void *regulator_get_init_drvdata(struct regulator_init_data *reg_init_data); #endif -- cgit v1.2.3
From a8463d4b0e47d1f37af684d97884ffcf35de043b Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Tue, 2 Feb 2016 21:46:32 -0800 Subject: dma: Provide simple noop dma ops We are going to require dma_ops for several common drivers, even for systems that do have an identity mapping. Let's provide some minimal no-op dma_ops that can be used for that purpose. Signed-off-by: Christian Borntraeger Reviewed-by: Joerg Roedel Signed-off-by: Andy Lutomirski Signed-off-by: Michael S. Tsirkin ---
include/linux/dma-mapping.h | 2 ++
lib/Makefile | 1 +
lib/dma-noop.c | 75 +++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 78 insertions(+) create mode 100644 lib/dma-noop.c (limited to 'include/linux')
diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index 75857cda38e9..c0b27ff2c784 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -70,6 +70,8 @@ struct dma_map_ops { int is_phys; };
+extern struct dma_map_ops dma_noop_ops;
+
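What "noop" means for a consumer, sketched; how a device ends up with dma_noop_ops attached is arch-specific and assumed here.

#include <linux/dma-mapping.h>

static dma_addr_t map_with_identity(struct device *dev, void *buf,
				    size_t len)
{
	/* With dma_noop_ops installed, map_page just returns
	 * page_to_phys(page) + offset, so the handle is the buffer's
	 * physical address and dma_mapping_error() never fires.
	 */
	return dma_map_single(dev, buf, len, DMA_TO_DEVICE);
}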
#define DMA_BIT_MASK(n)	(((n) == 64) ? ~0ULL : ((1ULL<<(n))-1))
#define DMA_MASK_NONE	0x0ULL
diff --git a/lib/Makefile b/lib/Makefile index a7c26a41a738..a572b86a1b1d 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -18,6 +18,7 @@ lib-y := ctype.o string.o vsprintf.o cmdline.o \ obj-$(CONFIG_ARCH_HAS_DEBUG_STRICT_USER_COPY_CHECKS) += usercopy.o lib-$(CONFIG_MMU) += ioremap.o lib-$(CONFIG_SMP) += cpumask.o
+lib-$(CONFIG_HAS_DMA) += dma-noop.o
lib-y += kobject.o klist.o obj-y += lockref.o
diff --git a/lib/dma-noop.c b/lib/dma-noop.c new file mode 100644 index 000000000000..72145646857e --- /dev/null +++ b/lib/dma-noop.c @@ -0,0 +1,75 @@
+/*
+ *	lib/dma-noop.c
+ *
+ *	Simple DMA noop-ops that map 1:1 with memory
+ */
+#include <linux/export.h>
+#include <linux/mm.h>
+#include <linux/dma-mapping.h>
+#include <linux/scatterlist.h>
+
+static void *dma_noop_alloc(struct device *dev, size_t size,
+			    dma_addr_t *dma_handle, gfp_t gfp,
+			    struct dma_attrs *attrs)
+{
+	void *ret;
+
+	ret = (void *)__get_free_pages(gfp, get_order(size));
+	if (ret)
+		*dma_handle = virt_to_phys(ret);
+	return ret;
+}
+
+static void dma_noop_free(struct device *dev, size_t size,
+			  void *cpu_addr, dma_addr_t dma_addr,
+			  struct dma_attrs *attrs)
+{
+	free_pages((unsigned long)cpu_addr, get_order(size));
+}
+
+static dma_addr_t dma_noop_map_page(struct device *dev, struct page *page,
+				    unsigned long offset, size_t size,
+				    enum dma_data_direction dir,
+				    struct dma_attrs *attrs)
+{
+	return page_to_phys(page) + offset;
+}
+
+static int dma_noop_map_sg(struct device *dev, struct scatterlist *sgl, int nents,
+			   enum dma_data_direction dir, struct dma_attrs *attrs)
+{
+	int i;
+	struct scatterlist *sg;
+
+	for_each_sg(sgl, sg, nents, i) {
+		void *va;
+
+		BUG_ON(!sg_page(sg));
+		va = sg_virt(sg);
+		sg_dma_address(sg) = (dma_addr_t)virt_to_phys(va);
+		sg_dma_len(sg) = sg->length;
+	}
+
+	return nents;
+}
+
+static int dma_noop_mapping_error(struct device *dev, dma_addr_t dma_addr)
+{
+	return 0;
+}
+
+static int dma_noop_supported(struct device *dev, u64 mask)
+{
+	return 1;
+}
+
+struct dma_map_ops dma_noop_ops = {
+	.alloc			= dma_noop_alloc,
+	.free			= dma_noop_free,
+	.map_page		= dma_noop_map_page,
+	.map_sg			= dma_noop_map_sg,
+	.mapping_error		= dma_noop_mapping_error,
+	.dma_supported		= dma_noop_supported,
+};
+
+EXPORT_SYMBOL(dma_noop_ops);
-- cgit v1.2.3
From 2a2d1382fe9dccfce6f9c60a9c9fd2f0fe5bcf2b Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Tue, 2 Feb 2016 21:46:37 -0800 Subject: virtio: Add improved queue allocation API This leaves vring_new_virtqueue alone for compatibility, but it adds two new improved APIs: vring_create_virtqueue: Creates a virtqueue backed by automatically allocated coherent memory. (Some day this could be extended to support non-coherent memory, too, if there ends up being a platform on which it's worthwhile.) __vring_new_virtqueue: Creates a virtqueue with a manually-specified layout. This should allow mic_virtio to work much more cleanly. Signed-off-by: Andy Lutomirski Signed-off-by: Michael S. Tsirkin ---
drivers/virtio/virtio_ring.c | 209 ++++++++++++++++++++++++++++++++++++-------
include/linux/virtio.h | 23 ++++-
include/linux/virtio_ring.h | 35 ++++++++
3 files changed, 235 insertions(+), 32 deletions(-) (limited to 'include/linux')
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index 9abc008ff7ea..e46d08107a50 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -95,6 +95,11 @@ struct vring_virtqueue { /* How to notify other side. FIXME: commonalize hcalls!
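Transport-side use of the first new API, sketched against the signature added below; queue size, alignment and the callback bodies are placeholders.

#include <linux/virtio_ring.h>

static bool my_notify(struct virtqueue *vq)
{
	/* e.g. write vq->index to the device's doorbell */
	return true;
}

static void my_callback(struct virtqueue *vq)
{
	/* drain used buffers */
}

static struct virtqueue *my_setup_vq(struct virtio_device *vdev,
				     unsigned int index)
{
	/* The core allocates coherent ring memory itself and, with
	 * may_reduce_num set, may shrink num when a large contiguous
	 * allocation fails -- query virtqueue_get_vring_size()
	 * afterwards for the size actually used.
	 */
	return vring_create_virtqueue(index, 256, PAGE_SIZE, vdev,
				      true /* weak_barriers */,
				      true /* may_reduce_num */,
				      my_notify, my_callback, "my-vq");
}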
*/ bool (*notify)(struct virtqueue *vq); + /* DMA, allocation, and size information */ + bool we_own_ring; + size_t queue_size_in_bytes; + dma_addr_t queue_dma_addr; + #ifdef DEBUG /* They're supposed to lock for us. */ unsigned int in_use; @@ -878,36 +883,31 @@ irqreturn_t vring_interrupt(int irq, void *_vq) } EXPORT_SYMBOL_GPL(vring_interrupt); -struct virtqueue *vring_new_virtqueue(unsigned int index, - unsigned int num, - unsigned int vring_align, - struct virtio_device *vdev, - bool weak_barriers, - void *pages, - bool (*notify)(struct virtqueue *), - void (*callback)(struct virtqueue *), - const char *name) +struct virtqueue *__vring_new_virtqueue(unsigned int index, + struct vring vring, + struct virtio_device *vdev, + bool weak_barriers, + bool (*notify)(struct virtqueue *), + void (*callback)(struct virtqueue *), + const char *name) { - struct vring_virtqueue *vq; unsigned int i; + struct vring_virtqueue *vq; - /* We assume num is a power of 2. */ - if (num & (num - 1)) { - dev_warn(&vdev->dev, "Bad virtqueue length %u\n", num); - return NULL; - } - - vq = kmalloc(sizeof(*vq) + num * sizeof(struct vring_desc_state), + vq = kmalloc(sizeof(*vq) + vring.num * sizeof(struct vring_desc_state), GFP_KERNEL); if (!vq) return NULL; - vring_init(&vq->vring, num, pages, vring_align); + vq->vring = vring; vq->vq.callback = callback; vq->vq.vdev = vdev; vq->vq.name = name; - vq->vq.num_free = num; + vq->vq.num_free = vring.num; vq->vq.index = index; + vq->we_own_ring = false; + vq->queue_dma_addr = 0; + vq->queue_size_in_bytes = 0; vq->notify = notify; vq->weak_barriers = weak_barriers; vq->broken = false; @@ -932,18 +932,145 @@ struct virtqueue *vring_new_virtqueue(unsigned int index, /* Put everything in free lists. */ vq->free_head = 0; - for (i = 0; i < num-1; i++) + for (i = 0; i < vring.num-1; i++) vq->vring.desc[i].next = cpu_to_virtio16(vdev, i + 1); - memset(vq->desc_state, 0, num * sizeof(struct vring_desc_state)); + memset(vq->desc_state, 0, vring.num * sizeof(struct vring_desc_state)); return &vq->vq; } +EXPORT_SYMBOL_GPL(__vring_new_virtqueue); + +static void *vring_alloc_queue(struct virtio_device *vdev, size_t size, + dma_addr_t *dma_handle, gfp_t flag) +{ + if (vring_use_dma_api(vdev)) { + return dma_alloc_coherent(vdev->dev.parent, size, + dma_handle, flag); + } else { + void *queue = alloc_pages_exact(PAGE_ALIGN(size), flag); + if (queue) { + phys_addr_t phys_addr = virt_to_phys(queue); + *dma_handle = (dma_addr_t)phys_addr; + + /* + * Sanity check: make sure we dind't truncate + * the address. The only arches I can find that + * have 64-bit phys_addr_t but 32-bit dma_addr_t + * are certain non-highmem MIPS and x86 + * configurations, but these configurations + * should never allocate physical pages above 32 + * bits, so this is fine. Just in case, throw a + * warning and abort if we end up with an + * unrepresentable address. 
+ */ + if (WARN_ON_ONCE(*dma_handle != phys_addr)) { + free_pages_exact(queue, PAGE_ALIGN(size)); + return NULL; + } + } + return queue; + } +} + +static void vring_free_queue(struct virtio_device *vdev, size_t size, + void *queue, dma_addr_t dma_handle) +{ + if (vring_use_dma_api(vdev)) { + dma_free_coherent(vdev->dev.parent, size, queue, dma_handle); + } else { + free_pages_exact(queue, PAGE_ALIGN(size)); + } +} + +struct virtqueue *vring_create_virtqueue( + unsigned int index, + unsigned int num, + unsigned int vring_align, + struct virtio_device *vdev, + bool weak_barriers, + bool may_reduce_num, + bool (*notify)(struct virtqueue *), + void (*callback)(struct virtqueue *), + const char *name) +{ + struct virtqueue *vq; + void *queue; + dma_addr_t dma_addr; + size_t queue_size_in_bytes; + struct vring vring; + + /* We assume num is a power of 2. */ + if (num & (num - 1)) { + dev_warn(&vdev->dev, "Bad virtqueue length %u\n", num); + return NULL; + } + + /* TODO: allocate each queue chunk individually */ + for (; num && vring_size(num, vring_align) > PAGE_SIZE; num /= 2) { + queue = vring_alloc_queue(vdev, vring_size(num, vring_align), + &dma_addr, + GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO); + if (queue) + break; + } + + if (!num) + return NULL; + + if (!queue) { + /* Try to get a single page. You are my only hope! */ + queue = vring_alloc_queue(vdev, vring_size(num, vring_align), + &dma_addr, GFP_KERNEL|__GFP_ZERO); + } + if (!queue) + return NULL; + + queue_size_in_bytes = vring_size(num, vring_align); + vring_init(&vring, num, queue, vring_align); + + vq = __vring_new_virtqueue(index, vring, vdev, weak_barriers, + notify, callback, name); + if (!vq) { + vring_free_queue(vdev, queue_size_in_bytes, queue, + dma_addr); + return NULL; + } + + to_vvq(vq)->queue_dma_addr = dma_addr; + to_vvq(vq)->queue_size_in_bytes = queue_size_in_bytes; + to_vvq(vq)->we_own_ring = true; + + return vq; +} +EXPORT_SYMBOL_GPL(vring_create_virtqueue); + +struct virtqueue *vring_new_virtqueue(unsigned int index, + unsigned int num, + unsigned int vring_align, + struct virtio_device *vdev, + bool weak_barriers, + void *pages, + bool (*notify)(struct virtqueue *vq), + void (*callback)(struct virtqueue *vq), + const char *name) +{ + struct vring vring; + vring_init(&vring, num, pages, vring_align); + return __vring_new_virtqueue(index, vring, vdev, weak_barriers, + notify, callback, name); +} EXPORT_SYMBOL_GPL(vring_new_virtqueue); -void vring_del_virtqueue(struct virtqueue *vq) +void vring_del_virtqueue(struct virtqueue *_vq) { - list_del(&vq->list); - kfree(to_vvq(vq)); + struct vring_virtqueue *vq = to_vvq(_vq); + + if (vq->we_own_ring) { + vring_free_queue(vq->vq.vdev, vq->queue_size_in_bytes, + vq->vring.desc, vq->queue_dma_addr); + } + list_del(&_vq->list); + kfree(vq); } EXPORT_SYMBOL_GPL(vring_del_virtqueue); @@ -1007,20 +1134,42 @@ void virtio_break_device(struct virtio_device *dev) } EXPORT_SYMBOL_GPL(virtio_break_device); -void *virtqueue_get_avail(struct virtqueue *_vq) +dma_addr_t virtqueue_get_desc_addr(struct virtqueue *_vq) { struct vring_virtqueue *vq = to_vvq(_vq); - return vq->vring.avail; + BUG_ON(!vq->we_own_ring); + + return vq->queue_dma_addr; } -EXPORT_SYMBOL_GPL(virtqueue_get_avail); +EXPORT_SYMBOL_GPL(virtqueue_get_desc_addr); -void *virtqueue_get_used(struct virtqueue *_vq) +dma_addr_t virtqueue_get_avail_addr(struct virtqueue *_vq) { struct vring_virtqueue *vq = to_vvq(_vq); - return vq->vring.used; + BUG_ON(!vq->we_own_ring); + + return vq->queue_dma_addr + + ((char *)vq->vring.avail - 
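The point of returning dma_addr_t from these accessors is that a transport can program the device directly; a sketch with hypothetical register offsets (writeq assumed available on the target arch).

#include <linux/io.h>
#include <linux/virtio.h>

static void my_activate_vq(struct virtqueue *vq, void __iomem *regs)
{
	/* Only legal for rings created via vring_create_virtqueue();
	 * the BUG_ON(!vq->we_own_ring) checks here enforce that.
	 */
	writeq(virtqueue_get_desc_addr(vq),  regs + 0x20);
	writeq(virtqueue_get_avail_addr(vq), regs + 0x28);
	writeq(virtqueue_get_used_addr(vq),  regs + 0x30);
}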
(char *)vq->vring.desc); +} +EXPORT_SYMBOL_GPL(virtqueue_get_avail_addr); + +dma_addr_t virtqueue_get_used_addr(struct virtqueue *_vq) +{ + struct vring_virtqueue *vq = to_vvq(_vq); + + BUG_ON(!vq->we_own_ring); + + return vq->queue_dma_addr + + ((char *)vq->vring.used - (char *)vq->vring.desc); +} +EXPORT_SYMBOL_GPL(virtqueue_get_used_addr); + +const struct vring *virtqueue_get_vring(struct virtqueue *vq) +{ + return &to_vvq(vq)->vring; } -EXPORT_SYMBOL_GPL(virtqueue_get_used); +EXPORT_SYMBOL_GPL(virtqueue_get_vring); MODULE_LICENSE("GPL"); diff --git a/include/linux/virtio.h b/include/linux/virtio.h index 8f4d4bfa6d46..d5eb5479a425 100644 --- a/include/linux/virtio.h +++ b/include/linux/virtio.h @@ -75,8 +75,27 @@ unsigned int virtqueue_get_vring_size(struct virtqueue *vq); bool virtqueue_is_broken(struct virtqueue *vq); -void *virtqueue_get_avail(struct virtqueue *vq); -void *virtqueue_get_used(struct virtqueue *vq); +const struct vring *virtqueue_get_vring(struct virtqueue *vq); +dma_addr_t virtqueue_get_desc_addr(struct virtqueue *vq); +dma_addr_t virtqueue_get_avail_addr(struct virtqueue *vq); +dma_addr_t virtqueue_get_used_addr(struct virtqueue *vq); + +/* + * Legacy accessors -- in almost all cases, these are the wrong functions + * to use. + */ +static inline void *virtqueue_get_desc(struct virtqueue *vq) +{ + return virtqueue_get_vring(vq)->desc; +} +static inline void *virtqueue_get_avail(struct virtqueue *vq) +{ + return virtqueue_get_vring(vq)->avail; +} +static inline void *virtqueue_get_used(struct virtqueue *vq) +{ + return virtqueue_get_vring(vq)->used; +} /** * virtio_device - representation of a device using virtio diff --git a/include/linux/virtio_ring.h b/include/linux/virtio_ring.h index a156e2b6ccfe..e8d36938f09a 100644 --- a/include/linux/virtio_ring.h +++ b/include/linux/virtio_ring.h @@ -59,6 +59,35 @@ static inline void virtio_store_mb(bool weak_barriers, struct virtio_device; struct virtqueue; +/* + * Creates a virtqueue and allocates the descriptor ring. If + * may_reduce_num is set, then this may allocate a smaller ring than + * expected. The caller should query virtqueue_get_ring_size to learn + * the actual size of the ring. + */ +struct virtqueue *vring_create_virtqueue(unsigned int index, + unsigned int num, + unsigned int vring_align, + struct virtio_device *vdev, + bool weak_barriers, + bool may_reduce_num, + bool (*notify)(struct virtqueue *vq), + void (*callback)(struct virtqueue *vq), + const char *name); + +/* Creates a virtqueue with a custom layout. */ +struct virtqueue *__vring_new_virtqueue(unsigned int index, + struct vring vring, + struct virtio_device *vdev, + bool weak_barriers, + bool (*notify)(struct virtqueue *), + void (*callback)(struct virtqueue *), + const char *name); + +/* + * Creates a virtqueue with a standard layout but a caller-allocated + * ring. + */ struct virtqueue *vring_new_virtqueue(unsigned int index, unsigned int num, unsigned int vring_align, @@ -68,7 +97,13 @@ struct virtqueue *vring_new_virtqueue(unsigned int index, bool (*notify)(struct virtqueue *vq), void (*callback)(struct virtqueue *vq), const char *name); + +/* + * Destroys a virtqueue. If created with vring_create_virtqueue, this + * also frees the ring. + */ void vring_del_virtqueue(struct virtqueue *vq); + /* Filter out transport-specific feature bits. 
*/ void vring_transport_features(struct virtio_device *vdev); -- cgit v1.2.3 From 77a8f0ad38844b5d4163424c4feb8abbbbb55af0 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Sun, 17 Jan 2016 10:18:12 -0800 Subject: Input: rotary_encoder - convert to use gpiod API Instead of using old GPIO API, let's switch to GPIOD API, which automatically handles polarity. Signed-off-by: Dmitry Torokhov --- arch/arm/mach-pxa/raumfeld.c | 21 +++++++++-- drivers/input/misc/rotary_encoder.c | 74 +++++++++++++++++-------------------- include/linux/rotary_encoder.h | 4 -- 3 files changed, 50 insertions(+), 49 deletions(-) (limited to 'include/linux') diff --git a/arch/arm/mach-pxa/raumfeld.c b/arch/arm/mach-pxa/raumfeld.c index 36571a9a44fe..bac88e0be58a 100644 --- a/arch/arm/mach-pxa/raumfeld.c +++ b/arch/arm/mach-pxa/raumfeld.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -366,14 +367,21 @@ static struct pxaohci_platform_data raumfeld_ohci_info = { * Rotary encoder input device */ +static struct gpiod_lookup_table raumfeld_rotary_gpios_table = { + .dev_id = "rotary-encoder.0", + .table = { + GPIO_LOOKUP_IDX("gpio-0", + GPIO_VOLENC_A, NULL, 0, GPIO_ACTIVE_LOW), + GPIO_LOOKUP_IDX("gpio-0", + GPIO_VOLENC_B, NULL, 1, GPIO_ACTIVE_HIGH), + { }, + }, +}; + static struct rotary_encoder_platform_data raumfeld_rotary_encoder_info = { .steps = 24, .axis = REL_X, .relative_axis = 1, - .gpio_a = GPIO_VOLENC_A, - .gpio_b = GPIO_VOLENC_B, - .inverted_a = 1, - .inverted_b = 0, }; static struct platform_device rotary_encoder_device = { @@ -1051,7 +1059,10 @@ static void __init raumfeld_controller_init(void) int ret; pxa3xx_mfp_config(ARRAY_AND_SIZE(raumfeld_controller_pin_config)); + + gpiod_add_lookup_table(&raumfeld_rotary_gpios_table); platform_device_register(&rotary_encoder_device); + spi_register_board_info(ARRAY_AND_SIZE(controller_spi_devices)); i2c_register_board_info(0, &raumfeld_controller_i2c_board_info, 1); @@ -1086,6 +1097,8 @@ static void __init raumfeld_speaker_init(void) i2c_register_board_info(0, &raumfeld_connector_i2c_board_info, 1); platform_device_register(&smc91x_device); + + gpiod_add_lookup_table(&raumfeld_rotary_gpios_table); platform_device_register(&rotary_encoder_device); raumfeld_audio_init(); diff --git a/drivers/input/misc/rotary_encoder.c b/drivers/input/misc/rotary_encoder.c index 713247146167..471175e5306b 100644 --- a/drivers/input/misc/rotary_encoder.c +++ b/drivers/input/misc/rotary_encoder.c @@ -20,12 +20,11 @@ #include #include #include -#include +#include #include #include #include #include -#include #include #define DRV_NAME "rotary-encoder" @@ -38,6 +37,9 @@ struct rotary_encoder { unsigned int axis; unsigned int pos; + struct gpio_desc *gpio_a; + struct gpio_desc *gpio_b; + unsigned int irq_a; unsigned int irq_b; @@ -47,13 +49,10 @@ struct rotary_encoder { char last_stable; }; -static int rotary_encoder_get_state(const struct rotary_encoder_platform_data *pdata) +static int rotary_encoder_get_state(struct rotary_encoder *encoder) { - int a = !!gpio_get_value_cansleep(pdata->gpio_a); - int b = !!gpio_get_value_cansleep(pdata->gpio_b); - - a ^= pdata->inverted_a; - b ^= pdata->inverted_b; + int a = !!gpiod_get_value_cansleep(encoder->gpio_a); + int b = !!gpiod_get_value_cansleep(encoder->gpio_b); return ((a << 1) | b); } @@ -97,7 +96,7 @@ static irqreturn_t rotary_encoder_irq(int irq, void *dev_id) mutex_lock(&encoder->access_mutex); - state = rotary_encoder_get_state(encoder->pdata); + state = rotary_encoder_get_state(encoder); switch 
(state) { case 0x0: @@ -130,7 +129,7 @@ static irqreturn_t rotary_encoder_half_period_irq(int irq, void *dev_id) mutex_lock(&encoder->access_mutex); - state = rotary_encoder_get_state(encoder->pdata); + state = rotary_encoder_get_state(encoder); switch (state) { case 0x00: @@ -160,7 +159,7 @@ static irqreturn_t rotary_encoder_quarter_period_irq(int irq, void *dev_id) mutex_lock(&encoder->access_mutex); - state = rotary_encoder_get_state(encoder->pdata); + state = rotary_encoder_get_state(encoder); /* * We encode the previous and the current state using a byte. @@ -218,7 +217,6 @@ static struct rotary_encoder_platform_data *rotary_encoder_parse_dt(struct devic of_match_device(rotary_encoder_of_match, dev); struct device_node *np = dev->of_node; struct rotary_encoder_platform_data *pdata; - enum of_gpio_flags flags; int error; if (!of_id || !np) @@ -232,12 +230,6 @@ static struct rotary_encoder_platform_data *rotary_encoder_parse_dt(struct devic of_property_read_u32(np, "rotary-encoder,steps", &pdata->steps); of_property_read_u32(np, "linux,axis", &pdata->axis); - pdata->gpio_a = of_get_gpio_flags(np, 0, &flags); - pdata->inverted_a = flags & OF_GPIO_ACTIVE_LOW; - - pdata->gpio_b = of_get_gpio_flags(np, 1, &flags); - pdata->inverted_b = flags & OF_GPIO_ACTIVE_LOW; - pdata->relative_axis = of_property_read_bool(np, "rotary-encoder,relative-axis"); pdata->rollover = of_property_read_bool(np, "rotary-encoder,rollover"); @@ -294,14 +286,32 @@ static int rotary_encoder_probe(struct platform_device *pdev) if (!encoder) return -ENOMEM; + mutex_init(&encoder->access_mutex); + encoder->pdata = pdata; + + encoder->gpio_a = devm_gpiod_get_index(dev, NULL, 0, GPIOD_IN); + if (IS_ERR(encoder->gpio_a)) { + err = PTR_ERR(encoder->gpio_a); + dev_err(dev, "unable to get GPIO at index 0: %d\n", err); + return err; + } + + encoder->irq_a = gpiod_to_irq(encoder->gpio_a); + + encoder->gpio_b = devm_gpiod_get_index(dev, NULL, 1, GPIOD_IN); + if (IS_ERR(encoder->gpio_b)) { + err = PTR_ERR(encoder->gpio_b); + dev_err(dev, "unable to get GPIO at index 1: %d\n", err); + return err; + } + + encoder->irq_b = gpiod_to_irq(encoder->gpio_b); + input = devm_input_allocate_device(dev); if (!input) return -ENOMEM; - mutex_init(&encoder->access_mutex); - encoder->input = input; - encoder->pdata = pdata; input->name = pdev->name; input->id.bustype = BUS_HOST; @@ -316,32 +326,14 @@ static int rotary_encoder_probe(struct platform_device *pdev) pdata->axis, 0, pdata->steps, 0, 1); } - /* request the GPIOs */ - err = devm_gpio_request_one(dev, pdata->gpio_a, GPIOF_IN, - dev_name(dev)); - if (err) { - dev_err(dev, "unable to request GPIO %d\n", pdata->gpio_a); - return err; - } - - err = devm_gpio_request_one(dev, pdata->gpio_b, GPIOF_IN, - dev_name(dev)); - if (err) { - dev_err(dev, "unable to request GPIO %d\n", pdata->gpio_b); - return err; - } - - encoder->irq_a = gpio_to_irq(pdata->gpio_a); - encoder->irq_b = gpio_to_irq(pdata->gpio_b); - switch (pdata->steps_per_period) { case 4: handler = &rotary_encoder_quarter_period_irq; - encoder->last_stable = rotary_encoder_get_state(pdata); + encoder->last_stable = rotary_encoder_get_state(encoder); break; case 2: handler = &rotary_encoder_half_period_irq; - encoder->last_stable = rotary_encoder_get_state(pdata); + encoder->last_stable = rotary_encoder_get_state(encoder); break; case 1: handler = &rotary_encoder_irq; diff --git a/include/linux/rotary_encoder.h b/include/linux/rotary_encoder.h index fe3dc64e5aeb..4536c813a1e9 100644 --- a/include/linux/rotary_encoder.h +++ 
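Why inverted_a/inverted_b can be deleted outright, in miniature: with GPIO_ACTIVE_LOW recorded in the board's gpiod_lookup_table (or in DT), gpiod_get_value_cansleep() already returns the logical line state.

#include <linux/gpio/consumer.h>

static int encoder_state(struct gpio_desc *gpio_a, struct gpio_desc *gpio_b)
{
	/* No "a ^= inverted_a" step: the descriptor handles polarity. */
	return (!!gpiod_get_value_cansleep(gpio_a) << 1) |
		!!gpiod_get_value_cansleep(gpio_b);
}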
b/include/linux/rotary_encoder.h @@ -4,10 +4,6 @@ struct rotary_encoder_platform_data { unsigned int steps; unsigned int axis;
-	unsigned int gpio_a;
-	unsigned int gpio_b;
-	unsigned int inverted_a;
-	unsigned int inverted_b;
unsigned int steps_per_period; bool relative_axis; bool rollover; -- cgit v1.2.3
From a67dd266adf42a24df31380e9da78390bb4d65ef Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 25 Feb 2016 10:08:35 +0100 Subject: netfilter: xtables: prepare for on-demand hook register This change prepares for upcoming on-demand xtables hook registration. We change the prototypes of the register/unregister functions. A follow-up patch will then add nf_hook_register/unregister calls to the iptables one. Once a hook is registered packets will be picked up, so all assignments of the form net->ipv4.iptable_$table = new_table have to be moved to ip(6)t_register_table, else we can see NULL net->ipv4.iptable_$table later. This patch doesn't change functionality; without this the actual change simply gets too big. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso ---
include/linux/netfilter_arp/arp_tables.h | 9 +++++----
include/linux/netfilter_ipv4/ip_tables.h | 9 +++++----
include/linux/netfilter_ipv6/ip6_tables.h | 9 +++++----
net/ipv4/netfilter/arp_tables.c | 25 ++++++++++++++-----------
net/ipv4/netfilter/arptable_filter.c | 11 ++++++-----
net/ipv4/netfilter/ip_tables.c | 21 ++++++++++-----------
net/ipv4/netfilter/iptable_filter.c | 9 +++++----
net/ipv4/netfilter/iptable_mangle.c | 9 +++++----
net/ipv4/netfilter/iptable_nat.c | 8 +++++---
net/ipv4/netfilter/iptable_raw.c | 9 +++++----
net/ipv4/netfilter/iptable_security.c | 9 +++++----
net/ipv6/netfilter/ip6_tables.c | 23 ++++++++++++-----------
net/ipv6/netfilter/ip6table_filter.c | 9 +++++----
net/ipv6/netfilter/ip6table_mangle.c | 9 +++++----
net/ipv6/netfilter/ip6table_nat.c | 8 +++++---
net/ipv6/netfilter/ip6table_raw.c | 9 +++++----
net/ipv6/netfilter/ip6table_security.c | 9 +++++----
17 files changed, 107 insertions(+), 88 deletions(-) (limited to 'include/linux')
diff --git a/include/linux/netfilter_arp/arp_tables.h b/include/linux/netfilter_arp/arp_tables.h index 6f074db2f23d..029b95e8924e 100644 --- a/include/linux/netfilter_arp/arp_tables.h +++ b/include/linux/netfilter_arp/arp_tables.h @@ -48,10 +48,11 @@ struct arpt_error { } extern void *arpt_alloc_initial_table(const struct xt_table *);
-extern struct xt_table *arpt_register_table(struct net *net,
-					    const struct xt_table *table,
-					    const struct arpt_replace *repl);
-extern void arpt_unregister_table(struct xt_table *table);
+int arpt_register_table(struct net *net, const struct xt_table *table,
+			const struct arpt_replace *repl,
+			const struct nf_hook_ops *ops, struct xt_table **res);
+void arpt_unregister_table(struct net *net, struct xt_table *table,
+			   const struct nf_hook_ops *ops);
extern unsigned int arpt_do_table(struct sk_buff *skb, const struct nf_hook_state *state, struct xt_table *table);
diff --git a/include/linux/netfilter_ipv4/ip_tables.h b/include/linux/netfilter_ipv4/ip_tables.h index aa598f942c01..7bfc5893ec31 100644 --- a/include/linux/netfilter_ipv4/ip_tables.h +++ b/include/linux/netfilter_ipv4/ip_tables.h @@ -24,10 +24,11 @@ extern void ipt_init(void) __init;
-extern struct xt_table *ipt_register_table(struct net *net,
-					   const struct xt_table *table,
-					   const struct ipt_replace *repl);
-extern void ipt_unregister_table(struct net *net, struct xt_table *table);
+int ipt_register_table(struct net *net, const struct xt_table *table,
const struct ipt_replace *repl, + const struct nf_hook_ops *ops, struct xt_table **res); +void ipt_unregister_table(struct net *net, struct xt_table *table, + const struct nf_hook_ops *ops); /* Standard entry. */ struct ipt_standard { diff --git a/include/linux/netfilter_ipv6/ip6_tables.h b/include/linux/netfilter_ipv6/ip6_tables.h index 0f76e5c674f9..b21c392d6012 100644 --- a/include/linux/netfilter_ipv6/ip6_tables.h +++ b/include/linux/netfilter_ipv6/ip6_tables.h @@ -25,10 +25,11 @@ extern void ip6t_init(void) __init; extern void *ip6t_alloc_initial_table(const struct xt_table *); -extern struct xt_table *ip6t_register_table(struct net *net, - const struct xt_table *table, - const struct ip6t_replace *repl); -extern void ip6t_unregister_table(struct net *net, struct xt_table *table); +int ip6t_register_table(struct net *net, const struct xt_table *table, + const struct ip6t_replace *repl, + const struct nf_hook_ops *ops, struct xt_table **res); +void ip6t_unregister_table(struct net *net, struct xt_table *table, + const struct nf_hook_ops *ops); extern unsigned int ip6t_do_table(struct sk_buff *skb, const struct nf_hook_state *state, struct xt_table *table); diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index b488cac9c5ca..00eed0852dfc 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -1780,9 +1780,11 @@ static int do_arpt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len return ret; } -struct xt_table *arpt_register_table(struct net *net, - const struct xt_table *table, - const struct arpt_replace *repl) +int arpt_register_table(struct net *net, + const struct xt_table *table, + const struct arpt_replace *repl, + const struct nf_hook_ops *ops, + struct xt_table **res) { int ret; struct xt_table_info *newinfo; @@ -1791,10 +1793,8 @@ struct xt_table *arpt_register_table(struct net *net, struct xt_table *new_table; newinfo = xt_alloc_table_info(repl->size); - if (!newinfo) { - ret = -ENOMEM; - goto out; - } + if (!newinfo) + return -ENOMEM; loc_cpu_entry = newinfo->entries; memcpy(loc_cpu_entry, repl->entries, repl->size); @@ -1809,15 +1809,18 @@ struct xt_table *arpt_register_table(struct net *net, ret = PTR_ERR(new_table); goto out_free; } - return new_table; + + WRITE_ONCE(*res, new_table); + + return ret; out_free: xt_free_table_info(newinfo); -out: - return ERR_PTR(ret); + return ret; } -void arpt_unregister_table(struct xt_table *table) +void arpt_unregister_table(struct net *net, struct xt_table *table, + const struct nf_hook_ops *ops) { struct xt_table_info *private; void *loc_cpu_entry; diff --git a/net/ipv4/netfilter/arptable_filter.c b/net/ipv4/netfilter/arptable_filter.c index 1897ee160920..4c0241692576 100644 --- a/net/ipv4/netfilter/arptable_filter.c +++ b/net/ipv4/netfilter/arptable_filter.c @@ -38,19 +38,20 @@ static struct nf_hook_ops *arpfilter_ops __read_mostly; static int __net_init arptable_filter_net_init(struct net *net) { struct arpt_replace *repl; - + int err; + repl = arpt_alloc_initial_table(&packet_filter); if (repl == NULL) return -ENOMEM; - net->ipv4.arptable_filter = - arpt_register_table(net, &packet_filter, repl); + err = arpt_register_table(net, &packet_filter, repl, arpfilter_ops, + &net->ipv4.arptable_filter); kfree(repl); - return PTR_ERR_OR_ZERO(net->ipv4.arptable_filter); + return err; } static void __net_exit arptable_filter_net_exit(struct net *net) { - arpt_unregister_table(net->ipv4.arptable_filter); + arpt_unregister_table(net, net->ipv4.arptable_filter, 
arpfilter_ops); } static struct pernet_operations arptable_filter_net_ops = { diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index b99affad6ba1..1eb4fe5b4702 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -2062,9 +2062,9 @@ do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) return ret; } -struct xt_table *ipt_register_table(struct net *net, - const struct xt_table *table, - const struct ipt_replace *repl) +int ipt_register_table(struct net *net, const struct xt_table *table, + const struct ipt_replace *repl, + const struct nf_hook_ops *ops, struct xt_table **res) { int ret; struct xt_table_info *newinfo; @@ -2073,10 +2073,8 @@ struct xt_table *ipt_register_table(struct net *net, struct xt_table *new_table; newinfo = xt_alloc_table_info(repl->size); - if (!newinfo) { - ret = -ENOMEM; - goto out; - } + if (!newinfo) + return -ENOMEM; loc_cpu_entry = newinfo->entries; memcpy(loc_cpu_entry, repl->entries, repl->size); @@ -2091,15 +2089,16 @@ struct xt_table *ipt_register_table(struct net *net, goto out_free; } - return new_table; + WRITE_ONCE(*res, new_table); + return ret; out_free: xt_free_table_info(newinfo); -out: - return ERR_PTR(ret); + return ret; } -void ipt_unregister_table(struct net *net, struct xt_table *table) +void ipt_unregister_table(struct net *net, struct xt_table *table, + const struct nf_hook_ops *ops) { struct xt_table_info *private; void *loc_cpu_entry; diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c index 397ef2dd133e..3fbe4acacb27 100644 --- a/net/ipv4/netfilter/iptable_filter.c +++ b/net/ipv4/netfilter/iptable_filter.c @@ -54,6 +54,7 @@ module_param(forward, bool, 0000); static int __net_init iptable_filter_net_init(struct net *net) { struct ipt_replace *repl; + int err; repl = ipt_alloc_initial_table(&packet_filter); if (repl == NULL) @@ -62,15 +63,15 @@ static int __net_init iptable_filter_net_init(struct net *net) ((struct ipt_standard *)repl->entries)[1].target.verdict = forward ? 
-NF_ACCEPT - 1 : -NF_DROP - 1; - net->ipv4.iptable_filter = - ipt_register_table(net, &packet_filter, repl); + err = ipt_register_table(net, &packet_filter, repl, filter_ops, + &net->ipv4.iptable_filter); kfree(repl); - return PTR_ERR_OR_ZERO(net->ipv4.iptable_filter); + return err; } static void __net_exit iptable_filter_net_exit(struct net *net) { - ipt_unregister_table(net, net->ipv4.iptable_filter); + ipt_unregister_table(net, net->ipv4.iptable_filter, filter_ops); } static struct pernet_operations iptable_filter_net_ops = { diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c index ba5d392a13c4..668e79166b81 100644 --- a/net/ipv4/netfilter/iptable_mangle.c +++ b/net/ipv4/netfilter/iptable_mangle.c @@ -96,19 +96,20 @@ static struct nf_hook_ops *mangle_ops __read_mostly; static int __net_init iptable_mangle_net_init(struct net *net) { struct ipt_replace *repl; + int ret; repl = ipt_alloc_initial_table(&packet_mangler); if (repl == NULL) return -ENOMEM; - net->ipv4.iptable_mangle = - ipt_register_table(net, &packet_mangler, repl); + ret = ipt_register_table(net, &packet_mangler, repl, mangle_ops, + &net->ipv4.iptable_mangle); kfree(repl); - return PTR_ERR_OR_ZERO(net->ipv4.iptable_mangle); + return ret; } static void __net_exit iptable_mangle_net_exit(struct net *net) { - ipt_unregister_table(net, net->ipv4.iptable_mangle); + ipt_unregister_table(net, net->ipv4.iptable_mangle, mangle_ops); } static struct pernet_operations iptable_mangle_net_ops = { diff --git a/net/ipv4/netfilter/iptable_nat.c b/net/ipv4/netfilter/iptable_nat.c index ae2cd2752046..e984f1d3017f 100644 --- a/net/ipv4/netfilter/iptable_nat.c +++ b/net/ipv4/netfilter/iptable_nat.c @@ -98,18 +98,20 @@ static struct nf_hook_ops nf_nat_ipv4_ops[] __read_mostly = { static int __net_init iptable_nat_net_init(struct net *net) { struct ipt_replace *repl; + int ret; repl = ipt_alloc_initial_table(&nf_nat_ipv4_table); if (repl == NULL) return -ENOMEM; - net->ipv4.nat_table = ipt_register_table(net, &nf_nat_ipv4_table, repl); + ret = ipt_register_table(net, &nf_nat_ipv4_table, repl, + nf_nat_ipv4_ops, &net->ipv4.nat_table); kfree(repl); - return PTR_ERR_OR_ZERO(net->ipv4.nat_table); + return ret; } static void __net_exit iptable_nat_net_exit(struct net *net) { - ipt_unregister_table(net, net->ipv4.nat_table); + ipt_unregister_table(net, net->ipv4.nat_table, nf_nat_ipv4_ops); } static struct pernet_operations iptable_nat_net_ops = { diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c index 1ba02811acb0..9d78780a9036 100644 --- a/net/ipv4/netfilter/iptable_raw.c +++ b/net/ipv4/netfilter/iptable_raw.c @@ -37,19 +37,20 @@ static struct nf_hook_ops *rawtable_ops __read_mostly; static int __net_init iptable_raw_net_init(struct net *net) { struct ipt_replace *repl; + int ret; repl = ipt_alloc_initial_table(&packet_raw); if (repl == NULL) return -ENOMEM; - net->ipv4.iptable_raw = - ipt_register_table(net, &packet_raw, repl); + ret = ipt_register_table(net, &packet_raw, repl, rawtable_ops, + &net->ipv4.iptable_raw); kfree(repl); - return PTR_ERR_OR_ZERO(net->ipv4.iptable_raw); + return ret; } static void __net_exit iptable_raw_net_exit(struct net *net) { - ipt_unregister_table(net, net->ipv4.iptable_raw); + ipt_unregister_table(net, net->ipv4.iptable_raw, rawtable_ops); } static struct pernet_operations iptable_raw_net_ops = { diff --git a/net/ipv4/netfilter/iptable_security.c b/net/ipv4/netfilter/iptable_security.c index c2e23d5e9cd4..88bc52fb8f4a 100644 --- 
a/net/ipv4/netfilter/iptable_security.c +++ b/net/ipv4/netfilter/iptable_security.c @@ -54,19 +54,20 @@ static struct nf_hook_ops *sectbl_ops __read_mostly; static int __net_init iptable_security_net_init(struct net *net) { struct ipt_replace *repl; + int ret; repl = ipt_alloc_initial_table(&security_table); if (repl == NULL) return -ENOMEM; - net->ipv4.iptable_security = - ipt_register_table(net, &security_table, repl); + ret = ipt_register_table(net, &security_table, repl, sectbl_ops, + &net->ipv4.iptable_security); kfree(repl); - return PTR_ERR_OR_ZERO(net->ipv4.iptable_security); + return ret; } static void __net_exit iptable_security_net_exit(struct net *net) { - ipt_unregister_table(net, net->ipv4.iptable_security); + ipt_unregister_table(net, net->ipv4.iptable_security, sectbl_ops); } static struct pernet_operations iptable_security_net_ops = { diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 99425cf2819b..052d7447b52e 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -2071,9 +2071,10 @@ do_ip6t_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) return ret; } -struct xt_table *ip6t_register_table(struct net *net, - const struct xt_table *table, - const struct ip6t_replace *repl) +int ip6t_register_table(struct net *net, const struct xt_table *table, + const struct ip6t_replace *repl, + const struct nf_hook_ops *ops, + struct xt_table **res) { int ret; struct xt_table_info *newinfo; @@ -2082,10 +2083,8 @@ struct xt_table *ip6t_register_table(struct net *net, struct xt_table *new_table; newinfo = xt_alloc_table_info(repl->size); - if (!newinfo) { - ret = -ENOMEM; - goto out; - } + if (!newinfo) + return -ENOMEM; loc_cpu_entry = newinfo->entries; memcpy(loc_cpu_entry, repl->entries, repl->size); @@ -2099,15 +2098,17 @@ struct xt_table *ip6t_register_table(struct net *net, ret = PTR_ERR(new_table); goto out_free; } - return new_table; + + WRITE_ONCE(*res, new_table); + return ret; out_free: xt_free_table_info(newinfo); -out: - return ERR_PTR(ret); + return ret; } -void ip6t_unregister_table(struct net *net, struct xt_table *table) +void ip6t_unregister_table(struct net *net, struct xt_table *table, + const struct nf_hook_ops *ops) { struct xt_table_info *private; void *loc_cpu_entry; diff --git a/net/ipv6/netfilter/ip6table_filter.c b/net/ipv6/netfilter/ip6table_filter.c index 8b277b983ca5..d191d54cdf50 100644 --- a/net/ipv6/netfilter/ip6table_filter.c +++ b/net/ipv6/netfilter/ip6table_filter.c @@ -47,6 +47,7 @@ module_param(forward, bool, 0000); static int __net_init ip6table_filter_net_init(struct net *net) { struct ip6t_replace *repl; + int err; repl = ip6t_alloc_initial_table(&packet_filter); if (repl == NULL) @@ -55,15 +56,15 @@ static int __net_init ip6table_filter_net_init(struct net *net) ((struct ip6t_standard *)repl->entries)[1].target.verdict = forward ? 
-NF_ACCEPT - 1 : -NF_DROP - 1; - net->ipv6.ip6table_filter = - ip6t_register_table(net, &packet_filter, repl); + err = ip6t_register_table(net, &packet_filter, repl, filter_ops, + &net->ipv6.ip6table_filter); kfree(repl); - return PTR_ERR_OR_ZERO(net->ipv6.ip6table_filter); + return err; } static void __net_exit ip6table_filter_net_exit(struct net *net) { - ip6t_unregister_table(net, net->ipv6.ip6table_filter); + ip6t_unregister_table(net, net->ipv6.ip6table_filter, filter_ops); } static struct pernet_operations ip6table_filter_net_ops = { diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c index abe278b07932..fe43d08284bc 100644 --- a/net/ipv6/netfilter/ip6table_mangle.c +++ b/net/ipv6/netfilter/ip6table_mangle.c @@ -91,19 +91,20 @@ static struct nf_hook_ops *mangle_ops __read_mostly; static int __net_init ip6table_mangle_net_init(struct net *net) { struct ip6t_replace *repl; + int ret; repl = ip6t_alloc_initial_table(&packet_mangler); if (repl == NULL) return -ENOMEM; - net->ipv6.ip6table_mangle = - ip6t_register_table(net, &packet_mangler, repl); + ret = ip6t_register_table(net, &packet_mangler, repl, mangle_ops, + &net->ipv6.ip6table_mangle); kfree(repl); - return PTR_ERR_OR_ZERO(net->ipv6.ip6table_mangle); + return ret; } static void __net_exit ip6table_mangle_net_exit(struct net *net) { - ip6t_unregister_table(net, net->ipv6.ip6table_mangle); + ip6t_unregister_table(net, net->ipv6.ip6table_mangle, mangle_ops); } static struct pernet_operations ip6table_mangle_net_ops = { diff --git a/net/ipv6/netfilter/ip6table_nat.c b/net/ipv6/netfilter/ip6table_nat.c index de2a10a565f5..7f9740e8ef47 100644 --- a/net/ipv6/netfilter/ip6table_nat.c +++ b/net/ipv6/netfilter/ip6table_nat.c @@ -100,18 +100,20 @@ static struct nf_hook_ops nf_nat_ipv6_ops[] __read_mostly = { static int __net_init ip6table_nat_net_init(struct net *net) { struct ip6t_replace *repl; + int ret; repl = ip6t_alloc_initial_table(&nf_nat_ipv6_table); if (repl == NULL) return -ENOMEM; - net->ipv6.ip6table_nat = ip6t_register_table(net, &nf_nat_ipv6_table, repl); + ret = ip6t_register_table(net, &nf_nat_ipv6_table, repl, + nf_nat_ipv6_ops, &net->ipv6.ip6table_nat); kfree(repl); - return PTR_ERR_OR_ZERO(net->ipv6.ip6table_nat); + return ret; } static void __net_exit ip6table_nat_net_exit(struct net *net) { - ip6t_unregister_table(net, net->ipv6.ip6table_nat); + ip6t_unregister_table(net, net->ipv6.ip6table_nat, nf_nat_ipv6_ops); } static struct pernet_operations ip6table_nat_net_ops = { diff --git a/net/ipv6/netfilter/ip6table_raw.c b/net/ipv6/netfilter/ip6table_raw.c index 9021963565c3..5fac433da069 100644 --- a/net/ipv6/netfilter/ip6table_raw.c +++ b/net/ipv6/netfilter/ip6table_raw.c @@ -30,19 +30,20 @@ static struct nf_hook_ops *rawtable_ops __read_mostly; static int __net_init ip6table_raw_net_init(struct net *net) { struct ip6t_replace *repl; + int ret; repl = ip6t_alloc_initial_table(&packet_raw); if (repl == NULL) return -ENOMEM; - net->ipv6.ip6table_raw = - ip6t_register_table(net, &packet_raw, repl); + ret = ip6t_register_table(net, &packet_raw, repl, rawtable_ops, + &net->ipv6.ip6table_raw); kfree(repl); - return PTR_ERR_OR_ZERO(net->ipv6.ip6table_raw); + return ret; } static void __net_exit ip6table_raw_net_exit(struct net *net) { - ip6t_unregister_table(net, net->ipv6.ip6table_raw); + ip6t_unregister_table(net, net->ipv6.ip6table_raw, rawtable_ops); } static struct pernet_operations ip6table_raw_net_ops = { diff --git a/net/ipv6/netfilter/ip6table_security.c 
b/net/ipv6/netfilter/ip6table_security.c index 0d856fedfeb0..cf587453e322 100644 --- a/net/ipv6/netfilter/ip6table_security.c +++ b/net/ipv6/netfilter/ip6table_security.c @@ -47,19 +47,20 @@ static struct nf_hook_ops *sectbl_ops __read_mostly; static int __net_init ip6table_security_net_init(struct net *net) { struct ip6t_replace *repl; + int ret; repl = ip6t_alloc_initial_table(&security_table); if (repl == NULL) return -ENOMEM; - net->ipv6.ip6table_security = - ip6t_register_table(net, &security_table, repl); + ret = ip6t_register_table(net, &security_table, repl, sectbl_ops, + &net->ipv6.ip6table_security); kfree(repl); - return PTR_ERR_OR_ZERO(net->ipv6.ip6table_security); + return ret; } static void __net_exit ip6table_security_net_exit(struct net *net) { - ip6t_unregister_table(net, net->ipv6.ip6table_security); + ip6t_unregister_table(net, net->ipv6.ip6table_security, sectbl_ops); } static struct pernet_operations ip6table_security_net_ops = { -- cgit v1.2.3 From b9e69e127397187b70c813a4397cce7afb5e8cb1 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 25 Feb 2016 10:08:36 +0100 Subject: netfilter: xtables: don't hook tables by default Delay hook registration until the table is requested inside a namespace. Historically, a particular table (iptables mangle, ip6tables filter, etc) was registered on module load. When netns support was added to iptables, only the ip/ip6tables ruleset was made namespace aware, not the actual hook points. This means, e.g., that when the iptable_filter table/module is loaded on a system, each namespace on that system has an (empty) iptables filter ruleset. In other words, if a namespace sends a packet, such an skb is 'caught' by the netfilter machinery and fed to the hooking points for that table (i.e. INPUT, FORWARD, etc). Thanks to Eric Biederman, hooks are no longer global, but per namespace. This means that we can avoid allocating an empty ruleset in a namespace and defer hook registration until we need the functionality. We register a table's hook entry points ONLY in the initial namespace. When an iptables get/setsockopt is issued inside a given namespace, we check if the table is found in the per-namespace list. If not, we attempt to find it in the initial namespace, and, if found, create an empty default table in the requesting namespace and register the needed hooks. Hook points are destroyed only once the namespace is deleted; there is no 'usage count' (it makes no sense, since there is no 'remove table' operation in the xtables API).
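To make the flow concrete, a minimal sketch of the lazy path, modelled on the iptable_filter changes below (nothing here beyond what this patch introduces; error handling trimmed):

	/* Called via table->table_init() when a netns first requests the
	 * table; sketch modelled on the iptable_filter changes below. */
	static int __net_init iptable_filter_table_init(struct net *net)
	{
		struct ipt_replace *repl;
		int err;

		if (net->ipv4.iptable_filter)	/* already set up in this netns */
			return 0;

		repl = ipt_alloc_initial_table(&packet_filter);
		if (repl == NULL)
			return -ENOMEM;
		/* registers the table and, via the new 'ops' argument, its hooks */
		err = ipt_register_table(net, &packet_filter, repl, filter_ops,
					 &net->ipv4.iptable_filter);
		kfree(repl);
		return err;
	}

xt_find_table_lock() drives this: when a lookup misses in the requesting netns, it retries against init_net and, if the table exists there, invokes ->table_init() for the requesting netns before searching again, so the empty ruleset and its hooks appear on demand.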
Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/x_tables.h | 6 ++-- net/ipv4/netfilter/arp_tables.c | 41 +++++++++++++-------- net/ipv4/netfilter/arptable_filter.c | 29 ++++++++------- net/ipv4/netfilter/ip_tables.c | 42 ++++++++++++++-------- net/ipv4/netfilter/iptable_filter.c | 35 ++++++++++++------ net/ipv4/netfilter/iptable_mangle.c | 32 ++++++++++++----- net/ipv4/netfilter/iptable_nat.c | 33 ++++++++--------- net/ipv4/netfilter/iptable_raw.c | 29 ++++++++++----- net/ipv4/netfilter/iptable_security.c | 35 +++++++++++------- net/ipv6/netfilter/ip6_tables.c | 42 ++++++++++++++-------- net/ipv6/netfilter/ip6table_filter.c | 38 ++++++++++++-------- net/ipv6/netfilter/ip6table_mangle.c | 37 +++++++++++-------- net/ipv6/netfilter/ip6table_nat.c | 33 ++++++++--------- net/ipv6/netfilter/ip6table_raw.c | 37 +++++++++++-------- net/ipv6/netfilter/ip6table_security.c | 35 +++++++++++------- net/netfilter/x_tables.c | 65 +++++++++++++++++++++------------- 16 files changed, 361 insertions(+), 208 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index c5577410c25d..80a305b85323 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -200,6 +200,9 @@ struct xt_table { u_int8_t af; /* address/protocol family */ int priority; /* hook order */ + /* called when table is needed in the given netns */ + int (*table_init)(struct net *net); + /* A unique name... */ const char name[XT_TABLE_MAXNAMELEN]; }; @@ -408,8 +411,7 @@ xt_get_per_cpu_counter(struct xt_counters *cnt, unsigned int cpu) return cnt; } -struct nf_hook_ops *xt_hook_link(const struct xt_table *, nf_hookfn *); -void xt_hook_unlink(const struct xt_table *, struct nf_hook_ops *); +struct nf_hook_ops *xt_hook_ops_alloc(const struct xt_table *, nf_hookfn *); #ifdef CONFIG_COMPAT #include diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 00eed0852dfc..bf081927e06b 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -1780,6 +1780,24 @@ static int do_arpt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len return ret; } +static void __arpt_unregister_table(struct xt_table *table) +{ + struct xt_table_info *private; + void *loc_cpu_entry; + struct module *table_owner = table->me; + struct arpt_entry *iter; + + private = xt_unregister_table(table); + + /* Decrease module usage counts and free resources */ + loc_cpu_entry = private->entries; + xt_entry_foreach(iter, loc_cpu_entry, private->size) + cleanup_entry(iter); + if (private->number > private->initial_entries) + module_put(table_owner); + xt_free_table_info(private); +} + int arpt_register_table(struct net *net, const struct xt_table *table, const struct arpt_replace *repl, @@ -1810,8 +1828,15 @@ int arpt_register_table(struct net *net, goto out_free; } + /* set res now, will see skbs right after nf_register_net_hooks */ WRITE_ONCE(*res, new_table); + ret = nf_register_net_hooks(net, ops, hweight32(table->valid_hooks)); + if (ret != 0) { + __arpt_unregister_table(new_table); + *res = NULL; + } + return ret; out_free: @@ -1822,20 +1847,8 @@ out_free: void arpt_unregister_table(struct net *net, struct xt_table *table, const struct nf_hook_ops *ops) { - struct xt_table_info *private; - void *loc_cpu_entry; - struct module *table_owner = table->me; - struct arpt_entry *iter; - - private = xt_unregister_table(table); - - /* Decrease module usage counts and free 
resources */ - loc_cpu_entry = private->entries; - xt_entry_foreach(iter, loc_cpu_entry, private->size) - cleanup_entry(iter); - if (private->number > private->initial_entries) - module_put(table_owner); - xt_free_table_info(private); + nf_unregister_net_hooks(net, ops, hweight32(table->valid_hooks)); + __arpt_unregister_table(table); } /* The built-in targets: standard (NULL) and error. */ diff --git a/net/ipv4/netfilter/arptable_filter.c b/net/ipv4/netfilter/arptable_filter.c index 4c0241692576..dd8c80dc32a2 100644 --- a/net/ipv4/netfilter/arptable_filter.c +++ b/net/ipv4/netfilter/arptable_filter.c @@ -17,12 +17,15 @@ MODULE_DESCRIPTION("arptables filter table"); #define FILTER_VALID_HOOKS ((1 << NF_ARP_IN) | (1 << NF_ARP_OUT) | \ (1 << NF_ARP_FORWARD)) +static int __net_init arptable_filter_table_init(struct net *net); + static const struct xt_table packet_filter = { .name = "filter", .valid_hooks = FILTER_VALID_HOOKS, .me = THIS_MODULE, .af = NFPROTO_ARP, .priority = NF_IP_PRI_FILTER, + .table_init = arptable_filter_table_init, }; /* The work comes in here from netfilter.c */ @@ -35,11 +38,14 @@ arptable_filter_hook(void *priv, struct sk_buff *skb, static struct nf_hook_ops *arpfilter_ops __read_mostly; -static int __net_init arptable_filter_net_init(struct net *net) +static int __net_init arptable_filter_table_init(struct net *net) { struct arpt_replace *repl; int err; + if (net->ipv4.arptable_filter) + return 0; + repl = arpt_alloc_initial_table(&packet_filter); if (repl == NULL) return -ENOMEM; @@ -51,11 +57,13 @@ static int __net_init arptable_filter_net_init(struct net *net) static void __net_exit arptable_filter_net_exit(struct net *net) { + if (!net->ipv4.arptable_filter) + return; arpt_unregister_table(net, net->ipv4.arptable_filter, arpfilter_ops); + net->ipv4.arptable_filter = NULL; } static struct pernet_operations arptable_filter_net_ops = { - .init = arptable_filter_net_init, .exit = arptable_filter_net_exit, }; @@ -63,26 +71,23 @@ static int __init arptable_filter_init(void) { int ret; + arpfilter_ops = xt_hook_ops_alloc(&packet_filter, arptable_filter_hook); + if (IS_ERR(arpfilter_ops)) + return PTR_ERR(arpfilter_ops); + ret = register_pernet_subsys(&arptable_filter_net_ops); - if (ret < 0) + if (ret < 0) { + kfree(arpfilter_ops); return ret; - - arpfilter_ops = xt_hook_link(&packet_filter, arptable_filter_hook); - if (IS_ERR(arpfilter_ops)) { - ret = PTR_ERR(arpfilter_ops); - goto cleanup_table; } - return ret; -cleanup_table: - unregister_pernet_subsys(&arptable_filter_net_ops); return ret; } static void __exit arptable_filter_fini(void) { - xt_hook_unlink(&packet_filter, arpfilter_ops); unregister_pernet_subsys(&arptable_filter_net_ops); + kfree(arpfilter_ops); } module_init(arptable_filter_init); diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 1eb4fe5b4702..e53f8d6f326d 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -2062,6 +2062,24 @@ do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) return ret; } +static void __ipt_unregister_table(struct net *net, struct xt_table *table) +{ + struct xt_table_info *private; + void *loc_cpu_entry; + struct module *table_owner = table->me; + struct ipt_entry *iter; + + private = xt_unregister_table(table); + + /* Decrease module usage counts and free resources */ + loc_cpu_entry = private->entries; + xt_entry_foreach(iter, loc_cpu_entry, private->size) + cleanup_entry(iter, net); + if (private->number > private->initial_entries) + 
module_put(table_owner); + xt_free_table_info(private); +} + int ipt_register_table(struct net *net, const struct xt_table *table, const struct ipt_replace *repl, const struct nf_hook_ops *ops, struct xt_table **res) @@ -2089,7 +2107,15 @@ int ipt_register_table(struct net *net, const struct xt_table *table, goto out_free; } + /* set res now, will see skbs right after nf_register_net_hooks */ WRITE_ONCE(*res, new_table); + + ret = nf_register_net_hooks(net, ops, hweight32(table->valid_hooks)); + if (ret != 0) { + __ipt_unregister_table(net, new_table); + *res = NULL; + } + return ret; out_free: @@ -2100,20 +2126,8 @@ out_free: void ipt_unregister_table(struct net *net, struct xt_table *table, const struct nf_hook_ops *ops) { - struct xt_table_info *private; - void *loc_cpu_entry; - struct module *table_owner = table->me; - struct ipt_entry *iter; - - private = xt_unregister_table(table); - - /* Decrease module usage counts and free resources */ - loc_cpu_entry = private->entries; - xt_entry_foreach(iter, loc_cpu_entry, private->size) - cleanup_entry(iter, net); - if (private->number > private->initial_entries) - module_put(table_owner); - xt_free_table_info(private); + nf_unregister_net_hooks(net, ops, hweight32(table->valid_hooks)); + __ipt_unregister_table(net, table); } /* Returns 1 if the type and code is matched by the range, 0 otherwise */ diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c index 3fbe4acacb27..7667f223d7f8 100644 --- a/net/ipv4/netfilter/iptable_filter.c +++ b/net/ipv4/netfilter/iptable_filter.c @@ -23,6 +23,7 @@ MODULE_DESCRIPTION("iptables filter table"); #define FILTER_VALID_HOOKS ((1 << NF_INET_LOCAL_IN) | \ (1 << NF_INET_FORWARD) | \ (1 << NF_INET_LOCAL_OUT)) +static int __net_init iptable_filter_table_init(struct net *net); static const struct xt_table packet_filter = { .name = "filter", @@ -30,6 +31,7 @@ static const struct xt_table packet_filter = { .me = THIS_MODULE, .af = NFPROTO_IPV4, .priority = NF_IP_PRI_FILTER, + .table_init = iptable_filter_table_init, }; static unsigned int @@ -48,14 +50,17 @@ iptable_filter_hook(void *priv, struct sk_buff *skb, static struct nf_hook_ops *filter_ops __read_mostly; /* Default to forward because I got too much mail already. 
*/ -static bool forward = true; +static bool forward __read_mostly = true; module_param(forward, bool, 0000); -static int __net_init iptable_filter_net_init(struct net *net) +static int __net_init iptable_filter_table_init(struct net *net) { struct ipt_replace *repl; int err; + if (net->ipv4.iptable_filter) + return 0; + repl = ipt_alloc_initial_table(&packet_filter); if (repl == NULL) return -ENOMEM; @@ -69,9 +74,20 @@ static int __net_init iptable_filter_net_init(struct net *net) return err; } +static int __net_init iptable_filter_net_init(struct net *net) +{ + if (net == &init_net || !forward) + return iptable_filter_table_init(net); + + return 0; +} + static void __net_exit iptable_filter_net_exit(struct net *net) { + if (!net->ipv4.iptable_filter) + return; ipt_unregister_table(net, net->ipv4.iptable_filter, filter_ops); + net->ipv4.iptable_filter = NULL; } static struct pernet_operations iptable_filter_net_ops = { @@ -83,24 +99,21 @@ static int __init iptable_filter_init(void) { int ret; + filter_ops = xt_hook_ops_alloc(&packet_filter, iptable_filter_hook); + if (IS_ERR(filter_ops)) + return PTR_ERR(filter_ops); + ret = register_pernet_subsys(&iptable_filter_net_ops); if (ret < 0) - return ret; - - /* Register hooks */ - filter_ops = xt_hook_link(&packet_filter, iptable_filter_hook); - if (IS_ERR(filter_ops)) { - ret = PTR_ERR(filter_ops); - unregister_pernet_subsys(&iptable_filter_net_ops); - } + kfree(filter_ops); return ret; } static void __exit iptable_filter_fini(void) { - xt_hook_unlink(&packet_filter, filter_ops); unregister_pernet_subsys(&iptable_filter_net_ops); + kfree(filter_ops); } module_init(iptable_filter_init); diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c index 668e79166b81..57fc97cdac70 100644 --- a/net/ipv4/netfilter/iptable_mangle.c +++ b/net/ipv4/netfilter/iptable_mangle.c @@ -28,12 +28,15 @@ MODULE_DESCRIPTION("iptables mangle table"); (1 << NF_INET_LOCAL_OUT) | \ (1 << NF_INET_POST_ROUTING)) +static int __net_init iptable_mangle_table_init(struct net *net); + static const struct xt_table packet_mangler = { .name = "mangle", .valid_hooks = MANGLE_VALID_HOOKS, .me = THIS_MODULE, .af = NFPROTO_IPV4, .priority = NF_IP_PRI_MANGLE, + .table_init = iptable_mangle_table_init, }; static unsigned int @@ -92,12 +95,14 @@ iptable_mangle_hook(void *priv, } static struct nf_hook_ops *mangle_ops __read_mostly; - -static int __net_init iptable_mangle_net_init(struct net *net) +static int __net_init iptable_mangle_table_init(struct net *net) { struct ipt_replace *repl; int ret; + if (net->ipv4.iptable_mangle) + return 0; + repl = ipt_alloc_initial_table(&packet_mangler); if (repl == NULL) return -ENOMEM; @@ -109,11 +114,13 @@ static int __net_init iptable_mangle_net_init(struct net *net) static void __net_exit iptable_mangle_net_exit(struct net *net) { + if (!net->ipv4.iptable_mangle) + return; ipt_unregister_table(net, net->ipv4.iptable_mangle, mangle_ops); + net->ipv4.iptable_mangle = NULL; } static struct pernet_operations iptable_mangle_net_ops = { - .init = iptable_mangle_net_init, .exit = iptable_mangle_net_exit, }; @@ -121,15 +128,22 @@ static int __init iptable_mangle_init(void) { int ret; + mangle_ops = xt_hook_ops_alloc(&packet_mangler, iptable_mangle_hook); + if (IS_ERR(mangle_ops)) { + ret = PTR_ERR(mangle_ops); + return ret; + } + ret = register_pernet_subsys(&iptable_mangle_net_ops); - if (ret < 0) + if (ret < 0) { + kfree(mangle_ops); return ret; + } - /* Register hooks */ - mangle_ops = xt_hook_link(&packet_mangler, 
iptable_mangle_hook); - if (IS_ERR(mangle_ops)) { - ret = PTR_ERR(mangle_ops); + ret = iptable_mangle_table_init(&init_net); + if (ret) { unregister_pernet_subsys(&iptable_mangle_net_ops); + kfree(mangle_ops); } return ret; @@ -137,8 +151,8 @@ static int __init iptable_mangle_init(void) static void __exit iptable_mangle_fini(void) { - xt_hook_unlink(&packet_mangler, mangle_ops); unregister_pernet_subsys(&iptable_mangle_net_ops); + kfree(mangle_ops); } module_init(iptable_mangle_init); diff --git a/net/ipv4/netfilter/iptable_nat.c b/net/ipv4/netfilter/iptable_nat.c index e984f1d3017f..138a24bc76ad 100644 --- a/net/ipv4/netfilter/iptable_nat.c +++ b/net/ipv4/netfilter/iptable_nat.c @@ -18,6 +18,8 @@ #include #include +static int __net_init iptable_nat_table_init(struct net *net); + static const struct xt_table nf_nat_ipv4_table = { .name = "nat", .valid_hooks = (1 << NF_INET_PRE_ROUTING) | @@ -26,6 +28,7 @@ static const struct xt_table nf_nat_ipv4_table = { (1 << NF_INET_LOCAL_IN), .me = THIS_MODULE, .af = NFPROTO_IPV4, + .table_init = iptable_nat_table_init, }; static unsigned int iptable_nat_do_chain(void *priv, @@ -95,11 +98,14 @@ static struct nf_hook_ops nf_nat_ipv4_ops[] __read_mostly = { }, }; -static int __net_init iptable_nat_net_init(struct net *net) +static int __net_init iptable_nat_table_init(struct net *net) { struct ipt_replace *repl; int ret; + if (net->ipv4.nat_table) + return 0; + repl = ipt_alloc_initial_table(&nf_nat_ipv4_table); if (repl == NULL) return -ENOMEM; @@ -111,36 +117,31 @@ static int __net_init iptable_nat_net_init(struct net *net) static void __net_exit iptable_nat_net_exit(struct net *net) { + if (!net->ipv4.nat_table) + return; ipt_unregister_table(net, net->ipv4.nat_table, nf_nat_ipv4_ops); + net->ipv4.nat_table = NULL; } static struct pernet_operations iptable_nat_net_ops = { - .init = iptable_nat_net_init, .exit = iptable_nat_net_exit, }; static int __init iptable_nat_init(void) { - int err; - - err = register_pernet_subsys(&iptable_nat_net_ops); - if (err < 0) - goto err1; + int ret = register_pernet_subsys(&iptable_nat_net_ops); - err = nf_register_hooks(nf_nat_ipv4_ops, ARRAY_SIZE(nf_nat_ipv4_ops)); - if (err < 0) - goto err2; - return 0; + if (ret) + return ret; -err2: - unregister_pernet_subsys(&iptable_nat_net_ops); -err1: - return err; + ret = iptable_nat_table_init(&init_net); + if (ret) + unregister_pernet_subsys(&iptable_nat_net_ops); + return ret; } static void __exit iptable_nat_exit(void) { - nf_unregister_hooks(nf_nat_ipv4_ops, ARRAY_SIZE(nf_nat_ipv4_ops)); unregister_pernet_subsys(&iptable_nat_net_ops); } diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c index 9d78780a9036..2642ecd2645c 100644 --- a/net/ipv4/netfilter/iptable_raw.c +++ b/net/ipv4/netfilter/iptable_raw.c @@ -10,12 +10,15 @@ #define RAW_VALID_HOOKS ((1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_OUT)) +static int __net_init iptable_raw_table_init(struct net *net); + static const struct xt_table packet_raw = { .name = "raw", .valid_hooks = RAW_VALID_HOOKS, .me = THIS_MODULE, .af = NFPROTO_IPV4, .priority = NF_IP_PRI_RAW, + .table_init = iptable_raw_table_init, }; /* The work comes in here from netfilter.c. 
*/ @@ -34,11 +37,14 @@ iptable_raw_hook(void *priv, struct sk_buff *skb, static struct nf_hook_ops *rawtable_ops __read_mostly; -static int __net_init iptable_raw_net_init(struct net *net) +static int __net_init iptable_raw_table_init(struct net *net) { struct ipt_replace *repl; int ret; + if (net->ipv4.iptable_raw) + return 0; + repl = ipt_alloc_initial_table(&packet_raw); if (repl == NULL) return -ENOMEM; @@ -50,11 +56,13 @@ static int __net_init iptable_raw_net_init(struct net *net) static void __net_exit iptable_raw_net_exit(struct net *net) { + if (!net->ipv4.iptable_raw) + return; ipt_unregister_table(net, net->ipv4.iptable_raw, rawtable_ops); + net->ipv4.iptable_raw = NULL; } static struct pernet_operations iptable_raw_net_ops = { - .init = iptable_raw_net_init, .exit = iptable_raw_net_exit, }; @@ -62,15 +70,20 @@ static int __init iptable_raw_init(void) { int ret; + rawtable_ops = xt_hook_ops_alloc(&packet_raw, iptable_raw_hook); + if (IS_ERR(rawtable_ops)) + return PTR_ERR(rawtable_ops); + ret = register_pernet_subsys(&iptable_raw_net_ops); - if (ret < 0) + if (ret < 0) { + kfree(rawtable_ops); return ret; + } - /* Register hooks */ - rawtable_ops = xt_hook_link(&packet_raw, iptable_raw_hook); - if (IS_ERR(rawtable_ops)) { - ret = PTR_ERR(rawtable_ops); + ret = iptable_raw_table_init(&init_net); + if (ret) { unregister_pernet_subsys(&iptable_raw_net_ops); + kfree(rawtable_ops); } return ret; @@ -78,8 +91,8 @@ static int __init iptable_raw_init(void) static void __exit iptable_raw_fini(void) { - xt_hook_unlink(&packet_raw, rawtable_ops); unregister_pernet_subsys(&iptable_raw_net_ops); + kfree(rawtable_ops); } module_init(iptable_raw_init); diff --git a/net/ipv4/netfilter/iptable_security.c b/net/ipv4/netfilter/iptable_security.c index 88bc52fb8f4a..ff226596e4b5 100644 --- a/net/ipv4/netfilter/iptable_security.c +++ b/net/ipv4/netfilter/iptable_security.c @@ -28,12 +28,15 @@ MODULE_DESCRIPTION("iptables security table, for MAC rules"); (1 << NF_INET_FORWARD) | \ (1 << NF_INET_LOCAL_OUT) +static int __net_init iptable_security_table_init(struct net *net); + static const struct xt_table security_table = { .name = "security", .valid_hooks = SECURITY_VALID_HOOKS, .me = THIS_MODULE, .af = NFPROTO_IPV4, .priority = NF_IP_PRI_SECURITY, + .table_init = iptable_security_table_init, }; static unsigned int @@ -51,11 +54,14 @@ iptable_security_hook(void *priv, struct sk_buff *skb, static struct nf_hook_ops *sectbl_ops __read_mostly; -static int __net_init iptable_security_net_init(struct net *net) +static int __net_init iptable_security_table_init(struct net *net) { struct ipt_replace *repl; int ret; + if (net->ipv4.iptable_security) + return 0; + repl = ipt_alloc_initial_table(&security_table); if (repl == NULL) return -ENOMEM; @@ -67,11 +73,14 @@ static int __net_init iptable_security_net_init(struct net *net) static void __net_exit iptable_security_net_exit(struct net *net) { + if (!net->ipv4.iptable_security) + return; + ipt_unregister_table(net, net->ipv4.iptable_security, sectbl_ops); + net->ipv4.iptable_security = NULL; } static struct pernet_operations iptable_security_net_ops = { - .init = iptable_security_net_init, .exit = iptable_security_net_exit, }; @@ -79,27 +88,29 @@ static int __init iptable_security_init(void) { int ret; + sectbl_ops = xt_hook_ops_alloc(&security_table, iptable_security_hook); + if (IS_ERR(sectbl_ops)) + return PTR_ERR(sectbl_ops); + ret = register_pernet_subsys(&iptable_security_net_ops); - if (ret < 0) + if (ret < 0) { + kfree(sectbl_ops); return ret; - - 
sectbl_ops = xt_hook_link(&security_table, iptable_security_hook); - if (IS_ERR(sectbl_ops)) { - ret = PTR_ERR(sectbl_ops); - goto cleanup_table; } - return ret; + ret = iptable_security_table_init(&init_net); + if (ret) { + unregister_pernet_subsys(&iptable_security_net_ops); + kfree(sectbl_ops); + } -cleanup_table: - unregister_pernet_subsys(&iptable_security_net_ops); return ret; } static void __exit iptable_security_fini(void) { - xt_hook_unlink(&security_table, sectbl_ops); unregister_pernet_subsys(&iptable_security_net_ops); + kfree(sectbl_ops); } module_init(iptable_security_init); diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 052d7447b52e..84f9baf7aee8 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -2071,6 +2071,24 @@ do_ip6t_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) return ret; } +static void __ip6t_unregister_table(struct net *net, struct xt_table *table) +{ + struct xt_table_info *private; + void *loc_cpu_entry; + struct module *table_owner = table->me; + struct ip6t_entry *iter; + + private = xt_unregister_table(table); + + /* Decrease module usage counts and free resources */ + loc_cpu_entry = private->entries; + xt_entry_foreach(iter, loc_cpu_entry, private->size) + cleanup_entry(iter, net); + if (private->number > private->initial_entries) + module_put(table_owner); + xt_free_table_info(private); +} + int ip6t_register_table(struct net *net, const struct xt_table *table, const struct ip6t_replace *repl, const struct nf_hook_ops *ops, @@ -2099,7 +2117,15 @@ int ip6t_register_table(struct net *net, const struct xt_table *table, goto out_free; } + /* set res now, will see skbs right after nf_register_net_hooks */ WRITE_ONCE(*res, new_table); + + ret = nf_register_net_hooks(net, ops, hweight32(table->valid_hooks)); + if (ret != 0) { + __ip6t_unregister_table(net, new_table); + *res = NULL; + } + return ret; out_free: @@ -2110,20 +2136,8 @@ out_free: void ip6t_unregister_table(struct net *net, struct xt_table *table, const struct nf_hook_ops *ops) { - struct xt_table_info *private; - void *loc_cpu_entry; - struct module *table_owner = table->me; - struct ip6t_entry *iter; - - private = xt_unregister_table(table); - - /* Decrease module usage counts and free resources */ - loc_cpu_entry = private->entries; - xt_entry_foreach(iter, loc_cpu_entry, private->size) - cleanup_entry(iter, net); - if (private->number > private->initial_entries) - module_put(table_owner); - xt_free_table_info(private); + nf_unregister_net_hooks(net, ops, hweight32(table->valid_hooks)); + __ip6t_unregister_table(net, table); } /* Returns 1 if the type and code is matched by the range, 0 otherwise */ diff --git a/net/ipv6/netfilter/ip6table_filter.c b/net/ipv6/netfilter/ip6table_filter.c index d191d54cdf50..1343077dde93 100644 --- a/net/ipv6/netfilter/ip6table_filter.c +++ b/net/ipv6/netfilter/ip6table_filter.c @@ -22,12 +22,15 @@ MODULE_DESCRIPTION("ip6tables filter table"); (1 << NF_INET_FORWARD) | \ (1 << NF_INET_LOCAL_OUT)) +static int __net_init ip6table_filter_table_init(struct net *net); + static const struct xt_table packet_filter = { .name = "filter", .valid_hooks = FILTER_VALID_HOOKS, .me = THIS_MODULE, .af = NFPROTO_IPV6, .priority = NF_IP6_PRI_FILTER, + .table_init = ip6table_filter_table_init, }; /* The work comes in here from netfilter.c. 
*/ @@ -44,11 +47,14 @@ static struct nf_hook_ops *filter_ops __read_mostly; static bool forward = true; module_param(forward, bool, 0000); -static int __net_init ip6table_filter_net_init(struct net *net) +static int __net_init ip6table_filter_table_init(struct net *net) { struct ip6t_replace *repl; int err; + if (net->ipv6.ip6table_filter) + return 0; + repl = ip6t_alloc_initial_table(&packet_filter); if (repl == NULL) return -ENOMEM; @@ -62,9 +68,20 @@ static int __net_init ip6table_filter_net_init(struct net *net) return err; } +static int __net_init ip6table_filter_net_init(struct net *net) +{ + if (net == &init_net || !forward) + return ip6table_filter_table_init(net); + + return 0; +} + static void __net_exit ip6table_filter_net_exit(struct net *net) { + if (!net->ipv6.ip6table_filter) + return; ip6t_unregister_table(net, net->ipv6.ip6table_filter, filter_ops); + net->ipv6.ip6table_filter = NULL; } static struct pernet_operations ip6table_filter_net_ops = { @@ -76,28 +93,21 @@ static int __init ip6table_filter_init(void) { int ret; + filter_ops = xt_hook_ops_alloc(&packet_filter, ip6table_filter_hook); + if (IS_ERR(filter_ops)) + return PTR_ERR(filter_ops); + ret = register_pernet_subsys(&ip6table_filter_net_ops); if (ret < 0) - return ret; - - /* Register hooks */ - filter_ops = xt_hook_link(&packet_filter, ip6table_filter_hook); - if (IS_ERR(filter_ops)) { - ret = PTR_ERR(filter_ops); - goto cleanup_table; - } + kfree(filter_ops); return ret; - - cleanup_table: - unregister_pernet_subsys(&ip6table_filter_net_ops); - return ret; } static void __exit ip6table_filter_fini(void) { - xt_hook_unlink(&packet_filter, filter_ops); unregister_pernet_subsys(&ip6table_filter_net_ops); + kfree(filter_ops); } module_init(ip6table_filter_init); diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c index fe43d08284bc..cb2b28883252 100644 --- a/net/ipv6/netfilter/ip6table_mangle.c +++ b/net/ipv6/netfilter/ip6table_mangle.c @@ -23,12 +23,15 @@ MODULE_DESCRIPTION("ip6tables mangle table"); (1 << NF_INET_LOCAL_OUT) | \ (1 << NF_INET_POST_ROUTING)) +static int __net_init ip6table_mangle_table_init(struct net *net); + static const struct xt_table packet_mangler = { .name = "mangle", .valid_hooks = MANGLE_VALID_HOOKS, .me = THIS_MODULE, .af = NFPROTO_IPV6, .priority = NF_IP6_PRI_MANGLE, + .table_init = ip6table_mangle_table_init, }; static unsigned int @@ -88,11 +91,14 @@ ip6table_mangle_hook(void *priv, struct sk_buff *skb, } static struct nf_hook_ops *mangle_ops __read_mostly; -static int __net_init ip6table_mangle_net_init(struct net *net) +static int __net_init ip6table_mangle_table_init(struct net *net) { struct ip6t_replace *repl; int ret; + if (net->ipv6.ip6table_mangle) + return 0; + repl = ip6t_alloc_initial_table(&packet_mangler); if (repl == NULL) return -ENOMEM; @@ -104,11 +110,14 @@ static int __net_init ip6table_mangle_net_init(struct net *net) static void __net_exit ip6table_mangle_net_exit(struct net *net) { + if (!net->ipv6.ip6table_mangle) + return; + ip6t_unregister_table(net, net->ipv6.ip6table_mangle, mangle_ops); + net->ipv6.ip6table_mangle = NULL; } static struct pernet_operations ip6table_mangle_net_ops = { - .init = ip6table_mangle_net_init, .exit = ip6table_mangle_net_exit, }; @@ -116,28 +125,28 @@ static int __init ip6table_mangle_init(void) { int ret; + mangle_ops = xt_hook_ops_alloc(&packet_mangler, ip6table_mangle_hook); + if (IS_ERR(mangle_ops)) + return PTR_ERR(mangle_ops); + ret = register_pernet_subsys(&ip6table_mangle_net_ops); - if 
(ret < 0) + if (ret < 0) { + kfree(mangle_ops); return ret; - - /* Register hooks */ - mangle_ops = xt_hook_link(&packet_mangler, ip6table_mangle_hook); - if (IS_ERR(mangle_ops)) { - ret = PTR_ERR(mangle_ops); - goto cleanup_table; } - return ret; - - cleanup_table: - unregister_pernet_subsys(&ip6table_mangle_net_ops); + ret = ip6table_mangle_table_init(&init_net); + if (ret) { + unregister_pernet_subsys(&ip6table_mangle_net_ops); + kfree(mangle_ops); + } return ret; } static void __exit ip6table_mangle_fini(void) { - xt_hook_unlink(&packet_mangler, mangle_ops); unregister_pernet_subsys(&ip6table_mangle_net_ops); + kfree(mangle_ops); } module_init(ip6table_mangle_init); diff --git a/net/ipv6/netfilter/ip6table_nat.c b/net/ipv6/netfilter/ip6table_nat.c index 7f9740e8ef47..7d2bd940291f 100644 --- a/net/ipv6/netfilter/ip6table_nat.c +++ b/net/ipv6/netfilter/ip6table_nat.c @@ -20,6 +20,8 @@ #include #include +static int __net_init ip6table_nat_table_init(struct net *net); + static const struct xt_table nf_nat_ipv6_table = { .name = "nat", .valid_hooks = (1 << NF_INET_PRE_ROUTING) | @@ -28,6 +30,7 @@ static const struct xt_table nf_nat_ipv6_table = { (1 << NF_INET_LOCAL_IN), .me = THIS_MODULE, .af = NFPROTO_IPV6, + .table_init = ip6table_nat_table_init, }; static unsigned int ip6table_nat_do_chain(void *priv, @@ -97,11 +100,14 @@ static struct nf_hook_ops nf_nat_ipv6_ops[] __read_mostly = { }, }; -static int __net_init ip6table_nat_net_init(struct net *net) +static int __net_init ip6table_nat_table_init(struct net *net) { struct ip6t_replace *repl; int ret; + if (net->ipv6.ip6table_nat) + return 0; + repl = ip6t_alloc_initial_table(&nf_nat_ipv6_table); if (repl == NULL) return -ENOMEM; @@ -113,36 +119,31 @@ static int __net_init ip6table_nat_net_init(struct net *net) static void __net_exit ip6table_nat_net_exit(struct net *net) { + if (!net->ipv6.ip6table_nat) + return; ip6t_unregister_table(net, net->ipv6.ip6table_nat, nf_nat_ipv6_ops); + net->ipv6.ip6table_nat = NULL; } static struct pernet_operations ip6table_nat_net_ops = { - .init = ip6table_nat_net_init, .exit = ip6table_nat_net_exit, }; static int __init ip6table_nat_init(void) { - int err; - - err = register_pernet_subsys(&ip6table_nat_net_ops); - if (err < 0) - goto err1; + int ret = register_pernet_subsys(&ip6table_nat_net_ops); - err = nf_register_hooks(nf_nat_ipv6_ops, ARRAY_SIZE(nf_nat_ipv6_ops)); - if (err < 0) - goto err2; - return 0; + if (ret) + return ret; -err2: - unregister_pernet_subsys(&ip6table_nat_net_ops); -err1: - return err; + ret = ip6table_nat_table_init(&init_net); + if (ret) + unregister_pernet_subsys(&ip6table_nat_net_ops); + return ret; } static void __exit ip6table_nat_exit(void) { - nf_unregister_hooks(nf_nat_ipv6_ops, ARRAY_SIZE(nf_nat_ipv6_ops)); unregister_pernet_subsys(&ip6table_nat_net_ops); } diff --git a/net/ipv6/netfilter/ip6table_raw.c b/net/ipv6/netfilter/ip6table_raw.c index 5fac433da069..d4bc56443dc1 100644 --- a/net/ipv6/netfilter/ip6table_raw.c +++ b/net/ipv6/netfilter/ip6table_raw.c @@ -9,12 +9,15 @@ #define RAW_VALID_HOOKS ((1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_OUT)) +static int __net_init ip6table_raw_table_init(struct net *net); + static const struct xt_table packet_raw = { .name = "raw", .valid_hooks = RAW_VALID_HOOKS, .me = THIS_MODULE, .af = NFPROTO_IPV6, .priority = NF_IP6_PRI_RAW, + .table_init = ip6table_raw_table_init, }; /* The work comes in here from netfilter.c. 
*/ @@ -27,11 +30,14 @@ ip6table_raw_hook(void *priv, struct sk_buff *skb, static struct nf_hook_ops *rawtable_ops __read_mostly; -static int __net_init ip6table_raw_net_init(struct net *net) +static int __net_init ip6table_raw_table_init(struct net *net) { struct ip6t_replace *repl; int ret; + if (net->ipv6.ip6table_raw) + return 0; + repl = ip6t_alloc_initial_table(&packet_raw); if (repl == NULL) return -ENOMEM; @@ -43,11 +49,13 @@ static int __net_init ip6table_raw_net_init(struct net *net) static void __net_exit ip6table_raw_net_exit(struct net *net) { + if (!net->ipv6.ip6table_raw) + return; ip6t_unregister_table(net, net->ipv6.ip6table_raw, rawtable_ops); + net->ipv6.ip6table_raw = NULL; } static struct pernet_operations ip6table_raw_net_ops = { - .init = ip6table_raw_net_init, .exit = ip6table_raw_net_exit, }; @@ -55,28 +63,29 @@ static int __init ip6table_raw_init(void) { int ret; + /* Register hooks */ + rawtable_ops = xt_hook_ops_alloc(&packet_raw, ip6table_raw_hook); + if (IS_ERR(rawtable_ops)) + return PTR_ERR(rawtable_ops); + ret = register_pernet_subsys(&ip6table_raw_net_ops); - if (ret < 0) + if (ret < 0) { + kfree(rawtable_ops); return ret; - - /* Register hooks */ - rawtable_ops = xt_hook_link(&packet_raw, ip6table_raw_hook); - if (IS_ERR(rawtable_ops)) { - ret = PTR_ERR(rawtable_ops); - goto cleanup_table; } - return ret; - - cleanup_table: - unregister_pernet_subsys(&ip6table_raw_net_ops); + ret = ip6table_raw_table_init(&init_net); + if (ret) { + unregister_pernet_subsys(&ip6table_raw_net_ops); + kfree(rawtable_ops); + } return ret; } static void __exit ip6table_raw_fini(void) { - xt_hook_unlink(&packet_raw, rawtable_ops); unregister_pernet_subsys(&ip6table_raw_net_ops); + kfree(rawtable_ops); } module_init(ip6table_raw_init); diff --git a/net/ipv6/netfilter/ip6table_security.c b/net/ipv6/netfilter/ip6table_security.c index cf587453e322..cf26ccb04056 100644 --- a/net/ipv6/netfilter/ip6table_security.c +++ b/net/ipv6/netfilter/ip6table_security.c @@ -27,12 +27,15 @@ MODULE_DESCRIPTION("ip6tables security table, for MAC rules"); (1 << NF_INET_FORWARD) | \ (1 << NF_INET_LOCAL_OUT) +static int __net_init ip6table_security_table_init(struct net *net); + static const struct xt_table security_table = { .name = "security", .valid_hooks = SECURITY_VALID_HOOKS, .me = THIS_MODULE, .af = NFPROTO_IPV6, .priority = NF_IP6_PRI_SECURITY, + .table_init = ip6table_security_table_init, }; static unsigned int @@ -44,11 +47,14 @@ ip6table_security_hook(void *priv, struct sk_buff *skb, static struct nf_hook_ops *sectbl_ops __read_mostly; -static int __net_init ip6table_security_net_init(struct net *net) +static int __net_init ip6table_security_table_init(struct net *net) { struct ip6t_replace *repl; int ret; + if (net->ipv6.ip6table_security) + return 0; + repl = ip6t_alloc_initial_table(&security_table); if (repl == NULL) return -ENOMEM; @@ -60,11 +66,13 @@ static int __net_init ip6table_security_net_init(struct net *net) static void __net_exit ip6table_security_net_exit(struct net *net) { + if (!net->ipv6.ip6table_security) + return; ip6t_unregister_table(net, net->ipv6.ip6table_security, sectbl_ops); + net->ipv6.ip6table_security = NULL; } static struct pernet_operations ip6table_security_net_ops = { - .init = ip6table_security_net_init, .exit = ip6table_security_net_exit, }; @@ -72,27 +80,28 @@ static int __init ip6table_security_init(void) { int ret; + sectbl_ops = xt_hook_ops_alloc(&security_table, ip6table_security_hook); + if (IS_ERR(sectbl_ops)) + return PTR_ERR(sectbl_ops); + ret = 
register_pernet_subsys(&ip6table_security_net_ops); - if (ret < 0) + if (ret < 0) { + kfree(sectbl_ops); return ret; - - sectbl_ops = xt_hook_link(&security_table, ip6table_security_hook); - if (IS_ERR(sectbl_ops)) { - ret = PTR_ERR(sectbl_ops); - goto cleanup_table; } - return ret; - -cleanup_table: - unregister_pernet_subsys(&ip6table_security_net_ops); + ret = ip6table_security_table_init(&init_net); + if (ret) { + unregister_pernet_subsys(&ip6table_security_net_ops); + kfree(sectbl_ops); + } return ret; } static void __exit ip6table_security_fini(void) { - xt_hook_unlink(&security_table, sectbl_ops); unregister_pernet_subsys(&ip6table_security_net_ops); + kfree(sectbl_ops); } module_init(ip6table_security_init); diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index c8a0b7da5ff4..d0cd2b9bf844 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -694,12 +694,45 @@ EXPORT_SYMBOL(xt_free_table_info); struct xt_table *xt_find_table_lock(struct net *net, u_int8_t af, const char *name) { - struct xt_table *t; + struct xt_table *t, *found = NULL; mutex_lock(&xt[af].mutex); list_for_each_entry(t, &net->xt.tables[af], list) if (strcmp(t->name, name) == 0 && try_module_get(t->me)) return t; + + if (net == &init_net) + goto out; + + /* Table doesn't exist in this netns, re-try init */ + list_for_each_entry(t, &init_net.xt.tables[af], list) { + if (strcmp(t->name, name)) + continue; + if (!try_module_get(t->me)) + return NULL; + + mutex_unlock(&xt[af].mutex); + if (t->table_init(net) != 0) { + module_put(t->me); + return NULL; + } + + found = t; + + mutex_lock(&xt[af].mutex); + break; + } + + if (!found) + goto out; + + /* and once again: */ + list_for_each_entry(t, &net->xt.tables[af], list) + if (strcmp(t->name, name) == 0) + return t; + + module_put(found->me); + out: mutex_unlock(&xt[af].mutex); return NULL; } @@ -1170,20 +1203,20 @@ static const struct file_operations xt_target_ops = { #endif /* CONFIG_PROC_FS */ /** - * xt_hook_link - set up hooks for a new table + * xt_hook_ops_alloc - set up hooks for a new table * @table: table with metadata needed to set up hooks * @fn: Hook function * - * This function will take care of creating and registering the necessary - * Netfilter hooks for XT tables. + * This function will create the nf_hook_ops that the x_table needs + * to hand to xt_hook_link_net(). 
*/ -struct nf_hook_ops *xt_hook_link(const struct xt_table *table, nf_hookfn *fn) +struct nf_hook_ops * +xt_hook_ops_alloc(const struct xt_table *table, nf_hookfn *fn) { unsigned int hook_mask = table->valid_hooks; uint8_t i, num_hooks = hweight32(hook_mask); uint8_t hooknum; struct nf_hook_ops *ops; - int ret; ops = kmalloc(sizeof(*ops) * num_hooks, GFP_KERNEL); if (ops == NULL) @@ -1200,27 +1233,9 @@ struct nf_hook_ops *xt_hook_link(const struct xt_table *table, nf_hookfn *fn) ++i; } - ret = nf_register_hooks(ops, num_hooks); - if (ret < 0) { - kfree(ops); - return ERR_PTR(ret); - } - return ops; } -EXPORT_SYMBOL_GPL(xt_hook_link); - -/** - * xt_hook_unlink - remove hooks for a table - * @ops: nf_hook_ops array as returned by nf_hook_link - * @hook_mask: the very same mask that was passed to nf_hook_link - */ -void xt_hook_unlink(const struct xt_table *table, struct nf_hook_ops *ops) -{ - nf_unregister_hooks(ops, hweight32(table->valid_hooks)); - kfree(ops); -} -EXPORT_SYMBOL_GPL(xt_hook_unlink); +EXPORT_SYMBOL_GPL(xt_hook_ops_alloc); int xt_proto_init(struct net *net, u_int8_t af) { -- cgit v1.2.3 From af4610c39589d839551da104f7da342d86f23ea0 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 25 Feb 2016 10:08:38 +0100 Subject: netfilter: don't call hooks unless needed With the previous patches in place, a netns nf_hook_list might be empty even if, e.g., init_net performs filtering. Thus, change nf_hook_thresh to check the hook_list as well before initializing hook_state and calling nf_hook_slow(). We still make use of static keys; if no netfilter modules are loaded, the list is guaranteed to be empty. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter.h | 29 +++++++++++------------------ 1 file changed, 11 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 0ad556726181..9230f9aee896 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -141,22 +141,6 @@ void nf_unregister_sockopt(struct nf_sockopt_ops *reg); #ifdef HAVE_JUMP_LABEL extern struct static_key nf_hooks_needed[NFPROTO_NUMPROTO][NF_MAX_HOOKS]; -static inline bool nf_hook_list_active(struct list_head *hook_list, - u_int8_t pf, unsigned int hook) -{ - if (__builtin_constant_p(pf) && - __builtin_constant_p(hook)) - return static_key_false(&nf_hooks_needed[pf][hook]); - - return !list_empty(hook_list); -} -#else -static inline bool nf_hook_list_active(struct list_head *hook_list, - u_int8_t pf, unsigned int hook) -{ - return !list_empty(hook_list); -} #endif int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state); @@ -177,9 +161,18 @@ static inline int nf_hook_thresh(u_int8_t pf, unsigned int hook, int (*okfn)(struct net *, struct sock *, struct sk_buff *), int thresh) { - struct list_head *hook_list = &net->nf.hooks[pf][hook]; + struct list_head *hook_list; + +#ifdef HAVE_JUMP_LABEL + if (__builtin_constant_p(pf) && + __builtin_constant_p(hook) && + !static_key_false(&nf_hooks_needed[pf][hook])) + return 1; +#endif + + hook_list = &net->nf.hooks[pf][hook]; - if (nf_hook_list_active(hook_list, pf, hook)) { + if (!list_empty(hook_list)) { struct nf_hook_state state; nf_hook_state_init(&state, hook_list, hook, thresh, -- cgit v1.2.3 From afea03656add70a0e00f5b0039f87288c7af8b9f Mon Sep 17 00:00:00 2001 From: Giuseppe Cavallaro Date: Mon, 29 Feb 2016 14:27:28 +0100 Subject: stmmac: rework DMA bus setting and introduce new platform AXI structure This patch restructures the DMA
bus settings by introducing a new platform structure used for programming the AXI Bus Mode Register inside the DMA module. This structure can be populated from device-tree as documented in the binding txt file. After the DMA is initialized, platform drivers can optionally tune the AXI register. This patch also reworks some parameters to keep the DMA configuration coherent now that the AXI register is introduced. For example, the burst length is now managed through the AXI support mentioned above, so the snps,burst_len parameter has been removed. It makes sense to provide the AAL parameter from DT to program Address-Aligned Beats inside Register 0, and to review the PBL settings when initializing the engine. For the PCI glue, retracing the history of this setting shows that it was added to align a configuration, not to fix a known problem, and no issues have been seen with this patch. It is safe to use the default burst length instead of tuning it to the maximum value. Signed-off-by: Giuseppe Cavallaro Signed-off-by: Alexandre TORGUE Signed-off-by: David S. Miller --- Documentation/devicetree/bindings/net/stmmac.txt | 54 ++++++++---- drivers/net/ethernet/stmicro/stmmac/common.h | 5 +- drivers/net/ethernet/stmicro/stmmac/dwmac1000.h | 2 +- .../net/ethernet/stmicro/stmmac/dwmac1000_dma.c | 99 +++++++++++++++------- drivers/net/ethernet/stmicro/stmmac/dwmac100_dma.c | 2 +- drivers/net/ethernet/stmicro/stmmac/dwmac_dma.h | 34 +++++++- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 12 ++- drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c | 4 +- .../net/ethernet/stmicro/stmmac/stmmac_platform.c | 42 ++++++++- include/linux/stmmac.h | 17 +++- 10 files changed, 209 insertions(+), 62 deletions(-) (limited to 'include/linux') diff --git a/Documentation/devicetree/bindings/net/stmmac.txt b/Documentation/devicetree/bindings/net/stmmac.txt index e862a922bd3f..6605d19601c2 100644 --- a/Documentation/devicetree/bindings/net/stmmac.txt +++ b/Documentation/devicetree/bindings/net/stmmac.txt @@ -17,7 +17,25 @@ Required properties: The 1st cell is reset pre-delay in micro seconds. The 2nd cell is reset pulse in micro seconds. The 3rd cell is reset post-delay in micro seconds. + +Optional properties: +- resets: Should contain a phandle to the STMMAC reset signal, if any +- reset-names: Should contain the reset signal name "stmmaceth", if a + reset phandle is given +- max-frame-size: See ethernet.txt file in the same directory +- clocks: If present, the first clock should be the GMAC main clock and + the second clock should be peripheral's register interface clock. Further + clocks may be specified in derived bindings. +- clock-names: One name for each entry in the clocks property, the + first one should be "stmmaceth" and the second one should be "pclk". +- clk_ptp_ref: this is the PTP reference clock; in case of the PTP is + available this clock is used for programming the Timestamp Addend Register. + If not passed then the system clock will be used and this is fine on some + platforms.
+- tx-fifo-depth: See ethernet.txt file in the same directory +- rx-fifo-depth: See ethernet.txt file in the same directory - snps,pbl Programmable Burst Length +- snps,aal Address-Aligned Beats - snps,fixed-burst Program the DMA to use the fixed burst mode - snps,mixed-burst Program the DMA to use the mixed burst mode - snps,force_thresh_dma_mode Force DMA to use the threshold mode for @@ -29,27 +47,28 @@ Required properties: supported by this device instance - snps,perfect-filter-entries: Number of perfect filter entries supported by this device instance - -Optional properties: -- resets: Should contain a phandle to the STMMAC reset signal, if any -- reset-names: Should contain the reset signal name "stmmaceth", if a - reset phandle is given -- max-frame-size: See ethernet.txt file in the same directory -- clocks: If present, the first clock should be the GMAC main clock - The optional second clock should be peripheral's register interface clock. - The third optional clock should be the ptp reference clock. - Further clocks may be specified in derived bindings. -- clock-names: One name for each entry in the clocks property. - The first one should be "stmmaceth". - The optional second one should be "pclk". - The optional third one should be "clk_ptp_ref". -- snps,burst_len: The AXI burst lenth value of the AXI BUS MODE register. -- tx-fifo-depth: See ethernet.txt file in the same directory -- rx-fifo-depth: See ethernet.txt file in the same directory +- AXI BUS Mode parameters: below the list of all the parameters to program the + AXI register inside the DMA module: + - snps,lpi_en: enable Low Power Interface + - snps,xit_frm: unlock on WoL + - snps,wr_osr_lmt: max write oustanding req. limit + - snps,rd_osr_lmt: max read oustanding req. limit + - snps,kbbe: do not cross 1KiB boundary. + - snps,axi_all: align address + - snps,blen: this is a vector of supported burst length. + - snps,fb: fixed-burst + - snps,mb: mixed-burst + - snps,rb: rebuild INCRx Burst - mdio: with compatible = "snps,dwmac-mdio", create and register mdio bus. 
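These AXI knobs map one-to-one onto the new struct stmmac_axi added to
include/linux/stmmac.h below, so a non-DT platform could fill the same
structure from board code and hook it up through plat->axi. A minimal
sketch, with illustrative values that simply mirror the DT example that
follows (the structure and field names come from this patch; the values
and the plat_dat name are assumptions):

	static struct stmmac_axi board_axi_cfg = {
		/* illustrative tuning only, not taken from real hardware */
		.axi_wr_osr_lmt = 0xf,	/* max outstanding write requests */
		.axi_rd_osr_lmt = 0xf,	/* max outstanding read requests */
		.axi_blen = { 256, 128, 64, 32, 0, 0, 0 }, /* allowed bursts */
	};

	/* in the board setup code, before the driver probes: */
	plat_dat->axi = &board_axi_cfg;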
Examples: + stmmac_axi_setup: stmmac-axi-config { + snps,wr_osr_lmt = <0xf>; + snps,rd_osr_lmt = <0xf>; + snps,blen = <256 128 64 32 0 0 0>; + }; + gmac0: ethernet@e0800000 { compatible = "st,spear600-gmac"; reg = <0xe0800000 0x8000>; @@ -65,6 +84,7 @@ Examples: tx-fifo-depth = <16384>; clocks = <&clock>; clock-names = "stmmaceth"; + snps,axi-config = <&stmmac_axi_setup>; mdio0 { #address-cells = <1>; #size-cells = <0>; diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h index bac0e44d7634..586a33624dd2 100644 --- a/drivers/net/ethernet/stmicro/stmmac/common.h +++ b/drivers/net/ethernet/stmicro/stmmac/common.h @@ -27,6 +27,7 @@ #include #include +#include #include #include #if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE) @@ -378,7 +379,9 @@ struct stmmac_dma_ops { /* DMA core initialization */ int (*reset)(void __iomem *ioaddr); void (*init)(void __iomem *ioaddr, int pbl, int fb, int mb, - int burst_len, u32 dma_tx, u32 dma_rx, int atds); + int aal, u32 dma_tx, u32 dma_rx, int atds); + /* Configure the AXI Bus Mode Register */ + void (*axi)(void __iomem *ioaddr, struct stmmac_axi *axi); /* Dump DMA registers */ void (*dump_regs) (void __iomem *ioaddr); /* Set tx/rx threshold in the csr6 register diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac1000.h b/drivers/net/ethernet/stmicro/stmmac/dwmac1000.h index 9d36ae788429..b0593a4268ee 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac1000.h +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac1000.h @@ -240,7 +240,7 @@ enum rx_tx_priority_ratio { #define DMA_BUS_MODE_RPBL_MASK 0x003e0000 /* Rx-Programmable Burst Len */ #define DMA_BUS_MODE_RPBL_SHIFT 17 #define DMA_BUS_MODE_USP 0x00800000 -#define DMA_BUS_MODE_PBL 0x01000000 +#define DMA_BUS_MODE_MAXPBL 0x01000000 #define DMA_BUS_MODE_AAL 0x02000000 /* DMA CRS Control and Status Register Mapping */ diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_dma.c b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_dma.c index 5f0aea56b298..da32d6037e3e 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_dma.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_dma.c @@ -30,24 +30,76 @@ #include "dwmac1000.h" #include "dwmac_dma.h" +static void dwmac1000_dma_axi(void __iomem *ioaddr, struct stmmac_axi *axi) +{ + u32 value = readl(ioaddr + DMA_AXI_BUS_MODE); + int i; + + pr_info("dwmac1000: Master AXI performs %s burst length\n", + !(value & DMA_AXI_UNDEF) ? "fixed" : "any"); + + if (axi->axi_lpi_en) + value |= DMA_AXI_EN_LPI; + if (axi->axi_xit_frm) + value |= DMA_AXI_LPI_XIT_FRM; + + value |= (axi->axi_wr_osr_lmt & DMA_AXI_WR_OSR_LMT_MASK) << + DMA_AXI_WR_OSR_LMT_SHIFT; + + value |= (axi->axi_rd_osr_lmt & DMA_AXI_RD_OSR_LMT_MASK) << + DMA_AXI_RD_OSR_LMT_SHIFT; + + /* Depending on the UNDEF bit the Master AXI will perform any burst + * length according to the BLEN programmed (by default all BLEN are + * set). 
+ */ + for (i = 0; i < AXI_BLEN; i++) { + switch (axi->axi_blen[i]) { + case 256: + value |= DMA_AXI_BLEN256; + break; + case 128: + value |= DMA_AXI_BLEN128; + break; + case 64: + value |= DMA_AXI_BLEN64; + break; + case 32: + value |= DMA_AXI_BLEN32; + break; + case 16: + value |= DMA_AXI_BLEN16; + break; + case 8: + value |= DMA_AXI_BLEN8; + break; + case 4: + value |= DMA_AXI_BLEN4; + break; + } + } + + writel(value, ioaddr + DMA_AXI_BUS_MODE); +} + static void dwmac1000_dma_init(void __iomem *ioaddr, int pbl, int fb, int mb, - int burst_len, u32 dma_tx, u32 dma_rx, int atds) + int aal, u32 dma_tx, u32 dma_rx, int atds) { - u32 value; + u32 value = readl(ioaddr + DMA_BUS_MODE); /* - * Set the DMA PBL (Programmable Burst Length) mode - * Before stmmac core 3.50 this mode bit was 4xPBL, and + * Set the DMA PBL (Programmable Burst Length) mode. + * + * Note: before stmmac core 3.50 this mode bit was 4xPBL, and * post 3.5 mode bit acts as 8*PBL. - * For core rev < 3.5, when the core is set for 4xPBL mode, the - * DMA transfers the data in 4, 8, 16, 32, 64 & 128 beats - * depending on pbl value. - * For core rev > 3.5, when the core is set for 8xPBL mode, the - * DMA transfers the data in 8, 16, 32, 64, 128 & 256 beats - * depending on pbl value. + * + * This configuration doesn't take care about the Separate PBL + * so only the bits: 13-8 are programmed with the PBL passed from the + * platform. */ - value = DMA_BUS_MODE_PBL | ((pbl << DMA_BUS_MODE_PBL_SHIFT) | - (pbl << DMA_BUS_MODE_RPBL_SHIFT)); + value |= DMA_BUS_MODE_MAXPBL; + value &= ~DMA_BUS_MODE_PBL_MASK; + value |= (pbl << DMA_BUS_MODE_PBL_SHIFT); /* Set the Fixed burst mode */ if (fb) @@ -60,26 +112,10 @@ static void dwmac1000_dma_init(void __iomem *ioaddr, int pbl, int fb, int mb, if (atds) value |= DMA_BUS_MODE_ATDS; - writel(value, ioaddr + DMA_BUS_MODE); + if (aal) + value |= DMA_BUS_MODE_AAL; - /* In case of GMAC AXI configuration, program the DMA_AXI_BUS_MODE - * for supported bursts. - * - * Note: This is applicable only for revision GMACv3.61a. For - * older version this register is reserved and shall have no - * effect. - * - * Note: - * For Fixed Burst Mode: if we directly write 0xFF to this - * register using the configurations pass from platform code, - * this would ensure that all bursts supported by core are set - * and those which are not supported would remain ineffective. - * - * For Non Fixed Burst Mode: provide the maximum value of the - * burst length. Any burst equal or below the provided burst - * length would be allowed to perform. 
- */ - writel(burst_len, ioaddr + DMA_AXI_BUS_MODE); + writel(value, ioaddr + DMA_BUS_MODE); /* Mask interrupts by writing to CSR7 */ writel(DMA_INTR_DEFAULT_MASK, ioaddr + DMA_INTR_ENA); @@ -192,6 +228,7 @@ static void dwmac1000_rx_watchdog(void __iomem *ioaddr, u32 riwt) const struct stmmac_dma_ops dwmac1000_dma_ops = { .reset = dwmac_dma_reset, .init = dwmac1000_dma_init, + .axi = dwmac1000_dma_axi, .dump_regs = dwmac1000_dump_dma_regs, .dma_mode = dwmac1000_dma_operation_mode, .enable_dma_transmission = dwmac_enable_dma_transmission, diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac100_dma.c b/drivers/net/ethernet/stmicro/stmmac/dwmac100_dma.c index c40582a938a4..61f54c99a7de 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac100_dma.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac100_dma.c @@ -33,7 +33,7 @@ #include "dwmac_dma.h" static void dwmac100_dma_init(void __iomem *ioaddr, int pbl, int fb, int mb, - int burst_len, u32 dma_tx, u32 dma_rx, int atds) + int aal, u32 dma_tx, u32 dma_rx, int atds) { /* Enable Application Access by writing to DMA CSR0 */ writel(DMA_BUS_MODE_DEFAULT | (pbl << DMA_BUS_MODE_PBL_SHIFT), diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac_dma.h b/drivers/net/ethernet/stmicro/stmmac/dwmac_dma.h index 13ca90e23479..726d9d9aaf83 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac_dma.h +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac_dma.h @@ -41,8 +41,40 @@ /* Rx watchdog register */ #define DMA_RX_WATCHDOG 0x00001024 -/* AXI Bus Mode */ + +/* AXI Master Bus Mode */ #define DMA_AXI_BUS_MODE 0x00001028 + +#define DMA_AXI_EN_LPI BIT(31) +#define DMA_AXI_LPI_XIT_FRM BIT(30) +#define DMA_AXI_WR_OSR_LMT GENMASK(23, 20) +#define DMA_AXI_WR_OSR_LMT_SHIFT 20 +#define DMA_AXI_WR_OSR_LMT_MASK 0xf +#define DMA_AXI_RD_OSR_LMT GENMASK(19, 16) +#define DMA_AXI_RD_OSR_LMT_SHIFT 16 +#define DMA_AXI_RD_OSR_LMT_MASK 0xf + +#define DMA_AXI_OSR_MAX 0xf +#define DMA_AXI_MAX_OSR_LIMIT ((DMA_AXI_OSR_MAX << DMA_AXI_WR_OSR_LMT_SHIFT) | \ + (DMA_AXI_OSR_MAX << DMA_AXI_RD_OSR_LMT_SHIFT)) +#define DMA_AXI_1KBBE BIT(13) +#define DMA_AXI_AAL BIT(12) +#define DMA_AXI_BLEN256 BIT(7) +#define DMA_AXI_BLEN128 BIT(6) +#define DMA_AXI_BLEN64 BIT(5) +#define DMA_AXI_BLEN32 BIT(4) +#define DMA_AXI_BLEN16 BIT(3) +#define DMA_AXI_BLEN8 BIT(2) +#define DMA_AXI_BLEN4 BIT(1) +#define DMA_BURST_LEN_DEFAULT (DMA_AXI_BLEN256 | DMA_AXI_BLEN128 | \ + DMA_AXI_BLEN64 | DMA_AXI_BLEN32 | \ + DMA_AXI_BLEN16 | DMA_AXI_BLEN8 | \ + DMA_AXI_BLEN4) + +#define DMA_AXI_UNDEF BIT(0) + +#define DMA_AXI_BURST_LEN_MASK 0x000000FE + #define DMA_CUR_TX_BUF_ADDR 0x00001050 /* Current Host Tx Buffer */ #define DMA_CUR_RX_BUF_ADDR 0x00001054 /* Current Host Rx Buffer */ #define DMA_HW_FEATURE 0x00001058 /* HW Feature Register */ diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 13752e933e43..89c26268822e 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -1635,7 +1635,7 @@ static void stmmac_check_ether_addr(struct stmmac_priv *priv) */ static int stmmac_init_dma_engine(struct stmmac_priv *priv) { - int pbl = DEFAULT_DMA_PBL, fixed_burst = 0, burst_len = 0; + int pbl = DEFAULT_DMA_PBL, fixed_burst = 0, aal = 0; int mixed_burst = 0; int atds = 0; int ret = 0; @@ -1644,7 +1644,7 @@ static int stmmac_init_dma_engine(struct stmmac_priv *priv) pbl = priv->plat->dma_cfg->pbl; fixed_burst = priv->plat->dma_cfg->fixed_burst; mixed_burst = priv->plat->dma_cfg->mixed_burst; - 
burst_len = priv->plat->dma_cfg->burst_len;
+		aal = priv->plat->dma_cfg->aal;
 	}
 
 	if (priv->extend_desc && (priv->mode == STMMAC_RING_MODE))
@@ -1657,8 +1657,12 @@ static int stmmac_init_dma_engine(struct stmmac_priv *priv)
 	}
 
 	priv->hw->dma->init(priv->ioaddr, pbl, fixed_burst, mixed_burst,
-			    burst_len, priv->dma_tx_phy,
-			    priv->dma_rx_phy, atds);
+			    aal, priv->dma_tx_phy, priv->dma_rx_phy, atds);
+
+	if ((priv->synopsys_id >= DWMAC_CORE_3_50) &&
+	    (priv->plat->axi && priv->hw->dma->axi))
+		priv->hw->dma->axi(priv->ioaddr, priv->plat->axi);
+
 	return ret;
 }
 
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c
index d71a721ea61c..ae4388735b7f 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c
@@ -81,7 +81,7 @@ static void stmmac_default_data(struct plat_stmmacenet_data *plat)
 	plat->mdio_bus_data->phy_mask = 0;
 
 	plat->dma_cfg->pbl = 32;
-	plat->dma_cfg->burst_len = DMA_AXI_BLEN_256;
+	/* TODO: AXI */
 
 	/* Set default value for multicast hash bins */
 	plat->multicast_filter_bins = HASH_TABLE_SIZE;
@@ -115,8 +115,8 @@ static int quark_default_data(struct plat_stmmacenet_data *plat,
 	plat->mdio_bus_data->phy_mask = 0;
 
 	plat->dma_cfg->pbl = 16;
-	plat->dma_cfg->burst_len = DMA_AXI_BLEN_256;
 	plat->dma_cfg->fixed_burst = 1;
+	/* AXI (TODO) */
 
 	/* Set default value for multicast hash bins */
 	plat->multicast_filter_bins = HASH_TABLE_SIZE;
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
index 6a52fa18cbf2..69ccf486d4fa 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
@@ -95,6 +95,42 @@ static int dwmac1000_validate_ucast_entries(int ucast_entries)
 	return x;
 }
 
+/**
+ * stmmac_axi_setup - parse DT parameters for programming the AXI register
+ * @pdev: platform device
+ * Description:
+ * if required, the AXI internal register can be tuned from the
+ * device-tree by using the dedicated platform parameters.
+ */
+static struct stmmac_axi *stmmac_axi_setup(struct platform_device *pdev)
+{
+	struct device_node *np;
+	struct stmmac_axi *axi;
+
+	np = of_parse_phandle(pdev->dev.of_node, "snps,axi-config", 0);
+	if (!np)
+		return NULL;
+
+	axi = kzalloc(sizeof(*axi), GFP_KERNEL);
+	if (!axi)
+		return ERR_PTR(-ENOMEM);
+
+	axi->axi_lpi_en = of_property_read_bool(np, "snps,lpi_en");
+	axi->axi_xit_frm = of_property_read_bool(np, "snps,xit_frm");
+	axi->axi_kbbe = of_property_read_bool(np, "snps,axi_kbbe");
+	axi->axi_axi_all = of_property_read_bool(np, "snps,axi_all");
+	axi->axi_fb = of_property_read_bool(np, "snps,axi_fb");
+	axi->axi_mb = of_property_read_bool(np, "snps,axi_mb");
+	axi->axi_rb = of_property_read_bool(np, "snps,axi_rb");
+
+	of_property_read_u32(np, "snps,wr_osr_lmt", &axi->axi_wr_osr_lmt);
+	of_property_read_u32(np, "snps,rd_osr_lmt", &axi->axi_rd_osr_lmt);
+	of_property_read_u32_array(np, "snps,blen", axi->axi_blen, AXI_BLEN);
+
+	return axi;
+}
+
 /**
 * stmmac_probe_config_dt - parse device-tree driver parameters
 * @pdev: platform_device structure
@@ -216,13 +252,11 @@ stmmac_probe_config_dt(struct platform_device *pdev, const char **mac)
 	}
 	plat->dma_cfg = dma_cfg;
 	of_property_read_u32(np, "snps,pbl", &dma_cfg->pbl);
+	dma_cfg->aal = of_property_read_bool(np, "snps,aal");
 	dma_cfg->fixed_burst = of_property_read_bool(np, "snps,fixed-burst");
 	dma_cfg->mixed_burst = of_property_read_bool(np, "snps,mixed-burst");
-	of_property_read_u32(np, "snps,burst_len", &dma_cfg->burst_len);
-	if (dma_cfg->burst_len < 0 || dma_cfg->burst_len > 256)
-		dma_cfg->burst_len = 0;
 	}
 	plat->force_thresh_dma_mode = of_property_read_bool(np, "snps,force_thresh_dma_mode");
 	if (plat->force_thresh_dma_mode) {
@@ -230,6 +264,8 @@ stmmac_probe_config_dt(struct platform_device *pdev, const char **mac)
 		pr_warn("force_sf_dma_mode is ignored if force_thresh_dma_mode is set.");
 	}
 
+	plat->axi = stmmac_axi_setup(pdev);
+
 	return plat;
 }
 #else
diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h
index eead8ab93c0a..6e53fa8942a4 100644
--- a/include/linux/stmmac.h
+++ b/include/linux/stmmac.h
@@ -90,7 +90,21 @@ struct stmmac_dma_cfg {
 	int pbl;
 	int fixed_burst;
 	int mixed_burst;
-	int burst_len;
+	bool aal;
+};
+
+#define AXI_BLEN 7
+struct stmmac_axi {
+	bool axi_lpi_en;
+	bool axi_xit_frm;
+	u32 axi_wr_osr_lmt;
+	u32 axi_rd_osr_lmt;
+	bool axi_kbbe;
+	bool axi_axi_all;
+	u32 axi_blen[AXI_BLEN];
+	bool axi_fb;
+	bool axi_mb;
+	bool axi_rb;
 };
 
 struct plat_stmmacenet_data {
@@ -122,5 +136,6 @@ struct plat_stmmacenet_data {
 	int (*init)(struct platform_device *pdev, void *priv);
 	void (*exit)(struct platform_device *pdev, void *priv);
 	void *bsp_priv;
+	struct stmmac_axi *axi;
 };
 #endif
-- 
cgit v1.2.3


From a9e340dce3c3bceeb42f6b6d33b7858822d76cb6 Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov
Date: Sun, 17 Jan 2016 10:57:09 -0800
Subject: Input: rotary_encoder - move away from platform data structure

Drop support for platform data passed via a C-structure and switch to
device properties instead, which should make the driver compatible with
all platforms: OF, ACPI and static boards. Static boards should use
property sets to communicate device parameters to the driver.
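A minimal sketch of that property-set pattern for a hypothetical static
board (the names here are invented for illustration; the raumfeld
conversion below is the real in-tree example of the same thing):

	static u32 board_encoder_steps = 24;

	static struct property_entry board_encoder_props[] = {
		{ "rotary-encoder,steps-per-period",
			DEV_PROP_U32, 1, &board_encoder_steps, },
		{ NULL }
	};

	static struct property_set board_encoder_pset = {
		.properties = board_encoder_props,
	};

	/* attach the properties before registering the device: */
	device_add_property_set(&board_encoder_device.dev,
				&board_encoder_pset);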
Signed-off-by: Dmitry Torokhov --- arch/arm/mach-pxa/raumfeld.c | 29 +++++-- drivers/input/misc/rotary_encoder.c | 146 ++++++++++++++---------------------- include/linux/rotary_encoder.h | 13 ---- 3 files changed, 76 insertions(+), 112 deletions(-) delete mode 100644 include/linux/rotary_encoder.h (limited to 'include/linux') diff --git a/arch/arm/mach-pxa/raumfeld.c b/arch/arm/mach-pxa/raumfeld.c index bac88e0be58a..82689ac20ccf 100644 --- a/arch/arm/mach-pxa/raumfeld.c +++ b/arch/arm/mach-pxa/raumfeld.c @@ -18,13 +18,13 @@ #include #include +#include #include #include #include #include #include #include -#include #include #include #include @@ -378,18 +378,27 @@ static struct gpiod_lookup_table raumfeld_rotary_gpios_table = { }, }; -static struct rotary_encoder_platform_data raumfeld_rotary_encoder_info = { - .steps = 24, - .axis = REL_X, - .relative_axis = 1, +static u32 raumfeld_rotary_encoder_steps = 24; +static u32 raumfeld_rotary_encoder_axis = REL_X; +static u32 raumfeld_rotary_encoder_relative_axis = 1; + +static struct property_entry raumfeld_rotary_properties[] = { + { "rotary-encoder,steps-per-period", + DEV_PROP_U32, 1, &raumfeld_rotary_encoder_steps, }, + { "linux,axis", + DEV_PROP_U32, 1, &raumfeld_rotary_encoder_axis, }, + { "rotary-encoder,relative_axis", + DEV_PROP_U32, 1, &raumfeld_rotary_encoder_relative_axis, }, + { NULL } +}; + +static struct property_set raumfeld_rotary_property_set = { + .properties = raumfeld_rotary_properties, }; static struct platform_device rotary_encoder_device = { .name = "rotary-encoder", .id = 0, - .dev = { - .platform_data = &raumfeld_rotary_encoder_info, - } }; /** @@ -1061,6 +1070,8 @@ static void __init raumfeld_controller_init(void) pxa3xx_mfp_config(ARRAY_AND_SIZE(raumfeld_controller_pin_config)); gpiod_add_lookup_table(&raumfeld_rotary_gpios_table); + device_add_property_set(&rotary_encoder_device.dev, + &raumfeld_rotary_property_set); platform_device_register(&rotary_encoder_device); spi_register_board_info(ARRAY_AND_SIZE(controller_spi_devices)); @@ -1099,6 +1110,8 @@ static void __init raumfeld_speaker_init(void) platform_device_register(&smc91x_device); gpiod_add_lookup_table(&raumfeld_rotary_gpios_table); + device_add_property_set(&rotary_encoder_device.dev, + &raumfeld_rotary_property_set); platform_device_register(&rotary_encoder_device); raumfeld_audio_init(); diff --git a/drivers/input/misc/rotary_encoder.c b/drivers/input/misc/rotary_encoder.c index 70fdcce84866..5a4e1d69c4af 100644 --- a/drivers/input/misc/rotary_encoder.c +++ b/drivers/input/misc/rotary_encoder.c @@ -21,20 +21,23 @@ #include #include #include -#include #include #include -#include #include +#include #define DRV_NAME "rotary-encoder" struct rotary_encoder { struct input_dev *input; - const struct rotary_encoder_platform_data *pdata; + struct mutex access_mutex; - unsigned int axis; + u32 steps; + u32 axis; + bool relative_axis; + bool rollover; + unsigned int pos; struct gpio_desc *gpio_a; @@ -59,31 +62,29 @@ static int rotary_encoder_get_state(struct rotary_encoder *encoder) static void rotary_encoder_report_event(struct rotary_encoder *encoder) { - const struct rotary_encoder_platform_data *pdata = encoder->pdata; - - if (pdata->relative_axis) { + if (encoder->relative_axis) { input_report_rel(encoder->input, - pdata->axis, encoder->dir ? -1 : 1); + encoder->axis, encoder->dir ? 
-1 : 1); } else { unsigned int pos = encoder->pos; if (encoder->dir) { /* turning counter-clockwise */ - if (pdata->rollover) - pos += pdata->steps; + if (encoder->rollover) + pos += encoder->steps; if (pos) pos--; } else { /* turning clockwise */ - if (pdata->rollover || pos < pdata->steps) + if (encoder->rollover || pos < encoder->steps) pos++; } - if (pdata->rollover) - pos %= pdata->steps; + if (encoder->rollover) + pos %= encoder->steps; encoder->pos = pos; - input_report_abs(encoder->input, pdata->axis, encoder->pos); + input_report_abs(encoder->input, encoder->axis, encoder->pos); } input_sync(encoder->input); @@ -204,90 +205,43 @@ out: return IRQ_HANDLED; } -#ifdef CONFIG_OF -static const struct of_device_id rotary_encoder_of_match[] = { - { .compatible = "rotary-encoder", }, - { }, -}; -MODULE_DEVICE_TABLE(of, rotary_encoder_of_match); - -static struct rotary_encoder_platform_data *rotary_encoder_parse_dt(struct device *dev) -{ - const struct of_device_id *of_id = - of_match_device(rotary_encoder_of_match, dev); - struct device_node *np = dev->of_node; - struct rotary_encoder_platform_data *pdata; - int error; - - if (!of_id || !np) - return NULL; - - pdata = devm_kzalloc(dev, sizeof(struct rotary_encoder_platform_data), - GFP_KERNEL); - if (!pdata) - return ERR_PTR(-ENOMEM); - - of_property_read_u32(np, "rotary-encoder,steps", &pdata->steps); - of_property_read_u32(np, "linux,axis", &pdata->axis); - - pdata->relative_axis = - of_property_read_bool(np, "rotary-encoder,relative-axis"); - pdata->rollover = of_property_read_bool(np, "rotary-encoder,rollover"); - - error = of_property_read_u32(np, "rotary-encoder,steps-per-period", - &pdata->steps_per_period); - if (error) { - /* - * The 'half-period' property has been deprecated, you must use - * 'steps-per-period' and set an appropriate value, but we still - * need to parse it to maintain compatibility. - */ - if (of_property_read_bool(np, "rotary-encoder,half-period")) { - pdata->steps_per_period = 2; - } else { - /* Fallback to one step per period behavior */ - pdata->steps_per_period = 1; - } - } - - pdata->wakeup_source = of_property_read_bool(np, "wakeup-source"); - - return pdata; -} -#else -static inline struct rotary_encoder_platform_data * -rotary_encoder_parse_dt(struct device *dev) -{ - return NULL; -} -#endif - static int rotary_encoder_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; - const struct rotary_encoder_platform_data *pdata = dev_get_platdata(dev); struct rotary_encoder *encoder; struct input_dev *input; irq_handler_t handler; + u32 steps_per_period; int err; - if (!pdata) { - pdata = rotary_encoder_parse_dt(dev); - if (IS_ERR(pdata)) - return PTR_ERR(pdata); - - if (!pdata) { - dev_err(dev, "missing platform data\n"); - return -EINVAL; - } - } - encoder = devm_kzalloc(dev, sizeof(struct rotary_encoder), GFP_KERNEL); if (!encoder) return -ENOMEM; mutex_init(&encoder->access_mutex); - encoder->pdata = pdata; + + device_property_read_u32(dev, "rotary-encoder,steps", &encoder->steps); + + err = device_property_read_u32(dev, "rotary-encoder,steps-per-period", + &steps_per_period); + if (err) { + /* + * The 'half-period' property has been deprecated, you must + * use 'steps-per-period' and set an appropriate value, but + * we still need to parse it to maintain compatibility. If + * neither property is present we fall back to the one step + * per period behavior. + */ + steps_per_period = device_property_read_bool(dev, + "rotary-encoder,half-period") ? 
2 : 1; + } + + encoder->rollover = + device_property_read_bool(dev, "rotary-encoder,rollover"); + + device_property_read_u32(dev, "linux,axis", &encoder->axis); + encoder->relative_axis = + device_property_read_bool(dev, "rotary-encoder,relative-axis"); encoder->gpio_a = devm_gpiod_get_index(dev, NULL, 0, GPIOD_IN); if (IS_ERR(encoder->gpio_a)) { @@ -317,12 +271,13 @@ static int rotary_encoder_probe(struct platform_device *pdev) input->id.bustype = BUS_HOST; input->dev.parent = dev; - if (pdata->relative_axis) - input_set_capability(input, EV_REL, pdata->axis); + if (encoder->relative_axis) + input_set_capability(input, EV_REL, encoder->axis); else - input_set_abs_params(input, pdata->axis, 0, pdata->steps, 0, 1); + input_set_abs_params(input, + encoder->axis, 0, encoder->steps, 0, 1); - switch (pdata->steps_per_period) { + switch (steps_per_period) { case 4: handler = &rotary_encoder_quarter_period_irq; encoder->last_stable = rotary_encoder_get_state(encoder); @@ -336,7 +291,7 @@ static int rotary_encoder_probe(struct platform_device *pdev) break; default: dev_err(dev, "'%d' is not a valid steps-per-period value\n", - pdata->steps_per_period); + steps_per_period); return -EINVAL; } @@ -364,7 +319,8 @@ static int rotary_encoder_probe(struct platform_device *pdev) return err; } - device_init_wakeup(&pdev->dev, pdata->wakeup_source); + device_init_wakeup(dev, + device_property_read_bool(dev, "wakeup-source")); platform_set_drvdata(pdev, encoder); @@ -398,6 +354,14 @@ static int __maybe_unused rotary_encoder_resume(struct device *dev) static SIMPLE_DEV_PM_OPS(rotary_encoder_pm_ops, rotary_encoder_suspend, rotary_encoder_resume); +#ifdef CONFIG_OF +static const struct of_device_id rotary_encoder_of_match[] = { + { .compatible = "rotary-encoder", }, + { }, +}; +MODULE_DEVICE_TABLE(of, rotary_encoder_of_match); +#endif + static struct platform_driver rotary_encoder_driver = { .probe = rotary_encoder_probe, .driver = { diff --git a/include/linux/rotary_encoder.h b/include/linux/rotary_encoder.h deleted file mode 100644 index 4536c813a1e9..000000000000 --- a/include/linux/rotary_encoder.h +++ /dev/null @@ -1,13 +0,0 @@ -#ifndef __ROTARY_ENCODER_H__ -#define __ROTARY_ENCODER_H__ - -struct rotary_encoder_platform_data { - unsigned int steps; - unsigned int axis; - unsigned int steps_per_period; - bool relative_axis; - bool rollover; - bool wakeup_source; -}; - -#endif /* __ROTARY_ENCODER_H__ */ -- cgit v1.2.3 From ff20c2e0acc5ad7e27c68592ade135efee399549 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Tue, 1 Mar 2016 09:45:14 +0530 Subject: mm: Some arch may want to use HPAGE_PMD related values as variables With next generation power processor, we are having a new mmu model [1] that require us to maintain a different linux page table format. Inorder to support both current and future ppc64 systems with a single kernel we need to make sure kernel can select between different page table format at runtime. With the new MMU (radix MMU) added, we will have two different pmd hugepage size 16MB for hash model and 2MB for Radix model. Hence make HPAGE_PMD related values as a variable. Actual conversion of HPAGE_PMD to a variable for ppc64 happens in a followup patch. [1] http://ibm.biz/power-isa3 (Needs registration). Signed-off-by: Kirill A. 
Shutemov Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman --- arch/powerpc/mm/pgtable_64.c | 7 +++++++ include/linux/bug.h | 9 +++++++++ include/linux/huge_mm.h | 3 --- mm/huge_memory.c | 17 ++++++++++++++--- 4 files changed, 30 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c index af304e6d5a89..0eb53128ca2a 100644 --- a/arch/powerpc/mm/pgtable_64.c +++ b/arch/powerpc/mm/pgtable_64.c @@ -817,6 +817,13 @@ pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm, int has_transparent_hugepage(void) { + + BUILD_BUG_ON_MSG((PMD_SHIFT - PAGE_SHIFT) >= MAX_ORDER, + "hugepages can't be allocated by the buddy allocator"); + + BUILD_BUG_ON_MSG((PMD_SHIFT - PAGE_SHIFT) < 2, + "We need more than 2 pages to do deferred thp split"); + if (!mmu_has_feature(MMU_FTR_16M_PAGE)) return 0; /* diff --git a/include/linux/bug.h b/include/linux/bug.h index 7f4818673c41..e51b0709e78d 100644 --- a/include/linux/bug.h +++ b/include/linux/bug.h @@ -20,6 +20,7 @@ struct pt_regs; #define BUILD_BUG_ON_MSG(cond, msg) (0) #define BUILD_BUG_ON(condition) (0) #define BUILD_BUG() (0) +#define MAYBE_BUILD_BUG_ON(cond) (0) #else /* __CHECKER__ */ /* Force a compilation error if a constant expression is not a power of 2 */ @@ -83,6 +84,14 @@ struct pt_regs; */ #define BUILD_BUG() BUILD_BUG_ON_MSG(1, "BUILD_BUG failed") +#define MAYBE_BUILD_BUG_ON(cond) \ + do { \ + if (__builtin_constant_p((cond))) \ + BUILD_BUG_ON(cond); \ + else \ + BUG_ON(cond); \ + } while (0) + #endif /* __CHECKER__ */ #ifdef CONFIG_GENERIC_BUG diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index 459fd25b378e..f12513a20a06 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -111,9 +111,6 @@ void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, __split_huge_pmd(__vma, __pmd, __address); \ } while (0) -#if HPAGE_PMD_ORDER >= MAX_ORDER -#error "hugepages can't be allocated by the buddy allocator" -#endif extern int hugepage_madvise(struct vm_area_struct *vma, unsigned long *vm_flags, int advice); extern void vma_adjust_trans_huge(struct vm_area_struct *vma, diff --git a/mm/huge_memory.c b/mm/huge_memory.c index aea8f7a42df9..36c22a89df61 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -83,7 +83,7 @@ unsigned long transparent_hugepage_flags __read_mostly = (1<= MAX_ORDER); + /* + * we use page->mapping and page->index in second tail page + * as list_head: assuming THP order >= 2 + */ + MAYBE_BUILD_BUG_ON(HPAGE_PMD_ORDER < 2); + err = hugepage_init_sysfs(&hugepage_kobj); if (err) goto err_sysfs; @@ -764,7 +776,6 @@ void prep_transhuge_page(struct page *page) * we use page->mapping and page->indexlru in second tail page * as list_head: assuming THP order >= 2 */ - BUILD_BUG_ON(HPAGE_PMD_ORDER < 2); INIT_LIST_HEAD(page_deferred_list(page)); set_compound_page_dtor(page, TRANSHUGE_PAGE_DTOR); -- cgit v1.2.3 From 88cb04b96a1934ecbfd1d324e7cde55890c1a576 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 3 Mar 2016 09:57:58 -0500 Subject: cgroup: explicitly track whether a cgroup_subsys_state is visible to userland Currently, whether a css (cgroup_subsys_state) has its interface files created is not tracked and assumed to change together with the owning cgroup's lifecycle. cgroup directory and interface creation is being separated out from internal object creation to help refactoring and eventually allow cgroups which are not visible through cgroupfs. 
This patch adds CSS_VISIBLE to track whether a css has its interface
files created and performs management operations only when necessary,
which helps decouple interface file handling from the internal object
lifecycle. After this patch, all css interface file management functions
can be called regardless of the current state and will achieve the
expected result.

Signed-off-by: Tejun Heo
Acked-by: Zefan Li
---
 include/linux/cgroup-defs.h | 1 +
 kernel/cgroup.c | 13 ++++++++++++-
 2 files changed, 13 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h
index 8fc3f0452289..7593c1a46786 100644
--- a/include/linux/cgroup-defs.h
+++ b/include/linux/cgroup-defs.h
@@ -45,6 +45,7 @@ enum {
	CSS_NO_REF	= (1 << 0), /* no reference counting for this css */
	CSS_ONLINE	= (1 << 1), /* between ->css_online() and ->css_offline() */
	CSS_RELEASED	= (1 << 2), /* refcnt reached zero, released */
+	CSS_VISIBLE	= (1 << 3), /* css is visible to userland */
 };
 
 /* bits in struct cgroup flags field */
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 4178e45becb4..a9a53ca942f3 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -1421,6 +1421,11 @@ static void css_clear_dir(struct cgroup_subsys_state *css,
 	struct cgroup *cgrp = cgrp_override ?: css->cgroup;
 	struct cftype *cfts;
 
+	if (!(css->flags & CSS_VISIBLE))
+		return;
+
+	css->flags &= ~CSS_VISIBLE;
+
 	list_for_each_entry(cfts, &css->ss->cfts, node)
 		cgroup_addrm_files(css, cgrp, cfts, false);
 }
@@ -1439,6 +1444,9 @@ static int css_populate_dir(struct cgroup_subsys_state *css,
 	struct cftype *cfts, *failed_cfts;
 	int ret;
 
+	if (css->flags & CSS_VISIBLE)
+		return 0;
+
 	if (!css->ss) {
 		if (cgroup_on_dfl(cgrp))
 			cfts = cgroup_dfl_base_files;
@@ -1455,6 +1463,9 @@ static int css_populate_dir(struct cgroup_subsys_state *css,
 			goto err;
 		}
 	}
+
+	css->flags |= CSS_VISIBLE;
+
 	return 0;
 err:
 	list_for_each_entry(cfts, &css->ss->cfts, node) {
@@ -3403,7 +3414,7 @@ static int cgroup_apply_cftypes(struct cftype *cfts, bool is_add)
 	css_for_each_descendant_pre(css, cgroup_css(root, ss)) {
 		struct cgroup *cgrp = css->cgroup;
 
-		if (cgroup_is_dead(cgrp))
+		if (!(css->flags & CSS_VISIBLE))
 			continue;
 
 		ret = cgroup_addrm_files(css, cgrp, cfts, is_add);
-- 
cgit v1.2.3


From 15a27c362d54378f17ec078579b2f6af88495a3f Mon Sep 17 00:00:00 2001
From: Tejun Heo
Date: Thu, 3 Mar 2016 09:57:59 -0500
Subject: cgroup: introduce cgroup_{save|propagate|restore}_control()

While controllers are being enabled and disabled in
cgroup_subtree_control_write(), the original subsystem masks are
stashed in local variables so that they can be restored if the
operation fails in the middle.

This patch adds dedicated fields to struct cgroup to be used instead of
the local variables and implements functions to stash the current
values, propagate the changes and restore them recursively. Combined
with the previous changes, this makes subsystem management operations
fully recursive and modularized. This will be used to expand cgroup
core functionalities.

While at it, remove the now unused @css_enable and @css_disable from
cgroup_subtree_control_write().
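Condensed from the cgroup_subtree_control_write() hunks below, the
resulting calling convention looks like this (error labels elided):

	cgroup_save_control(cgrp);		/* stash the current masks */

	cgrp->subtree_control |= enable;	/* apply the requested change */
	cgrp->subtree_control &= ~disable;

	cgroup_propagate_control(cgrp);		/* refresh masks down the tree */

	ret = cgroup_apply_control_enable(cgrp);
	if (ret) {
		/* roll back the masks and shoot down newly created csses */
		cgroup_restore_control(cgrp);
		cgroup_apply_control_disable(cgrp);
	}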
Signed-off-by: Tejun Heo Acked-by: Zefan Li --- include/linux/cgroup-defs.h | 2 ++ kernel/cgroup.c | 82 ++++++++++++++++++++++++++++++++++----------- 2 files changed, 64 insertions(+), 20 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 7593c1a46786..aae8c94de4b3 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -260,6 +260,8 @@ struct cgroup { */ u16 subtree_control; u16 subtree_ss_mask; + u16 old_subtree_control; + u16 old_subtree_ss_mask; /* Private pointers for each registered subsystem */ struct cgroup_subsys_state __rcu *subsys[CGROUP_SUBSYS_COUNT]; diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 0398f2a6673b..452a90e455fa 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -3031,6 +3031,62 @@ static bool cgroup_drain_offline(struct cgroup *cgrp) return false; } +/** + * cgroup_save_control - save control masks of a subtree + * @cgrp: root of the target subtree + * + * Save ->subtree_control and ->subtree_ss_mask to the respective old_ + * prefixed fields for @cgrp's subtree including @cgrp itself. + */ +static void cgroup_save_control(struct cgroup *cgrp) +{ + struct cgroup *dsct; + struct cgroup_subsys_state *d_css; + + cgroup_for_each_live_descendant_pre(dsct, d_css, cgrp) { + dsct->old_subtree_control = dsct->subtree_control; + dsct->old_subtree_ss_mask = dsct->subtree_ss_mask; + } +} + +/** + * cgroup_propagate_control - refresh control masks of a subtree + * @cgrp: root of the target subtree + * + * For @cgrp and its subtree, ensure ->subtree_ss_mask matches + * ->subtree_control and propagate controller availability through the + * subtree so that descendants don't have unavailable controllers enabled. + */ +static void cgroup_propagate_control(struct cgroup *cgrp) +{ + struct cgroup *dsct; + struct cgroup_subsys_state *d_css; + + cgroup_for_each_live_descendant_pre(dsct, d_css, cgrp) { + dsct->subtree_control &= cgroup_control(dsct); + dsct->subtree_ss_mask = cgroup_calc_subtree_ss_mask(dsct, + dsct->subtree_control); + } +} + +/** + * cgroup_restore_control - restore control masks of a subtree + * @cgrp: root of the target subtree + * + * Restore ->subtree_control and ->subtree_ss_mask from the respective old_ + * prefixed fields for @cgrp's subtree including @cgrp itself. + */ +static void cgroup_restore_control(struct cgroup *cgrp) +{ + struct cgroup *dsct; + struct cgroup_subsys_state *d_css; + + cgroup_for_each_live_descendant_post(dsct, d_css, cgrp) { + dsct->subtree_control = dsct->old_subtree_control; + dsct->subtree_ss_mask = dsct->old_subtree_ss_mask; + } +} + /** * cgroup_apply_control_enable - enable or show csses according to control * @cgrp: root of the target subtree @@ -3119,7 +3175,6 @@ static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of, loff_t off) { u16 enable = 0, disable = 0; - u16 css_enable, css_disable, old_sc, new_sc, old_ss, new_ss; struct cgroup *cgrp, *child; struct cgroup_subsys *ss; char *tok; @@ -3203,25 +3258,14 @@ static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of, return restart_syscall(); } - /* - * Update subsys masks and calculate what needs to be done. More - * subsystems than specified may need to be enabled or disabled - * depending on subsystem dependencies. 
- */ - old_sc = cgrp->subtree_control; - old_ss = cgrp->subtree_ss_mask; - new_sc = (old_sc | enable) & ~disable; - new_ss = cgroup_calc_subtree_ss_mask(cgrp, new_sc); + /* save and update control masks and prepare csses */ + cgroup_save_control(cgrp); - css_enable = ~old_ss & new_ss; - css_disable = old_ss & ~new_ss; - enable |= css_enable; - disable |= css_disable; + cgrp->subtree_control |= enable; + cgrp->subtree_control &= ~disable; - cgrp->subtree_control = new_sc; - cgrp->subtree_ss_mask = new_ss; + cgroup_propagate_control(cgrp); - /* prepare csses */ ret = cgroup_apply_control_enable(cgrp); if (ret) goto err_undo_css; @@ -3246,9 +3290,7 @@ out_unlock: err_undo_css: /* restore masks and shoot down new csses */ - cgrp->subtree_control = old_sc; - cgrp->subtree_ss_mask = old_ss; - + cgroup_restore_control(cgrp); cgroup_apply_control_disable(cgrp); goto out_unlock; -- cgit v1.2.3 From d5bdec8ddb9f5fac3b351bed463a7132f6ba907b Mon Sep 17 00:00:00 2001 From: Matias Bjørling Date: Fri, 19 Feb 2016 13:56:58 +0100 Subject: lightnvm: fold get bb tbl when using dual/quad plane mode MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When the media manager runs in dual or quad plane mode, lightnvm abstracts away plane specific commands. This poses a problem for get bad block table, as it reports bad blocks per plane, making the table either two or four times bigger than expected. Fold the bad block list before returning. Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/lightnvm/core.c | 2 +- drivers/nvme/host/lightnvm.c | 46 +++++++++++++++++++++++++++++++++++++++----- include/linux/lightnvm.h | 6 +++--- 3 files changed, 45 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/drivers/lightnvm/core.c b/drivers/lightnvm/core.c index 782ac5d60a49..968ba7ed4158 100644 --- a/drivers/lightnvm/core.c +++ b/drivers/lightnvm/core.c @@ -250,7 +250,7 @@ int nvm_set_rqd_ppalist(struct nvm_dev *dev, struct nvm_rq *rqd, return 0; } - plane_cnt = (1 << dev->plane_mode); + plane_cnt = dev->plane_mode; rqd->nr_pages = plane_cnt * nr_ppas; if (dev->ops->max_phys_sect < rqd->nr_pages) diff --git a/drivers/nvme/host/lightnvm.c b/drivers/nvme/host/lightnvm.c index 5cd3725e2fa4..d4f81f07f296 100644 --- a/drivers/nvme/host/lightnvm.c +++ b/drivers/nvme/host/lightnvm.c @@ -373,8 +373,31 @@ out: return ret; } +static void nvme_nvm_bb_tbl_fold(struct nvm_dev *nvmdev, + int nr_dst_blks, u8 *dst_blks, + int nr_src_blks, u8 *src_blks) +{ + int blk, offset, pl, blktype; + + for (blk = 0; blk < nr_dst_blks; blk++) { + offset = blk * nvmdev->plane_mode; + blktype = src_blks[offset]; + + /* Bad blocks on any planes take precedence over other types */ + for (pl = 0; pl < nvmdev->plane_mode; pl++) { + if (src_blks[offset + pl] & + (NVM_BLK_T_BAD|NVM_BLK_T_GRWN_BAD)) { + blktype = src_blks[offset + pl]; + break; + } + } + + dst_blks[blk] = blktype; + } +} + static int nvme_nvm_get_bb_tbl(struct nvm_dev *nvmdev, struct ppa_addr ppa, - int nr_blocks, nvm_bb_update_fn *update_bbtbl, + int nr_dst_blks, nvm_bb_update_fn *update_bbtbl, void *priv) { struct request_queue *q = nvmdev->q; @@ -382,7 +405,9 @@ static int nvme_nvm_get_bb_tbl(struct nvm_dev *nvmdev, struct ppa_addr ppa, struct nvme_ctrl *ctrl = ns->ctrl; struct nvme_nvm_command c = {}; struct nvme_nvm_bb_tbl *bb_tbl; - int tblsz = sizeof(struct nvme_nvm_bb_tbl) + nr_blocks; + u8 *dst_blks = NULL; + int nr_src_blks = nr_dst_blks * nvmdev->plane_mode; + int tblsz = sizeof(struct nvme_nvm_bb_tbl) + 
nr_src_blks;
	int ret = 0;

	c.get_bb.opcode = nvme_nvm_admin_get_bb_tbl;
@@ -393,6 +418,12 @@ static int nvme_nvm_get_bb_tbl(struct nvm_dev *nvmdev, struct ppa_addr ppa,
 	if (!bb_tbl)
 		return -ENOMEM;
 
+	dst_blks = kzalloc(nr_dst_blks, GFP_KERNEL);
+	if (!dst_blks) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
 	ret = nvme_submit_sync_cmd(ctrl->admin_q, (struct nvme_command *)&c,
								bb_tbl, tblsz);
 	if (ret) {
@@ -414,16 +445,21 @@ static int nvme_nvm_get_bb_tbl(struct nvm_dev *nvmdev, struct ppa_addr ppa,
 		goto out;
 	}
 
-	if (le32_to_cpu(bb_tbl->tblks) != nr_blocks) {
+	if (le32_to_cpu(bb_tbl->tblks) != nr_src_blks) {
 		ret = -EINVAL;
 		dev_err(ctrl->dev, "bbt unsuspected blocks returned (%u!=%u)",
-				le32_to_cpu(bb_tbl->tblks), nr_blocks);
+				le32_to_cpu(bb_tbl->tblks), nr_src_blks);
 		goto out;
 	}
 
+	nvme_nvm_bb_tbl_fold(nvmdev, nr_dst_blks, dst_blks,
+						nr_src_blks, bb_tbl->blk);
+
 	ppa = dev_to_generic_addr(nvmdev, ppa);
-	ret = update_bbtbl(ppa, nr_blocks, bb_tbl->blk, priv);
+	ret = update_bbtbl(ppa, nr_dst_blks, dst_blks, priv);
+
 out:
+	kfree(dst_blks);
 	kfree(bb_tbl);
 	return ret;
 }
diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h
index d6750111e48e..7fa1838f7356 100644
--- a/include/linux/lightnvm.h
+++ b/include/linux/lightnvm.h
@@ -92,9 +92,9 @@ enum {
 	NVM_ADDRMODE_CHANNEL	= 1,
 
 	/* Plane programming mode for LUN */
-	NVM_PLANE_SINGLE	= 0,
-	NVM_PLANE_DOUBLE	= 1,
-	NVM_PLANE_QUAD		= 2,
+	NVM_PLANE_SINGLE	= 1,
+	NVM_PLANE_DOUBLE	= 2,
+	NVM_PLANE_QUAD		= 4,
 
 	/* Status codes */
 	NVM_RSP_SUCCESS		= 0x0,
-- 
cgit v1.2.3


From 4ece44af733ff63a7cd12aaa8c85afb6d9fdc664 Mon Sep 17 00:00:00 2001
From: Matias Bjørling
Date: Sat, 20 Feb 2016 08:52:41 +0100
Subject: lightnvm: rename ->nr_pages to ->nr_sects
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The struct rrpc->nr_pages can easily be misread as the number of flash
pages allocated to rrpc, while it actually holds the number of sectors.
Rename the variable so that this is reflected in its name.
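To make the renamed quantity concrete: rrpc_capacity() (visible in the
hunks below) reserves cur, gc and two emergency blocks per LUN before
exposing the rest, all counted in sectors. With an assumed geometry of 4
LUNs and max_pages_per_blk = 256 (illustrative numbers, not taken from
any real device):

	reserved = rrpc->nr_luns * dev->max_pages_per_blk * 4;
			/* = 4 * 256 * 4 = 4096 sectors held back */
	provisioned = rrpc->nr_sects - reserved;
			/* everything else is exposed by the target */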
Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/lightnvm/core.c | 2 +- drivers/lightnvm/gennvm.c | 7 +++---- drivers/lightnvm/rrpc.c | 29 ++++++++++++++--------------- drivers/lightnvm/rrpc.h | 6 +++--- include/linux/lightnvm.h | 2 +- 5 files changed, 22 insertions(+), 24 deletions(-) (limited to 'include/linux') diff --git a/drivers/lightnvm/core.c b/drivers/lightnvm/core.c index 968ba7ed4158..1cb4b331c3e8 100644 --- a/drivers/lightnvm/core.c +++ b/drivers/lightnvm/core.c @@ -467,7 +467,7 @@ static int nvm_core_init(struct nvm_dev *dev) dev->blks_per_lun * dev->luns_per_chnl * dev->nr_chnls; - dev->total_pages = dev->total_blocks * dev->pgs_per_blk; + dev->total_secs = dev->total_blocks * dev->sec_per_blk; INIT_LIST_HEAD(&dev->online_targets); mutex_init(&dev->mlock); diff --git a/drivers/lightnvm/gennvm.c b/drivers/lightnvm/gennvm.c index 7fb725b16148..d65ec36a2231 100644 --- a/drivers/lightnvm/gennvm.c +++ b/drivers/lightnvm/gennvm.c @@ -100,14 +100,13 @@ static int gennvm_block_map(u64 slba, u32 nlb, __le64 *entries, void *private) { struct nvm_dev *dev = private; struct gen_nvm *gn = dev->mp; - sector_t max_pages = dev->total_pages * (dev->sec_size >> 9); u64 elba = slba + nlb; struct gen_lun *lun; struct nvm_block *blk; u64 i; int lun_id; - if (unlikely(elba > dev->total_pages)) { + if (unlikely(elba > dev->total_secs)) { pr_err("gennvm: L2P data from device is out of bounds!\n"); return -EINVAL; } @@ -115,7 +114,7 @@ static int gennvm_block_map(u64 slba, u32 nlb, __le64 *entries, void *private) for (i = 0; i < nlb; i++) { u64 pba = le64_to_cpu(entries[i]); - if (unlikely(pba >= max_pages && pba != U64_MAX)) { + if (unlikely(pba >= dev->total_secs && pba != U64_MAX)) { pr_err("gennvm: L2P data entry is out of bounds!\n"); return -EINVAL; } @@ -197,7 +196,7 @@ static int gennvm_blocks_init(struct nvm_dev *dev, struct gen_nvm *gn) } if (dev->ops->get_l2p_tbl) { - ret = dev->ops->get_l2p_tbl(dev, 0, dev->total_pages, + ret = dev->ops->get_l2p_tbl(dev, 0, dev->total_secs, gennvm_block_map, dev); if (ret) { pr_err("gennvm: could not read L2P table.\n"); diff --git a/drivers/lightnvm/rrpc.c b/drivers/lightnvm/rrpc.c index e2710da5cd78..c4d0b04ac521 100644 --- a/drivers/lightnvm/rrpc.c +++ b/drivers/lightnvm/rrpc.c @@ -552,7 +552,7 @@ static struct rrpc_addr *rrpc_update_map(struct rrpc *rrpc, sector_t laddr, struct rrpc_addr *gp; struct rrpc_rev_addr *rev; - BUG_ON(laddr >= rrpc->nr_pages); + BUG_ON(laddr >= rrpc->nr_sects); gp = &rrpc->trans_map[laddr]; spin_lock(&rrpc->rev_lock); @@ -721,7 +721,7 @@ static int rrpc_read_ppalist_rq(struct rrpc *rrpc, struct bio *bio, for (i = 0; i < npages; i++) { /* We assume that mapping occurs at 4KB granularity */ - BUG_ON(!(laddr + i >= 0 && laddr + i < rrpc->nr_pages)); + BUG_ON(!(laddr + i >= 0 && laddr + i < rrpc->nr_sects)); gp = &rrpc->trans_map[laddr + i]; if (gp->rblk) { @@ -752,7 +752,7 @@ static int rrpc_read_rq(struct rrpc *rrpc, struct bio *bio, struct nvm_rq *rqd, if (!is_gc && rrpc_lock_rq(rrpc, bio, rqd)) return NVM_IO_REQUEUE; - BUG_ON(!(laddr >= 0 && laddr < rrpc->nr_pages)); + BUG_ON(!(laddr >= 0 && laddr < rrpc->nr_sects)); gp = &rrpc->trans_map[laddr]; if (gp->rblk) { @@ -1002,11 +1002,10 @@ static int rrpc_l2p_update(u64 slba, u32 nlb, __le64 *entries, void *private) struct nvm_dev *dev = rrpc->dev; struct rrpc_addr *addr = rrpc->trans_map + slba; struct rrpc_rev_addr *raddr = rrpc->rev_trans_map; - sector_t max_pages = dev->total_pages * (dev->sec_size >> 9); u64 elba = slba + nlb; u64 i; - if (unlikely(elba 
> dev->total_pages)) { + if (unlikely(elba > dev->total_secs)) { pr_err("nvm: L2P data from device is out of bounds!\n"); return -EINVAL; } @@ -1016,7 +1015,7 @@ static int rrpc_l2p_update(u64 slba, u32 nlb, __le64 *entries, void *private) /* LNVM treats address-spaces as silos, LBA and PBA are * equally large and zero-indexed. */ - if (unlikely(pba >= max_pages && pba != U64_MAX)) { + if (unlikely(pba >= dev->total_secs && pba != U64_MAX)) { pr_err("nvm: L2P data entry is out of bounds!\n"); return -EINVAL; } @@ -1041,16 +1040,16 @@ static int rrpc_map_init(struct rrpc *rrpc) sector_t i; int ret; - rrpc->trans_map = vzalloc(sizeof(struct rrpc_addr) * rrpc->nr_pages); + rrpc->trans_map = vzalloc(sizeof(struct rrpc_addr) * rrpc->nr_sects); if (!rrpc->trans_map) return -ENOMEM; rrpc->rev_trans_map = vmalloc(sizeof(struct rrpc_rev_addr) - * rrpc->nr_pages); + * rrpc->nr_sects); if (!rrpc->rev_trans_map) return -ENOMEM; - for (i = 0; i < rrpc->nr_pages; i++) { + for (i = 0; i < rrpc->nr_sects; i++) { struct rrpc_addr *p = &rrpc->trans_map[i]; struct rrpc_rev_addr *r = &rrpc->rev_trans_map[i]; @@ -1062,8 +1061,8 @@ static int rrpc_map_init(struct rrpc *rrpc) return 0; /* Bring up the mapping table from device */ - ret = dev->ops->get_l2p_tbl(dev, 0, dev->total_pages, - rrpc_l2p_update, rrpc); + ret = dev->ops->get_l2p_tbl(dev, 0, dev->total_secs, rrpc_l2p_update, + rrpc); if (ret) { pr_err("nvm: rrpc: could not read L2P table.\n"); return -EINVAL; @@ -1163,7 +1162,7 @@ static int rrpc_luns_init(struct rrpc *rrpc, int lun_begin, int lun_end) spin_lock_init(&rlun->lock); rrpc->total_blocks += dev->blks_per_lun; - rrpc->nr_pages += dev->sec_per_lun; + rrpc->nr_sects += dev->sec_per_lun; rlun->blocks = vzalloc(sizeof(struct rrpc_block) * rrpc->dev->blks_per_lun); @@ -1216,9 +1215,9 @@ static sector_t rrpc_capacity(void *private) /* cur, gc, and two emergency blocks for each lun */ reserved = rrpc->nr_luns * dev->max_pages_per_blk * 4; - provisioned = rrpc->nr_pages - reserved; + provisioned = rrpc->nr_sects - reserved; - if (reserved > rrpc->nr_pages) { + if (reserved > rrpc->nr_sects) { pr_err("rrpc: not enough space available to expose storage.\n"); return 0; } @@ -1381,7 +1380,7 @@ static void *rrpc_init(struct nvm_dev *dev, struct gendisk *tdisk, blk_queue_max_hw_sectors(tqueue, queue_max_hw_sectors(bqueue)); pr_info("nvm: rrpc initialized with %u luns and %llu pages.\n", - rrpc->nr_luns, (unsigned long long)rrpc->nr_pages); + rrpc->nr_luns, (unsigned long long)rrpc->nr_sects); mod_timer(&rrpc->gc_timer, jiffies + msecs_to_jiffies(10)); diff --git a/drivers/lightnvm/rrpc.h b/drivers/lightnvm/rrpc.h index ef13ac7700c8..dfca5c4d26bb 100644 --- a/drivers/lightnvm/rrpc.h +++ b/drivers/lightnvm/rrpc.h @@ -104,7 +104,7 @@ struct rrpc { struct rrpc_lun *luns; /* calculated values */ - unsigned long long nr_pages; + unsigned long long nr_sects; unsigned long total_blocks; /* Write strategy variables. 
Move these into each for structure for each @@ -205,7 +205,7 @@ static inline int rrpc_lock_laddr(struct rrpc *rrpc, sector_t laddr, unsigned pages, struct rrpc_inflight_rq *r) { - BUG_ON((laddr + pages) > rrpc->nr_pages); + BUG_ON((laddr + pages) > rrpc->nr_sects); return __rrpc_lock_laddr(rrpc, laddr, pages, r); } @@ -242,7 +242,7 @@ static inline void rrpc_unlock_rq(struct rrpc *rrpc, struct nvm_rq *rqd) struct rrpc_inflight_rq *r = rrpc_get_inflight_rq(rqd); uint8_t pages = rqd->nr_pages; - BUG_ON((r->l_start + pages) > rrpc->nr_pages); + BUG_ON((r->l_start + pages) > rrpc->nr_sects); rrpc_unlock_laddr(rrpc, r); } diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h index 7fa1838f7356..8f8a74328f20 100644 --- a/include/linux/lightnvm.h +++ b/include/linux/lightnvm.h @@ -337,8 +337,8 @@ struct nvm_dev { int lps_per_blk; int *lptbl; - unsigned long total_pages; unsigned long total_blocks; + unsigned long total_secs; int nr_luns; unsigned max_pages_per_blk; -- cgit v1.2.3 From fe99c707fc7bb0baba75b26ed585ee3464612dbe Mon Sep 17 00:00:00 2001 From: David Rivshin Date: Wed, 2 Mar 2016 16:35:51 -0500 Subject: of: add 'const' for of_property_*_string*() parameter '*np' The of_property_{read,count,match}_string* family of functions never modify the struct device_node pointer that is passed in, so there is no reason for it to be non-const. Equivalent functions for all other types already take a 'const struct device_node *np'. Signed-off-by: David Rivshin Signed-off-by: Rob Herring --- drivers/of/base.c | 15 ++++++++------- include/linux/of.h | 18 +++++++++--------- 2 files changed, 17 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/drivers/of/base.c b/drivers/of/base.c index 017dd94f16ea..b299de2b3afa 100644 --- a/drivers/of/base.c +++ b/drivers/of/base.c @@ -1341,10 +1341,10 @@ EXPORT_SYMBOL_GPL(of_property_read_u64_array); * * The out_string pointer is modified only if a valid string can be decoded. */ -int of_property_read_string(struct device_node *np, const char *propname, +int of_property_read_string(const struct device_node *np, const char *propname, const char **out_string) { - struct property *prop = of_find_property(np, propname, NULL); + const struct property *prop = of_find_property(np, propname, NULL); if (!prop) return -EINVAL; if (!prop->value) @@ -1365,10 +1365,10 @@ EXPORT_SYMBOL_GPL(of_property_read_string); * This function searches a string list property and returns the index * of a specific string value. */ -int of_property_match_string(struct device_node *np, const char *propname, +int of_property_match_string(const struct device_node *np, const char *propname, const char *string) { - struct property *prop = of_find_property(np, propname, NULL); + const struct property *prop = of_find_property(np, propname, NULL); size_t l; int i; const char *p, *end; @@ -1404,10 +1404,11 @@ EXPORT_SYMBOL_GPL(of_property_match_string); * Don't call this function directly. It is a utility helper for the * of_property_read_string*() family of functions. 
*/ -int of_property_read_string_helper(struct device_node *np, const char *propname, - const char **out_strs, size_t sz, int skip) +int of_property_read_string_helper(const struct device_node *np, + const char *propname, const char **out_strs, + size_t sz, int skip) { - struct property *prop = of_find_property(np, propname, NULL); + const struct property *prop = of_find_property(np, propname, NULL); int l = 0, i = 0; const char *p, *end; diff --git a/include/linux/of.h b/include/linux/of.h index dd10626a615f..588fdd5d21ad 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -296,13 +296,13 @@ extern int of_property_read_u64_array(const struct device_node *np, u64 *out_values, size_t sz); -extern int of_property_read_string(struct device_node *np, +extern int of_property_read_string(const struct device_node *np, const char *propname, const char **out_string); -extern int of_property_match_string(struct device_node *np, +extern int of_property_match_string(const struct device_node *np, const char *propname, const char *string); -extern int of_property_read_string_helper(struct device_node *np, +extern int of_property_read_string_helper(const struct device_node *np, const char *propname, const char **out_strs, size_t sz, int index); extern int of_device_is_compatible(const struct device_node *device, @@ -538,14 +538,14 @@ static inline int of_property_read_u64_array(const struct device_node *np, return -ENOSYS; } -static inline int of_property_read_string(struct device_node *np, +static inline int of_property_read_string(const struct device_node *np, const char *propname, const char **out_string) { return -ENOSYS; } -static inline int of_property_read_string_helper(struct device_node *np, +static inline int of_property_read_string_helper(const struct device_node *np, const char *propname, const char **out_strs, size_t sz, int index) { @@ -571,7 +571,7 @@ static inline int of_property_read_u64(const struct device_node *np, return -ENOSYS; } -static inline int of_property_match_string(struct device_node *np, +static inline int of_property_match_string(const struct device_node *np, const char *propname, const char *string) { @@ -773,7 +773,7 @@ static inline int of_property_count_u64_elems(const struct device_node *np, * * If @out_strs is NULL, the number of strings in the property is returned. */ -static inline int of_property_read_string_array(struct device_node *np, +static inline int of_property_read_string_array(const struct device_node *np, const char *propname, const char **out_strs, size_t sz) { @@ -792,7 +792,7 @@ static inline int of_property_read_string_array(struct device_node *np, * does not have a value, and -EILSEQ if the string is not null-terminated * within the length of the property data. */ -static inline int of_property_count_strings(struct device_node *np, +static inline int of_property_count_strings(const struct device_node *np, const char *propname) { return of_property_read_string_helper(np, propname, NULL, 0, 0); @@ -816,7 +816,7 @@ static inline int of_property_count_strings(struct device_node *np, * * The out_string pointer is modified only if a valid string can be decoded. 
*/ -static inline int of_property_read_string_index(struct device_node *np, +static inline int of_property_read_string_index(const struct device_node *np, const char *propname, int index, const char **output) { -- cgit v1.2.3 From 1ca1b6a65ba7771ab8d16971b5fc40f9ba7a0e53 Mon Sep 17 00:00:00 2001 From: John Youn Date: Fri, 5 Feb 2016 17:05:26 -0800 Subject: usb: gadget: Add gadget_is_superspeed_plus() Add a function to check for SuperSpeedPlus capable gadgets. Signed-off-by: John Youn Signed-off-by: Felipe Balbi --- include/linux/usb/gadget.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/usb/gadget.h b/include/linux/usb/gadget.h index d82d0068872b..d6a1bdaf11c1 100644 --- a/include/linux/usb/gadget.h +++ b/include/linux/usb/gadget.h @@ -728,6 +728,16 @@ static inline int gadget_is_superspeed(struct usb_gadget *g) return g->max_speed >= USB_SPEED_SUPER; } +/** + * gadget_is_superspeed_plus() - return true if the hardware handles + * superspeed plus + * @g: controller that might support superspeed plus + */ +static inline int gadget_is_superspeed_plus(struct usb_gadget *g) +{ + return g->max_speed >= USB_SPEED_SUPER_PLUS; +} + /** * gadget_is_otg - return true iff the hardware is OTG-ready * @g: controller that might have a Mini-AB connector -- cgit v1.2.3 From eaef50c760576bca70b87fdc26eb87a3660529f8 Mon Sep 17 00:00:00 2001 From: John Youn Date: Fri, 5 Feb 2016 17:06:07 -0800 Subject: usb: gadget: Update usb_assign_descriptors for SuperSpeedPlus Add the 'ssp_descriptors' parameter to the usb_assign_descriptors() function. This allows a function driver to add descriptors for SuperSpeedPlus speeds if it supports it. Also update all uses of this function in the gadget subsystem so that they pass NULL for the ssp_descriptors parameters. 
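A function driver that actually provides SuperSpeedPlus descriptors would
pass a fourth array instead of NULL; a hypothetical sketch (the xyz names
are placeholders and the descriptor array contents are elided):

	static struct usb_descriptor_header *xyz_fs_function[] = { /* ... */ NULL };
	static struct usb_descriptor_header *xyz_hs_function[] = { /* ... */ NULL };
	static struct usb_descriptor_header *xyz_ss_function[] = { /* ... */ NULL };
	static struct usb_descriptor_header *xyz_ssp_function[] = { /* ... */ NULL };

	status = usb_assign_descriptors(f, xyz_fs_function, xyz_hs_function,
					xyz_ss_function, xyz_ssp_function);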
Signed-off-by: John Youn Signed-off-by: Felipe Balbi --- drivers/usb/gadget/config.c | 3 ++- drivers/usb/gadget/function/f_acm.c | 2 +- drivers/usb/gadget/function/f_ecm.c | 2 +- drivers/usb/gadget/function/f_eem.c | 2 +- drivers/usb/gadget/function/f_hid.c | 2 +- drivers/usb/gadget/function/f_loopback.c | 2 +- drivers/usb/gadget/function/f_mass_storage.c | 2 +- drivers/usb/gadget/function/f_ncm.c | 2 +- drivers/usb/gadget/function/f_obex.c | 3 ++- drivers/usb/gadget/function/f_phonet.c | 2 +- drivers/usb/gadget/function/f_printer.c | 2 +- drivers/usb/gadget/function/f_rndis.c | 2 +- drivers/usb/gadget/function/f_serial.c | 2 +- drivers/usb/gadget/function/f_sourcesink.c | 2 +- drivers/usb/gadget/function/f_subset.c | 2 +- drivers/usb/gadget/function/f_tcm.c | 2 +- drivers/usb/gadget/function/f_uac1.c | 3 ++- drivers/usb/gadget/function/f_uac2.c | 3 ++- include/linux/usb/gadget.h | 3 ++- 19 files changed, 24 insertions(+), 19 deletions(-) (limited to 'include/linux') diff --git a/drivers/usb/gadget/config.c b/drivers/usb/gadget/config.c index 0fafa7a1b6f6..62ba4c1b69ec 100644 --- a/drivers/usb/gadget/config.c +++ b/drivers/usb/gadget/config.c @@ -163,7 +163,8 @@ EXPORT_SYMBOL_GPL(usb_copy_descriptors); int usb_assign_descriptors(struct usb_function *f, struct usb_descriptor_header **fs, struct usb_descriptor_header **hs, - struct usb_descriptor_header **ss) + struct usb_descriptor_header **ss, + struct usb_descriptor_header **ssp) { struct usb_gadget *g = f->config->cdev->gadget; diff --git a/drivers/usb/gadget/function/f_acm.c b/drivers/usb/gadget/function/f_acm.c index 2fa1e80a3ce7..dc13b280db28 100644 --- a/drivers/usb/gadget/function/f_acm.c +++ b/drivers/usb/gadget/function/f_acm.c @@ -685,7 +685,7 @@ acm_bind(struct usb_configuration *c, struct usb_function *f) acm_ss_out_desc.bEndpointAddress = acm_fs_out_desc.bEndpointAddress; status = usb_assign_descriptors(f, acm_fs_function, acm_hs_function, - acm_ss_function); + acm_ss_function, NULL); if (status) goto fail; diff --git a/drivers/usb/gadget/function/f_ecm.c b/drivers/usb/gadget/function/f_ecm.c index 7ad60ee41914..4c488d15b6f6 100644 --- a/drivers/usb/gadget/function/f_ecm.c +++ b/drivers/usb/gadget/function/f_ecm.c @@ -786,7 +786,7 @@ ecm_bind(struct usb_configuration *c, struct usb_function *f) fs_ecm_notify_desc.bEndpointAddress; status = usb_assign_descriptors(f, ecm_fs_function, ecm_hs_function, - ecm_ss_function); + ecm_ss_function, NULL); if (status) goto fail; diff --git a/drivers/usb/gadget/function/f_eem.c b/drivers/usb/gadget/function/f_eem.c index cad35a502d3f..d58bfc32be9e 100644 --- a/drivers/usb/gadget/function/f_eem.c +++ b/drivers/usb/gadget/function/f_eem.c @@ -309,7 +309,7 @@ static int eem_bind(struct usb_configuration *c, struct usb_function *f) eem_ss_out_desc.bEndpointAddress = eem_fs_out_desc.bEndpointAddress; status = usb_assign_descriptors(f, eem_fs_function, eem_hs_function, - eem_ss_function); + eem_ss_function, NULL); if (status) goto fail; diff --git a/drivers/usb/gadget/function/f_hid.c b/drivers/usb/gadget/function/f_hid.c index 99285b416308..51980c50546d 100644 --- a/drivers/usb/gadget/function/f_hid.c +++ b/drivers/usb/gadget/function/f_hid.c @@ -646,7 +646,7 @@ static int hidg_bind(struct usb_configuration *c, struct usb_function *f) hidg_fs_out_ep_desc.bEndpointAddress; status = usb_assign_descriptors(f, hidg_fs_descriptors, - hidg_hs_descriptors, NULL); + hidg_hs_descriptors, NULL, NULL); if (status) goto fail; diff --git a/drivers/usb/gadget/function/f_loopback.c 
b/drivers/usb/gadget/function/f_loopback.c index ddc3aad886b7..3a9f8f9c77bd 100644 --- a/drivers/usb/gadget/function/f_loopback.c +++ b/drivers/usb/gadget/function/f_loopback.c @@ -211,7 +211,7 @@ autoconf_fail: ss_loop_sink_desc.bEndpointAddress = fs_loop_sink_desc.bEndpointAddress; ret = usb_assign_descriptors(f, fs_loopback_descs, hs_loopback_descs, - ss_loopback_descs); + ss_loopback_descs, NULL); if (ret) return ret; diff --git a/drivers/usb/gadget/function/f_mass_storage.c b/drivers/usb/gadget/function/f_mass_storage.c index 223ccf89d226..321a6315c696 100644 --- a/drivers/usb/gadget/function/f_mass_storage.c +++ b/drivers/usb/gadget/function/f_mass_storage.c @@ -3093,7 +3093,7 @@ static int fsg_bind(struct usb_configuration *c, struct usb_function *f) fsg_ss_bulk_out_comp_desc.bMaxBurst = max_burst; ret = usb_assign_descriptors(f, fsg_fs_function, fsg_hs_function, - fsg_ss_function); + fsg_ss_function, NULL); if (ret) goto autoconf_fail; diff --git a/drivers/usb/gadget/function/f_ncm.c b/drivers/usb/gadget/function/f_ncm.c index 7ad798ace1e5..97f0a9bc84df 100644 --- a/drivers/usb/gadget/function/f_ncm.c +++ b/drivers/usb/gadget/function/f_ncm.c @@ -1432,7 +1432,7 @@ static int ncm_bind(struct usb_configuration *c, struct usb_function *f) fs_ncm_notify_desc.bEndpointAddress; status = usb_assign_descriptors(f, ncm_fs_function, ncm_hs_function, - NULL); + NULL, NULL); if (status) goto fail; diff --git a/drivers/usb/gadget/function/f_obex.c b/drivers/usb/gadget/function/f_obex.c index d6396e0909ee..d43e86cea74f 100644 --- a/drivers/usb/gadget/function/f_obex.c +++ b/drivers/usb/gadget/function/f_obex.c @@ -364,7 +364,8 @@ static int obex_bind(struct usb_configuration *c, struct usb_function *f) obex_hs_ep_out_desc.bEndpointAddress = obex_fs_ep_out_desc.bEndpointAddress; - status = usb_assign_descriptors(f, fs_function, hs_function, NULL); + status = usb_assign_descriptors(f, fs_function, hs_function, NULL, + NULL); if (status) goto fail; diff --git a/drivers/usb/gadget/function/f_phonet.c b/drivers/usb/gadget/function/f_phonet.c index 157441dbfeba..0473d619d5bf 100644 --- a/drivers/usb/gadget/function/f_phonet.c +++ b/drivers/usb/gadget/function/f_phonet.c @@ -541,7 +541,7 @@ static int pn_bind(struct usb_configuration *c, struct usb_function *f) /* Do not try to bind Phonet twice... 
*/ status = usb_assign_descriptors(f, fs_pn_function, hs_pn_function, - NULL); + NULL, NULL); if (status) goto err; diff --git a/drivers/usb/gadget/function/f_printer.c b/drivers/usb/gadget/function/f_printer.c index 26ccad5d8680..c45104e3a64b 100644 --- a/drivers/usb/gadget/function/f_printer.c +++ b/drivers/usb/gadget/function/f_printer.c @@ -1051,7 +1051,7 @@ autoconf_fail: ss_ep_out_desc.bEndpointAddress = fs_ep_out_desc.bEndpointAddress; ret = usb_assign_descriptors(f, fs_printer_function, - hs_printer_function, ss_printer_function); + hs_printer_function, ss_printer_function, NULL); if (ret) return ret; diff --git a/drivers/usb/gadget/function/f_rndis.c b/drivers/usb/gadget/function/f_rndis.c index e587767e374c..d99dd9542048 100644 --- a/drivers/usb/gadget/function/f_rndis.c +++ b/drivers/usb/gadget/function/f_rndis.c @@ -783,7 +783,7 @@ rndis_bind(struct usb_configuration *c, struct usb_function *f) ss_notify_desc.bEndpointAddress = fs_notify_desc.bEndpointAddress; status = usb_assign_descriptors(f, eth_fs_function, eth_hs_function, - eth_ss_function); + eth_ss_function, NULL); if (status) goto fail; diff --git a/drivers/usb/gadget/function/f_serial.c b/drivers/usb/gadget/function/f_serial.c index 6bb44d613bab..cb00ada21d9c 100644 --- a/drivers/usb/gadget/function/f_serial.c +++ b/drivers/usb/gadget/function/f_serial.c @@ -236,7 +236,7 @@ static int gser_bind(struct usb_configuration *c, struct usb_function *f) gser_ss_out_desc.bEndpointAddress = gser_fs_out_desc.bEndpointAddress; status = usb_assign_descriptors(f, gser_fs_function, gser_hs_function, - gser_ss_function); + gser_ss_function, NULL); if (status) goto fail; dev_dbg(&cdev->gadget->dev, "generic ttyGS%d: %s speed IN/%s OUT/%s\n", diff --git a/drivers/usb/gadget/function/f_sourcesink.c b/drivers/usb/gadget/function/f_sourcesink.c index 242ba5caffe5..df0189ddfdd5 100644 --- a/drivers/usb/gadget/function/f_sourcesink.c +++ b/drivers/usb/gadget/function/f_sourcesink.c @@ -437,7 +437,7 @@ no_iso: ss_iso_sink_desc.bEndpointAddress = fs_iso_sink_desc.bEndpointAddress; ret = usb_assign_descriptors(f, fs_source_sink_descs, - hs_source_sink_descs, ss_source_sink_descs); + hs_source_sink_descs, ss_source_sink_descs, NULL); if (ret) return ret; diff --git a/drivers/usb/gadget/function/f_subset.c b/drivers/usb/gadget/function/f_subset.c index 829c78de9eba..434b983f3b4c 100644 --- a/drivers/usb/gadget/function/f_subset.c +++ b/drivers/usb/gadget/function/f_subset.c @@ -362,7 +362,7 @@ geth_bind(struct usb_configuration *c, struct usb_function *f) fs_subset_out_desc.bEndpointAddress; status = usb_assign_descriptors(f, fs_eth_function, hs_eth_function, - ss_eth_function); + ss_eth_function, NULL); if (status) goto fail; diff --git a/drivers/usb/gadget/function/f_tcm.c b/drivers/usb/gadget/function/f_tcm.c index bad007b5a190..dfb733047a4c 100644 --- a/drivers/usb/gadget/function/f_tcm.c +++ b/drivers/usb/gadget/function/f_tcm.c @@ -2098,7 +2098,7 @@ static int tcm_bind(struct usb_configuration *c, struct usb_function *f) uasp_fs_cmd_desc.bEndpointAddress = uasp_ss_cmd_desc.bEndpointAddress; ret = usb_assign_descriptors(f, uasp_fs_function_desc, - uasp_hs_function_desc, uasp_ss_function_desc); + uasp_hs_function_desc, uasp_ss_function_desc, NULL); if (ret) goto ep_fail; diff --git a/drivers/usb/gadget/function/f_uac1.c b/drivers/usb/gadget/function/f_uac1.c index 6a2346b99f55..f2ac0cbc29a4 100644 --- a/drivers/usb/gadget/function/f_uac1.c +++ b/drivers/usb/gadget/function/f_uac1.c @@ -721,7 +721,8 @@ f_audio_bind(struct usb_configuration 
*c, struct usb_function *f) status = -ENOMEM; /* copy descriptors, and track endpoint copies */ - status = usb_assign_descriptors(f, f_audio_desc, f_audio_desc, NULL); + status = usb_assign_descriptors(f, f_audio_desc, f_audio_desc, NULL, + NULL); if (status) goto fail; return 0; diff --git a/drivers/usb/gadget/function/f_uac2.c b/drivers/usb/gadget/function/f_uac2.c index 044ca79d3cb5..186d4b162524 100644 --- a/drivers/usb/gadget/function/f_uac2.c +++ b/drivers/usb/gadget/function/f_uac2.c @@ -1100,7 +1100,8 @@ afunc_bind(struct usb_configuration *cfg, struct usb_function *fn) hs_epout_desc.bEndpointAddress = fs_epout_desc.bEndpointAddress; hs_epin_desc.bEndpointAddress = fs_epin_desc.bEndpointAddress; - ret = usb_assign_descriptors(fn, fs_audio_desc, hs_audio_desc, NULL); + ret = usb_assign_descriptors(fn, fs_audio_desc, hs_audio_desc, NULL, + NULL); if (ret) goto err; diff --git a/include/linux/usb/gadget.h b/include/linux/usb/gadget.h index d6a1bdaf11c1..42527297f218 100644 --- a/include/linux/usb/gadget.h +++ b/include/linux/usb/gadget.h @@ -1204,7 +1204,8 @@ struct usb_function; int usb_assign_descriptors(struct usb_function *f, struct usb_descriptor_header **fs, struct usb_descriptor_header **hs, - struct usb_descriptor_header **ss); + struct usb_descriptor_header **ss, + struct usb_descriptor_header **ssp); void usb_free_all_descriptors(struct usb_function *f); struct usb_descriptor_header *usb_otg_descriptor_alloc( -- cgit v1.2.3 From f5c61225cf29c4f97e544ad7bd088256303acf97 Mon Sep 17 00:00:00 2001 From: John Youn Date: Fri, 5 Feb 2016 17:06:21 -0800 Subject: usb: gadget: Update function for SuperSpeedPlus Add a ssp_descriptors member to struct usb_function and handle the initialization and cleanup of it. This holds the SuperSpeedPlus descriptors for a function that supports SuperSpeedPlus. This is added by usb_assign_descriptors(). Signed-off-by: John Youn Signed-off-by: Felipe Balbi --- drivers/usb/gadget/config.c | 6 ++++++ include/linux/usb/composite.h | 5 +++++ 2 files changed, 11 insertions(+) (limited to 'include/linux') diff --git a/drivers/usb/gadget/config.c b/drivers/usb/gadget/config.c index 62ba4c1b69ec..e6c0542a063b 100644 --- a/drivers/usb/gadget/config.c +++ b/drivers/usb/gadget/config.c @@ -183,6 +183,11 @@ int usb_assign_descriptors(struct usb_function *f, if (!f->ss_descriptors) goto err; } + if (ssp && gadget_is_superspeed_plus(g)) { + f->ssp_descriptors = usb_copy_descriptors(ssp); + if (!f->ssp_descriptors) + goto err; + } return 0; err: usb_free_all_descriptors(f); @@ -195,6 +200,7 @@ void usb_free_all_descriptors(struct usb_function *f) usb_free_descriptors(f->fs_descriptors); usb_free_descriptors(f->hs_descriptors); usb_free_descriptors(f->ss_descriptors); + usb_free_descriptors(f->ssp_descriptors); } EXPORT_SYMBOL_GPL(usb_free_all_descriptors); diff --git a/include/linux/usb/composite.h b/include/linux/usb/composite.h index 1074b8921a5d..9ff1e465d307 100644 --- a/include/linux/usb/composite.h +++ b/include/linux/usb/composite.h @@ -126,6 +126,10 @@ struct usb_os_desc_table { * string identifiers assigned during @bind(). If this * pointer is null after initiation, the function will not * be available at super speed. + * @ssp_descriptors: Table of super speed plus descriptors, using + * interface and string identifiers assigned during @bind(). If + * this pointer is null after initiation, the function will not + * be available at super speed plus. 
* @config: assigned when @usb_add_function() is called; this is the * configuration with which this function is associated. * @os_desc_table: Table of (interface id, os descriptors) pairs. The function @@ -186,6 +190,7 @@ struct usb_function { struct usb_descriptor_header **fs_descriptors; struct usb_descriptor_header **hs_descriptors; struct usb_descriptor_header **ss_descriptors; + struct usb_descriptor_header **ssp_descriptors; struct usb_configuration *config; -- cgit v1.2.3 From 554eead5436401ae3cfdb7d79fca24c14ebab143 Mon Sep 17 00:00:00 2001 From: John Youn Date: Fri, 5 Feb 2016 17:06:35 -0800 Subject: usb: gadget: Update config for SuperSpeedPlus When a function is added to a configuration with usb_add_function(), the configuration speed flags are updated. These flags indicate the speeds for which the configuration is valid. This patch adds a flag in the configuration for SuperSpeedPlus and also updates this based on the existence of ssp_descriptors. Signed-off-by: John Youn Signed-off-by: Felipe Balbi --- drivers/usb/gadget/composite.c | 2 ++ drivers/usb/gadget/configfs.c | 1 + include/linux/usb/composite.h | 1 + 3 files changed, 4 insertions(+) (limited to 'include/linux') diff --git a/drivers/usb/gadget/composite.c b/drivers/usb/gadget/composite.c index 11d243349ebf..560330f5457c 100644 --- a/drivers/usb/gadget/composite.c +++ b/drivers/usb/gadget/composite.c @@ -237,6 +237,8 @@ int usb_add_function(struct usb_configuration *config, config->highspeed = true; if (!config->superspeed && function->ss_descriptors) config->superspeed = true; + if (!config->superspeed_plus && function->ssp_descriptors) + config->superspeed_plus = true; done: if (value) diff --git a/drivers/usb/gadget/configfs.c b/drivers/usb/gadget/configfs.c index 590c44989e5e..c6cc15ebeed6 100644 --- a/drivers/usb/gadget/configfs.c +++ b/drivers/usb/gadget/configfs.c @@ -1229,6 +1229,7 @@ static void purge_configs_funcs(struct gadget_info *gi) } c->next_interface_id = 0; memset(c->interface, 0, sizeof(c->interface)); + c->superspeed_plus = 0; c->superspeed = 0; c->highspeed = 0; c->fullspeed = 0; diff --git a/include/linux/usb/composite.h b/include/linux/usb/composite.h index 9ff1e465d307..2b81b24eb5aa 100644 --- a/include/linux/usb/composite.h +++ b/include/linux/usb/composite.h @@ -322,6 +322,7 @@ struct usb_configuration { unsigned superspeed:1; unsigned highspeed:1; unsigned fullspeed:1; + unsigned superspeed_plus:1; struct usb_function *interface[MAX_CONFIG_INTERFACES]; }; -- cgit v1.2.3 From de18757e272d0e948bfa3e083a1771887f719c05 Mon Sep 17 00:00:00 2001 From: Yoshihiro Shimoda Date: Thu, 7 Jan 2016 18:18:13 +0900 Subject: usb: renesas_usbhs: add R-Car Gen3 power control Since the usb2 phy driver for gen3 (phy-rcar-gen3-usb2) cannot access LPSTS and UGCTRL2 registers in the HSUSB module, this driver has to initialize the registers. So, this patch adds such handling code into rcar3.c.
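In short, the new glue code boils down to two register accesses on the power path; a minimal sketch of the sequence, mirroring the rcar3.c code added below:

	/* keep UGCTRL2 bit[3:0] at B'0001 and route USB0 to the OTG
	 * channel, then drive PHY power through LPSTS.SUSPM
	 */
	usbhs_write32(priv, UGCTRL2, UGCTRL2_RESERVED_3 | UGCTRL2_USB0SEL_OTG);
	usbhs_bset(priv, LPSTS, LPSTS_SUSPM, enable ? LPSTS_SUSPM : 0);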
Signed-off-by: Yoshihiro Shimoda Signed-off-by: Felipe Balbi --- drivers/usb/renesas_usbhs/Makefile | 2 +- drivers/usb/renesas_usbhs/common.c | 14 +++++++--- drivers/usb/renesas_usbhs/rcar3.c | 54 ++++++++++++++++++++++++++++++++++++++ drivers/usb/renesas_usbhs/rcar3.h | 3 +++ include/linux/usb/renesas_usbhs.h | 1 + 5 files changed, 69 insertions(+), 5 deletions(-) create mode 100644 drivers/usb/renesas_usbhs/rcar3.c create mode 100644 drivers/usb/renesas_usbhs/rcar3.h (limited to 'include/linux') diff --git a/drivers/usb/renesas_usbhs/Makefile b/drivers/usb/renesas_usbhs/Makefile index 9e47f477b6d2..d787d05f6546 100644 --- a/drivers/usb/renesas_usbhs/Makefile +++ b/drivers/usb/renesas_usbhs/Makefile @@ -4,7 +4,7 @@ obj-$(CONFIG_USB_RENESAS_USBHS) += renesas_usbhs.o -renesas_usbhs-y := common.o mod.o pipe.o fifo.o rcar2.o +renesas_usbhs-y := common.o mod.o pipe.o fifo.o rcar2.o rcar3.o ifneq ($(CONFIG_USB_RENESAS_USBHS_HCD),) renesas_usbhs-y += mod_host.o diff --git a/drivers/usb/renesas_usbhs/common.c b/drivers/usb/renesas_usbhs/common.c index 5af9ca5d54ab..baeb7d23bf24 100644 --- a/drivers/usb/renesas_usbhs/common.c +++ b/drivers/usb/renesas_usbhs/common.c @@ -25,6 +25,7 @@ #include #include "common.h" #include "rcar2.h" +#include "rcar3.h" /* * image of renesas_usbhs @@ -477,18 +478,16 @@ static const struct of_device_id usbhs_of_match[] = { .data = (void *)USBHS_TYPE_RCAR_GEN2, }, { - /* Gen3 is compatible with Gen2 */ .compatible = "renesas,usbhs-r8a7795", - .data = (void *)USBHS_TYPE_RCAR_GEN2, + .data = (void *)USBHS_TYPE_RCAR_GEN3, }, { .compatible = "renesas,rcar-gen2-usbhs", .data = (void *)USBHS_TYPE_RCAR_GEN2, }, { - /* Gen3 is compatible with Gen2 */ .compatible = "renesas,rcar-gen3-usbhs", - .data = (void *)USBHS_TYPE_RCAR_GEN2, + .data = (void *)USBHS_TYPE_RCAR_GEN3, }, { }, }; @@ -578,6 +577,13 @@ static int usbhs_probe(struct platform_device *pdev) priv->dparam.pipe_size = ARRAY_SIZE(usbhsc_new_pipe); } break; + case USBHS_TYPE_RCAR_GEN3: + priv->pfunc = usbhs_rcar3_ops; + if (!priv->dparam.pipe_configs) { + priv->dparam.pipe_configs = usbhsc_new_pipe; + priv->dparam.pipe_size = ARRAY_SIZE(usbhsc_new_pipe); + } + break; default: if (!info->platform_callback.get_id) { dev_err(&pdev->dev, "no platform callbacks"); diff --git a/drivers/usb/renesas_usbhs/rcar3.c b/drivers/usb/renesas_usbhs/rcar3.c new file mode 100644 index 000000000000..38b01f2aeeb0 --- /dev/null +++ b/drivers/usb/renesas_usbhs/rcar3.c @@ -0,0 +1,54 @@ +/* + * Renesas USB driver R-Car Gen. 3 initialization and power control + * + * Copyright (C) 2016 Renesas Electronics Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ * + */ + +#include +#include "common.h" +#include "rcar3.h" + +#define LPSTS 0x102 +#define UGCTRL2 0x184 /* 32-bit register */ + +/* Low Power Status register (LPSTS) */ +#define LPSTS_SUSPM 0x4000 + +/* USB General control register 2 (UGCTRL2), bit[31:6] should be 0 */ +#define UGCTRL2_RESERVED_3 0x00000001 /* bit[3:0] should be B'0001 */ +#define UGCTRL2_USB0SEL_OTG 0x00000030 + +void usbhs_write32(struct usbhs_priv *priv, u32 reg, u32 data) +{ + iowrite32(data, priv->base + reg); +} + +static int usbhs_rcar3_power_ctrl(struct platform_device *pdev, + void __iomem *base, int enable) +{ + struct usbhs_priv *priv = usbhs_pdev_to_priv(pdev); + + usbhs_write32(priv, UGCTRL2, UGCTRL2_RESERVED_3 | UGCTRL2_USB0SEL_OTG); + + if (enable) + usbhs_bset(priv, LPSTS, LPSTS_SUSPM, LPSTS_SUSPM); + else + usbhs_bset(priv, LPSTS, LPSTS_SUSPM, 0); + + return 0; +} + +static int usbhs_rcar3_get_id(struct platform_device *pdev) +{ + return USBHS_GADGET; +} + +const struct renesas_usbhs_platform_callback usbhs_rcar3_ops = { + .power_ctrl = usbhs_rcar3_power_ctrl, + .get_id = usbhs_rcar3_get_id, +}; diff --git a/drivers/usb/renesas_usbhs/rcar3.h b/drivers/usb/renesas_usbhs/rcar3.h new file mode 100644 index 000000000000..5f850b23ff18 --- /dev/null +++ b/drivers/usb/renesas_usbhs/rcar3.h @@ -0,0 +1,3 @@ +#include "common.h" + +extern const struct renesas_usbhs_platform_callback usbhs_rcar3_ops; diff --git a/include/linux/usb/renesas_usbhs.h b/include/linux/usb/renesas_usbhs.h index 4db191fe8c2c..00a47d058d83 100644 --- a/include/linux/usb/renesas_usbhs.h +++ b/include/linux/usb/renesas_usbhs.h @@ -184,6 +184,7 @@ struct renesas_usbhs_driver_param { }; #define USBHS_TYPE_RCAR_GEN2 1 +#define USBHS_TYPE_RCAR_GEN3 2 /* * option: -- cgit v1.2.3 From 175f712119c57c0f2a9b553211d3edcfc460b484 Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Thu, 18 Feb 2016 11:34:43 +0100 Subject: usb: gadget: provide interface for legacy gadgets to get UDC name Since commit 855ed04a3758b205e84b269f92d26ab36ed8e2f7 ("usb: gadget: udc-core: independent registration of gadgets and gadget drivers") gadget drivers can not assume that UDC drivers are already available on their initialization. This broke the HACK, which was used in the gadgetfs driver, to get the UDC controller name. This patch removes this hack and replaces it with an additional function in the UDC core (which is useful only for legacy drivers; please don't use it in new code).
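For illustration, the intended usage pattern of the new helper; a sketch of a legacy driver's setup path, with udc_name as a placeholder variable:

	char *udc_name = usb_get_gadget_udc_name();

	if (!udc_name)
		return -ENODEV;		/* no unbound UDC registered yet */
	/* ... bind against the named UDC ... */
	kfree(udc_name);		/* caller owns the kstrdup()'d copy */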
Reported-by: Vegard Nossum Signed-off-by: Marek Szyprowski Tested-by: Vegard Nossum Signed-off-by: Felipe Balbi --- drivers/usb/gadget/legacy/inode.c | 28 +++------------------------- drivers/usb/gadget/udc/udc-core.c | 30 ++++++++++++++++++++++++++++++ include/linux/usb/gadget.h | 1 + 3 files changed, 34 insertions(+), 25 deletions(-) (limited to 'include/linux') diff --git a/drivers/usb/gadget/legacy/inode.c b/drivers/usb/gadget/legacy/inode.c index 87fb0fd6aaab..5cdaf0150a4e 100644 --- a/drivers/usb/gadget/legacy/inode.c +++ b/drivers/usb/gadget/legacy/inode.c @@ -1699,28 +1699,6 @@ static struct usb_gadget_driver gadgetfs_driver = { }; /*----------------------------------------------------------------------*/ - -static void gadgetfs_nop(struct usb_gadget *arg) { } - -static int gadgetfs_probe(struct usb_gadget *gadget, - struct usb_gadget_driver *driver) -{ - CHIP = gadget->name; - return -EISNAM; -} - -static struct usb_gadget_driver probe_driver = { - .max_speed = USB_SPEED_HIGH, - .bind = gadgetfs_probe, - .unbind = gadgetfs_nop, - .setup = (void *)gadgetfs_nop, - .disconnect = gadgetfs_nop, - .driver = { - .name = "nop", - }, -}; - - /* DEVICE INITIALIZATION * * fd = open ("/dev/gadget/$CHIP", O_RDWR) @@ -1971,9 +1949,7 @@ gadgetfs_fill_super (struct super_block *sb, void *opts, int silent) if (the_device) return -ESRCH; - /* fake probe to determine $CHIP */ - CHIP = NULL; - usb_gadget_probe_driver(&probe_driver); + CHIP = usb_get_gadget_udc_name(); if (!CHIP) return -ENODEV; @@ -2034,6 +2010,8 @@ gadgetfs_kill_sb (struct super_block *sb) put_dev (the_device); the_device = NULL; } + kfree(CHIP); + CHIP = NULL; } /*----------------------------------------------------------------------*/ diff --git a/drivers/usb/gadget/udc/udc-core.c b/drivers/usb/gadget/udc/udc-core.c index b86a6f03592e..4151597e9d28 100644 --- a/drivers/usb/gadget/udc/udc-core.c +++ b/drivers/usb/gadget/udc/udc-core.c @@ -442,6 +442,36 @@ err1: } EXPORT_SYMBOL_GPL(usb_add_gadget_udc_release); +/** + * usb_get_gadget_udc_name - get the name of the first UDC controller + * This function returns the name of the first UDC controller in the system. + * Please note that this interface is useful only for legacy drivers which + * assume that there is only one UDC controller in the system and they need to + * get its name before initialization. There is no guarantee that the UDC + * of the returned name will be still available, when gadget driver registers + * itself. + * + * Returns pointer to string with UDC controller name on success, NULL + * otherwise. Caller should kfree() returned string. + */ +char *usb_get_gadget_udc_name(void) +{ + struct usb_udc *udc; + char *name = NULL; + + /* For now we take the first available UDC */ + mutex_lock(&udc_lock); + list_for_each_entry(udc, &udc_list, list) { + if (!udc->driver) { + name = kstrdup(udc->gadget->name, GFP_KERNEL); + break; + } + } + mutex_unlock(&udc_lock); + return name; +} +EXPORT_SYMBOL_GPL(usb_get_gadget_udc_name); + /** * usb_add_gadget_udc - adds a new gadget to the udc class driver list * @parent: the parent device to this udc.
Usually the controller diff --git a/include/linux/usb/gadget.h b/include/linux/usb/gadget.h index 42527297f218..dd7c53279e6a 100644 --- a/include/linux/usb/gadget.h +++ b/include/linux/usb/gadget.h @@ -1136,6 +1136,7 @@ extern int usb_add_gadget_udc_release(struct device *parent, struct usb_gadget *gadget, void (*release)(struct device *dev)); extern int usb_add_gadget_udc(struct device *parent, struct usb_gadget *gadget); extern void usb_del_gadget_udc(struct usb_gadget *gadget); +extern char *usb_get_gadget_udc_name(void); /*-------------------------------------------------------------------------*/ -- cgit v1.2.3 From 75a9c82ab9638c13ce0964ed8d89d910597f03b4 Mon Sep 17 00:00:00 2001 From: Li Jun Date: Fri, 19 Feb 2016 10:04:40 +0800 Subject: usb: gadget: add hnp_polling_support and host_request_flag in usb_gadget Add 2 flags for USB OTG HNP polling: hnp_polling_support indicates whether the gadget can support HNP polling, and host_request_flag is used by the gadget to store the host request information from the application, which can be used to respond to HNP polling from the host. Acked-by: Peter Chen Signed-off-by: Li Jun Signed-off-by: Felipe Balbi --- include/linux/usb/gadget.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/usb/gadget.h b/include/linux/usb/gadget.h index dd7c53279e6a..5d4e151c49bf 100644 --- a/include/linux/usb/gadget.h +++ b/include/linux/usb/gadget.h @@ -595,6 +595,10 @@ struct usb_gadget_ops { * only supports HNP on a different root port. * @b_hnp_enable: OTG device feature flag, indicating that the A-Host * enabled HNP support. + * @hnp_polling_support: OTG device feature flag, indicating if the OTG device + * in peripheral mode can support HNP polling. + * @host_request_flag: OTG device feature flag, indicating if A-Peripheral + * or B-Peripheral wants to take host role. * @quirk_ep_out_aligned_size: epout requires buffer size to be aligned to * MaxPacketSize. * @is_selfpowered: if the gadget is self-powered. @@ -642,6 +646,8 @@ struct usb_gadget { unsigned b_hnp_enable:1; unsigned a_hnp_support:1; unsigned a_alt_hnp_support:1; + unsigned hnp_polling_support:1; + unsigned host_request_flag:1; unsigned quirk_ep_out_aligned_size:1; unsigned quirk_altset_not_supp:1; unsigned quirk_stall_not_supp:1; -- cgit v1.2.3 From ae57e97a95211397b46a40f8cb0a23966c054bc5 Mon Sep 17 00:00:00 2001 From: Li Jun Date: Fri, 19 Feb 2016 10:04:42 +0800 Subject: usb: common: otg-fsm: add HNP polling support Add an HNP polling timer when transitioning to a host state; after it expires, the OTG status request is sent to the peripheral. If the host request flag is set, the host switches to peripheral state; otherwise it repeats HNP polling every 1.5s and maintains the current session.
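The poll itself is a one-byte GET_STATUS control request with the OTG status selector as wIndex; a minimal sketch, mirroring the usb-otg-fsm.c code below:

	/* read the 1-byte host request flag from the connected device */
	retval = usb_control_msg(udev, usb_rcvctrlpipe(udev, 0),
				 USB_REQ_GET_STATUS,
				 USB_DIR_IN | USB_RECIP_DEVICE,
				 0, OTG_STS_SELECTOR, fsm->host_req_flag,
				 1, USB_CTRL_GET_TIMEOUT);
	if (retval == 1 && *fsm->host_req_flag == HOST_REQUEST_FLAG)
		fsm->a_bus_req = 0;	/* A-Host case: hand over the bus */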
Acked-by: Peter Chen Signed-off-by: Li Jun Signed-off-by: Felipe Balbi --- drivers/usb/common/usb-otg-fsm.c | 87 ++++++++++++++++++++++++++++++++++++++++ include/linux/usb/otg-fsm.h | 14 +++++++ 2 files changed, 101 insertions(+) (limited to 'include/linux') diff --git a/drivers/usb/common/usb-otg-fsm.c b/drivers/usb/common/usb-otg-fsm.c index 61d538aa2346..504708f59b93 100644 --- a/drivers/usb/common/usb-otg-fsm.c +++ b/drivers/usb/common/usb-otg-fsm.c @@ -78,6 +78,8 @@ static void otg_leave_state(struct otg_fsm *fsm, enum usb_otg_state old_state) fsm->b_srp_done = 0; break; case OTG_STATE_B_PERIPHERAL: + if (fsm->otg->gadget) + fsm->otg->gadget->host_request_flag = 0; break; case OTG_STATE_B_WAIT_ACON: otg_del_timer(fsm, B_ASE0_BRST); @@ -107,6 +109,8 @@ static void otg_leave_state(struct otg_fsm *fsm, enum usb_otg_state old_state) case OTG_STATE_A_PERIPHERAL: otg_del_timer(fsm, A_BIDL_ADIS); fsm->a_bidl_adis_tmout = 0; + if (fsm->otg->gadget) + fsm->otg->gadget->host_request_flag = 0; break; case OTG_STATE_A_WAIT_VFALL: otg_del_timer(fsm, A_WAIT_VFALL); @@ -120,6 +124,87 @@ static void otg_leave_state(struct otg_fsm *fsm, enum usb_otg_state old_state) } } +static void otg_hnp_polling_work(struct work_struct *work) +{ + struct otg_fsm *fsm = container_of(to_delayed_work(work), + struct otg_fsm, hnp_polling_work); + struct usb_device *udev; + enum usb_otg_state state = fsm->otg->state; + u8 flag; + int retval; + + if (state != OTG_STATE_A_HOST && state != OTG_STATE_B_HOST) + return; + + udev = usb_hub_find_child(fsm->otg->host->root_hub, 1); + if (!udev) { + dev_err(fsm->otg->host->controller, + "no usb dev connected, can't start HNP polling\n"); + return; + } + + *fsm->host_req_flag = 0; + /* Get host request flag from connected USB device */ + retval = usb_control_msg(udev, + usb_rcvctrlpipe(udev, 0), + USB_REQ_GET_STATUS, + USB_DIR_IN | USB_RECIP_DEVICE, + 0, + OTG_STS_SELECTOR, + fsm->host_req_flag, + 1, + USB_CTRL_GET_TIMEOUT); + if (retval != 1) { + dev_err(&udev->dev, "Get one byte OTG status failed\n"); + return; + } + + flag = *fsm->host_req_flag; + if (flag == 0) { + /* Continue HNP polling */ + schedule_delayed_work(&fsm->hnp_polling_work, + msecs_to_jiffies(T_HOST_REQ_POLL)); + return; + } else if (flag != HOST_REQUEST_FLAG) { + dev_err(&udev->dev, "host request flag %d is invalid\n", flag); + return; + } + + /* Host request flag is set */ + if (state == OTG_STATE_A_HOST) { + /* Set b_hnp_enable */ + if (!fsm->otg->host->b_hnp_enable) { + retval = usb_control_msg(udev, + usb_sndctrlpipe(udev, 0), + USB_REQ_SET_FEATURE, 0, + USB_DEVICE_B_HNP_ENABLE, + 0, NULL, 0, + USB_CTRL_SET_TIMEOUT); + if (retval >= 0) + fsm->otg->host->b_hnp_enable = 1; + } + fsm->a_bus_req = 0; + } else if (state == OTG_STATE_B_HOST) { + fsm->b_bus_req = 0; + } + + otg_statemachine(fsm); +} + +static void otg_start_hnp_polling(struct otg_fsm *fsm) +{ + /* + * The memory of host_req_flag should be allocated by + * controller driver, otherwise, hnp polling is not started. 
+ */ + if (!fsm->host_req_flag) + return; + + INIT_DELAYED_WORK(&fsm->hnp_polling_work, otg_hnp_polling_work); + schedule_delayed_work(&fsm->hnp_polling_work, + msecs_to_jiffies(T_HOST_REQ_POLL)); +} + /* Called when entering a state */ static int otg_set_state(struct otg_fsm *fsm, enum usb_otg_state new_state) { @@ -169,6 +254,7 @@ static int otg_set_state(struct otg_fsm *fsm, enum usb_otg_state new_state) otg_set_protocol(fsm, PROTO_HOST); usb_bus_start_enum(fsm->otg->host, fsm->otg->host->otg_port); + otg_start_hnp_polling(fsm); break; case OTG_STATE_A_IDLE: otg_drv_vbus(fsm, 0); @@ -203,6 +289,7 @@ static int otg_set_state(struct otg_fsm *fsm, enum usb_otg_state new_state) */ if (!fsm->a_bus_req || fsm->a_suspend_req_inf) otg_add_timer(fsm, A_WAIT_ENUM); + otg_start_hnp_polling(fsm); break; case OTG_STATE_A_SUSPEND: otg_drv_vbus(fsm, 1); diff --git a/include/linux/usb/otg-fsm.h b/include/linux/usb/otg-fsm.h index f728f1854829..3059a9599f53 100644 --- a/include/linux/usb/otg-fsm.h +++ b/include/linux/usb/otg-fsm.h @@ -40,6 +40,18 @@ #define PROTO_HOST (1) #define PROTO_GADGET (2) +#define OTG_STS_SELECTOR 0xF000 /* OTG status selector, according to + * OTG and EH 2.0 Chapter 6.2.3 + * Table:6-4 + */ + +#define HOST_REQUEST_FLAG 1 /* Host request flag, according to + * OTG and EH 2.0 Charpter 6.2.3 + * Table:6-5 + */ + +#define T_HOST_REQ_POLL (1500) /* 1500ms, HNP polling interval */ + enum otg_fsm_timer { /* Standard OTG timers */ A_WAIT_VRISE, @@ -119,6 +131,8 @@ struct otg_fsm { /* Current usb protocol used: 0:undefine; 1:host; 2:client */ int protocol; struct mutex lock; + u8 *host_req_flag; + struct delayed_work hnp_polling_work; }; struct otg_fsm_ops { -- cgit v1.2.3 From 9c527f49a7312450923222941ff50e5940004e97 Mon Sep 17 00:00:00 2001 From: Li Jun Date: Fri, 19 Feb 2016 10:04:48 +0800 Subject: usb: otg-fsm: add B_AIDL_BDIS timer Add A-idle to B-disconnect timer, B-device detects that bus is idle for more than TB_AIDL_BDIS min and begins HNP by turning off pullup on D+. This allows the bus to discharge to the SE0 state. Acked-by: Peter Chen Signed-off-by: Li Jun Signed-off-by: Felipe Balbi --- include/linux/usb/otg-fsm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/usb/otg-fsm.h b/include/linux/usb/otg-fsm.h index 3059a9599f53..24198e16f849 100644 --- a/include/linux/usb/otg-fsm.h +++ b/include/linux/usb/otg-fsm.h @@ -60,6 +60,7 @@ enum otg_fsm_timer { A_AIDL_BDIS, B_ASE0_BRST, A_BIDL_ADIS, + B_AIDL_BDIS, /* Auxiliary timers */ B_SE0_SRP, -- cgit v1.2.3 From ead22caf8507c0533aab6b89e26776414b477b6f Mon Sep 17 00:00:00 2001 From: Petr Kulhavy Date: Wed, 24 Feb 2016 16:27:16 +0100 Subject: usb: musb: core: added missing const qualifier to musb_hdrc_platform_data::config The musb_hdrc_platform_data::config was defined as a non-const pointer. However some drivers (e.g. the ux500) set up this pointer to point to a static structure, which is potentially dangerous. Since the musb core uses the pointer in a read-only manner the const qualifier was added to protect the content of the config. 
Signed-off-by: Petr Kulhavy Acked-by: Sergei Shtylyov Signed-off-by: Bin Liu Signed-off-by: Felipe Balbi --- drivers/usb/musb/musb_core.c | 2 +- drivers/usb/musb/musb_core.h | 2 +- include/linux/usb/musb.h | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/usb/musb/musb_core.c b/drivers/usb/musb/musb_core.c index c3791a01ab31..39fd95833eb8 100644 --- a/drivers/usb/musb/musb_core.c +++ b/drivers/usb/musb/musb_core.c @@ -1901,7 +1901,7 @@ static void musb_recover_from_babble(struct musb *musb) */ static struct musb *allocate_instance(struct device *dev, - struct musb_hdrc_config *config, void __iomem *mbase) + const struct musb_hdrc_config *config, void __iomem *mbase) { struct musb *musb; struct musb_hw_ep *ep; diff --git a/drivers/usb/musb/musb_core.h b/drivers/usb/musb/musb_core.h index fd215fb45fd4..b6afe9e43305 100644 --- a/drivers/usb/musb/musb_core.h +++ b/drivers/usb/musb/musb_core.h @@ -438,7 +438,7 @@ struct musb { */ unsigned double_buffer_not_ok:1; - struct musb_hdrc_config *config; + const struct musb_hdrc_config *config; int xceiv_old_state; #ifdef CONFIG_DEBUG_FS diff --git a/include/linux/usb/musb.h b/include/linux/usb/musb.h index 96ddfb7ab018..0b3da40a525e 100644 --- a/include/linux/usb/musb.h +++ b/include/linux/usb/musb.h @@ -124,7 +124,7 @@ struct musb_hdrc_platform_data { int (*set_power)(int state); /* MUSB configuration-specific details */ - struct musb_hdrc_config *config; + const struct musb_hdrc_config *config; /* Architecture specific board data */ void *board_data; -- cgit v1.2.3 From 793b80ef14af56d20c998265287648ad34239b6f Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 3 Mar 2016 16:03:58 +0100 Subject: vfs: pass a flags argument to vfs_readv/vfs_writev This way we can set kiocb flags also from the sync read/write path for the read_iter/write_iter operations. For now there is no way to pass flags to plain read/write operations as there is no real need for that, and all flags passed are explicitly rejected for these files. Signed-off-by: Milosz Tanski [hch: rebased on top of my kiocb changes] Signed-off-by: Christoph Hellwig Reviewed-by: Stephen Bates Tested-by: Stephen Bates Acked-by: Jeff Moyer Signed-off-by: Al Viro --- fs/nfsd/vfs.c | 4 ++-- fs/read_write.c | 44 ++++++++++++++++++++++++++------------------ fs/splice.c | 2 +- include/linux/fs.h | 4 ++-- 4 files changed, 31 insertions(+), 23 deletions(-) (limited to 'include/linux') diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 5d2a57e4c03a..d40010e4f1a9 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -870,7 +870,7 @@ __be32 nfsd_readv(struct file *file, loff_t offset, struct kvec *vec, int vlen, oldfs = get_fs(); set_fs(KERNEL_DS); - host_err = vfs_readv(file, (struct iovec __user *)vec, vlen, &offset); + host_err = vfs_readv(file, (struct iovec __user *)vec, vlen, &offset, 0); set_fs(oldfs); return nfsd_finish_read(file, count, host_err); } @@ -957,7 +957,7 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, /* Write the data. 
*/ oldfs = get_fs(); set_fs(KERNEL_DS); - host_err = vfs_writev(file, (struct iovec __user *)vec, vlen, &pos); + host_err = vfs_writev(file, (struct iovec __user *)vec, vlen, &pos, 0); set_fs(oldfs); if (host_err < 0) goto out_nfserr; diff --git a/fs/read_write.c b/fs/read_write.c index 324ec271cc4e..7d453c3e1cb6 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -692,11 +692,14 @@ unsigned long iov_shorten(struct iovec *iov, unsigned long nr_segs, size_t to) EXPORT_SYMBOL(iov_shorten); static ssize_t do_iter_readv_writev(struct file *filp, struct iov_iter *iter, - loff_t *ppos, iter_fn_t fn) + loff_t *ppos, iter_fn_t fn, int flags) { struct kiocb kiocb; ssize_t ret; + if (flags) + return -EOPNOTSUPP; + init_sync_kiocb(&kiocb, filp); kiocb.ki_pos = *ppos; @@ -708,10 +711,13 @@ static ssize_t do_iter_readv_writev(struct file *filp, struct iov_iter *iter, /* Do it by hand, with file-ops */ static ssize_t do_loop_readv_writev(struct file *filp, struct iov_iter *iter, - loff_t *ppos, io_fn_t fn) + loff_t *ppos, io_fn_t fn, int flags) { ssize_t ret = 0; + if (flags) + return -EOPNOTSUPP; + while (iov_iter_count(iter)) { struct iovec iovec = iov_iter_iovec(iter); ssize_t nr; @@ -812,7 +818,8 @@ out: static ssize_t do_readv_writev(int type, struct file *file, const struct iovec __user * uvector, - unsigned long nr_segs, loff_t *pos) + unsigned long nr_segs, loff_t *pos, + int flags) { size_t tot_len; struct iovec iovstack[UIO_FASTIOV]; @@ -844,9 +851,9 @@ static ssize_t do_readv_writev(int type, struct file *file, } if (iter_fn) - ret = do_iter_readv_writev(file, &iter, pos, iter_fn); + ret = do_iter_readv_writev(file, &iter, pos, iter_fn, flags); else - ret = do_loop_readv_writev(file, &iter, pos, fn); + ret = do_loop_readv_writev(file, &iter, pos, fn, flags); if (type != READ) file_end_write(file); @@ -863,27 +870,27 @@ out: } ssize_t vfs_readv(struct file *file, const struct iovec __user *vec, - unsigned long vlen, loff_t *pos) + unsigned long vlen, loff_t *pos, int flags) { if (!(file->f_mode & FMODE_READ)) return -EBADF; if (!(file->f_mode & FMODE_CAN_READ)) return -EINVAL; - return do_readv_writev(READ, file, vec, vlen, pos); + return do_readv_writev(READ, file, vec, vlen, pos, flags); } EXPORT_SYMBOL(vfs_readv); ssize_t vfs_writev(struct file *file, const struct iovec __user *vec, - unsigned long vlen, loff_t *pos) + unsigned long vlen, loff_t *pos, int flags) { if (!(file->f_mode & FMODE_WRITE)) return -EBADF; if (!(file->f_mode & FMODE_CAN_WRITE)) return -EINVAL; - return do_readv_writev(WRITE, file, vec, vlen, pos); + return do_readv_writev(WRITE, file, vec, vlen, pos, flags); } EXPORT_SYMBOL(vfs_writev); @@ -896,7 +903,7 @@ SYSCALL_DEFINE3(readv, unsigned long, fd, const struct iovec __user *, vec, if (f.file) { loff_t pos = file_pos_read(f.file); - ret = vfs_readv(f.file, vec, vlen, &pos); + ret = vfs_readv(f.file, vec, vlen, &pos, 0); if (ret >= 0) file_pos_write(f.file, pos); fdput_pos(f); @@ -916,7 +923,7 @@ SYSCALL_DEFINE3(writev, unsigned long, fd, const struct iovec __user *, vec, if (f.file) { loff_t pos = file_pos_read(f.file); - ret = vfs_writev(f.file, vec, vlen, &pos); + ret = vfs_writev(f.file, vec, vlen, &pos, 0); if (ret >= 0) file_pos_write(f.file, pos); fdput_pos(f); @@ -948,7 +955,7 @@ SYSCALL_DEFINE5(preadv, unsigned long, fd, const struct iovec __user *, vec, if (f.file) { ret = -ESPIPE; if (f.file->f_mode & FMODE_PREAD) - ret = vfs_readv(f.file, vec, vlen, &pos); + ret = vfs_readv(f.file, vec, vlen, &pos, 0); fdput(f); } @@ -972,7 +979,7 @@ 
SYSCALL_DEFINE5(pwritev, unsigned long, fd, const struct iovec __user *, vec, if (f.file) { ret = -ESPIPE; if (f.file->f_mode & FMODE_PWRITE) - ret = vfs_writev(f.file, vec, vlen, &pos, 0); + ret = vfs_writev(f.file, vec, vlen, &pos, flags); fdput(f); } @@ -986,7 +993,8 @@ SYSCALL_DEFINE5(pwritev, unsigned long, fd, const struct iovec __user *, vec, static ssize_t compat_do_readv_writev(int type, struct file *file, const struct compat_iovec __user *uvector, - unsigned long nr_segs, loff_t *pos) + unsigned long nr_segs, loff_t *pos, + int flags) { compat_ssize_t tot_len; struct iovec iovstack[UIO_FASTIOV]; @@ -1018,9 +1026,9 @@ static ssize_t compat_do_readv_writev(int type, struct file *file, } if (iter_fn) - ret = do_iter_readv_writev(file, &iter, pos, iter_fn); + ret = do_iter_readv_writev(file, &iter, pos, iter_fn, flags); else - ret = do_loop_readv_writev(file, &iter, pos, fn); + ret = do_loop_readv_writev(file, &iter, pos, fn, flags); if (type != READ) file_end_write(file); @@ -1049,7 +1057,7 @@ static size_t compat_readv(struct file *file, if (!(file->f_mode & FMODE_CAN_READ)) goto out; - ret = compat_do_readv_writev(READ, file, vec, vlen, pos); + ret = compat_do_readv_writev(READ, file, vec, vlen, pos, 0); out: if (ret > 0) @@ -1126,7 +1134,7 @@ static size_t compat_writev(struct file *file, if (!(file->f_mode & FMODE_CAN_WRITE)) goto out; - ret = compat_do_readv_writev(WRITE, file, vec, vlen, pos); + ret = compat_do_readv_writev(WRITE, file, vec, vlen, pos, 0); out: if (ret > 0) diff --git a/fs/splice.c b/fs/splice.c index 82bc0d64fc38..3dc142637ab5 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -577,7 +577,7 @@ static ssize_t kernel_readv(struct file *file, const struct iovec *vec, old_fs = get_fs(); set_fs(get_ds()); /* The cast to a user pointer is valid due to the set_fs() */ - res = vfs_readv(file, (const struct iovec __user *)vec, vlen, &pos); + res = vfs_readv(file, (const struct iovec __user *)vec, vlen, &pos, 0); set_fs(old_fs); return res; diff --git a/include/linux/fs.h b/include/linux/fs.h index 1a2046275cdf..6ec87964644f 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1712,9 +1712,9 @@ extern ssize_t __vfs_write(struct file *, const char __user *, size_t, loff_t *) extern ssize_t vfs_read(struct file *, char __user *, size_t, loff_t *); extern ssize_t vfs_write(struct file *, const char __user *, size_t, loff_t *); extern ssize_t vfs_readv(struct file *, const struct iovec __user *, - unsigned long, loff_t *); + unsigned long, loff_t *, int); extern ssize_t vfs_writev(struct file *, const struct iovec __user *, - unsigned long, loff_t *); + unsigned long, loff_t *, int); extern ssize_t vfs_copy_file_range(struct file *, loff_t , struct file *, loff_t, size_t, unsigned int); extern int vfs_clone_file_range(struct file *file_in, loff_t pos_in, -- cgit v1.2.3 From f17d8b35452cab31a70d224964cd583fb2845449 Mon Sep 17 00:00:00 2001 From: Milosz Tanski Date: Thu, 3 Mar 2016 16:03:59 +0100 Subject: vfs: Define new syscalls preadv2,pwritev2 New syscalls that take a flag argument. No flags are added yet in this patch.
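A hedged userspace sketch of the new call; the raw syscall() form is used because no libc wrapper exists yet, and __NR_preadv2 stands for whatever number the target architecture assigns:

	#include <sys/uio.h>
	#include <sys/syscall.h>
	#include <unistd.h>

	char buf[4096];
	struct iovec iov = { .iov_base = buf, .iov_len = sizeof(buf) };
	ssize_t n;

	/* pos_l/pos_h of -1 yield pos == -1, which the dispatch above
	 * maps to "use and update the current file position", i.e.
	 * plain readv() behaviour; flags must be 0 for now.
	 */
	n = syscall(__NR_preadv2, fd, &iov, 1, -1L, -1L, 0);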
Signed-off-by: Milosz Tanski [hch: rebased on top of my kiocb changes] Signed-off-by: Christoph Hellwig Reviewed-by: Stephen Bates Tested-by: Stephen Bates Acked-by: Jeff Moyer Signed-off-by: Al Viro --- fs/read_write.c | 161 ++++++++++++++++++++++++++++++++++++----------- include/linux/compat.h | 6 ++ include/linux/syscalls.h | 6 ++ 3 files changed, 138 insertions(+), 35 deletions(-) (limited to 'include/linux') diff --git a/fs/read_write.c b/fs/read_write.c index 7d453c3e1cb6..e9c9e2a667ce 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -895,15 +895,15 @@ ssize_t vfs_writev(struct file *file, const struct iovec __user *vec, EXPORT_SYMBOL(vfs_writev); -SYSCALL_DEFINE3(readv, unsigned long, fd, const struct iovec __user *, vec, - unsigned long, vlen) +static ssize_t do_readv(unsigned long fd, const struct iovec __user *vec, + unsigned long vlen, int flags) { struct fd f = fdget_pos(fd); ssize_t ret = -EBADF; if (f.file) { loff_t pos = file_pos_read(f.file); - ret = vfs_readv(f.file, vec, vlen, &pos, 0); + ret = vfs_readv(f.file, vec, vlen, &pos, flags); if (ret >= 0) file_pos_write(f.file, pos); fdput_pos(f); @@ -915,15 +915,15 @@ SYSCALL_DEFINE3(readv, unsigned long, fd, const struct iovec __user *, vec, return ret; } -SYSCALL_DEFINE3(writev, unsigned long, fd, const struct iovec __user *, vec, - unsigned long, vlen) +static ssize_t do_writev(unsigned long fd, const struct iovec __user *vec, + unsigned long vlen, int flags) { struct fd f = fdget_pos(fd); ssize_t ret = -EBADF; if (f.file) { loff_t pos = file_pos_read(f.file); - ret = vfs_writev(f.file, vec, vlen, &pos, 0); + ret = vfs_writev(f.file, vec, vlen, &pos, flags); if (ret >= 0) file_pos_write(f.file, pos); fdput_pos(f); @@ -941,10 +941,9 @@ static inline loff_t pos_from_hilo(unsigned long high, unsigned long low) return (((loff_t)high << HALF_LONG_BITS) << HALF_LONG_BITS) | low; } -SYSCALL_DEFINE5(preadv, unsigned long, fd, const struct iovec __user *, vec, - unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h) +static ssize_t do_preadv(unsigned long fd, const struct iovec __user *vec, + unsigned long vlen, loff_t pos, int flags) { - loff_t pos = pos_from_hilo(pos_h, pos_l); struct fd f; ssize_t ret = -EBADF; @@ -955,7 +954,7 @@ SYSCALL_DEFINE5(preadv, unsigned long, fd, const struct iovec __user *, vec, if (f.file) { ret = -ESPIPE; if (f.file->f_mode & FMODE_PREAD) - ret = vfs_readv(f.file, vec, vlen, &pos, 0); + ret = vfs_readv(f.file, vec, vlen, &pos, flags); fdput(f); } @@ -965,10 +964,9 @@ SYSCALL_DEFINE5(preadv, unsigned long, fd, const struct iovec __user *, vec, return ret; } -SYSCALL_DEFINE5(pwritev, unsigned long, fd, const struct iovec __user *, vec, - unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h) +static ssize_t do_pwritev(unsigned long fd, const struct iovec __user *vec, + unsigned long vlen, loff_t pos, int flags) { - loff_t pos = pos_from_hilo(pos_h, pos_l); struct fd f; ssize_t ret = -EBADF; @@ -979,7 +977,7 @@ SYSCALL_DEFINE5(pwritev, unsigned long, fd, const struct iovec __user *, vec, if (f.file) { ret = -ESPIPE; if (f.file->f_mode & FMODE_PWRITE) - ret = vfs_writev(f.file, vec, vlen, &pos, 0); + ret = vfs_writev(f.file, vec, vlen, &pos, flags); fdput(f); } @@ -989,6 +987,58 @@ SYSCALL_DEFINE5(pwritev, unsigned long, fd, const struct iovec __user *, vec, return ret; } +SYSCALL_DEFINE3(readv, unsigned long, fd, const struct iovec __user *, vec, + unsigned long, vlen) +{ + return do_readv(fd, vec, vlen, 0); +} + +SYSCALL_DEFINE3(writev, unsigned long, fd, const struct iovec 
__user *, vec, + unsigned long, vlen) +{ + return do_writev(fd, vec, vlen, 0); +} + +SYSCALL_DEFINE5(preadv, unsigned long, fd, const struct iovec __user *, vec, + unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h) +{ + loff_t pos = pos_from_hilo(pos_h, pos_l); + + return do_preadv(fd, vec, vlen, pos, 0); +} + +SYSCALL_DEFINE6(preadv2, unsigned long, fd, const struct iovec __user *, vec, + unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h, + int, flags) +{ + loff_t pos = pos_from_hilo(pos_h, pos_l); + + if (pos == -1) + return do_readv(fd, vec, vlen, flags); + + return do_preadv(fd, vec, vlen, pos, flags); +} + +SYSCALL_DEFINE5(pwritev, unsigned long, fd, const struct iovec __user *, vec, + unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h) +{ + loff_t pos = pos_from_hilo(pos_h, pos_l); + + return do_pwritev(fd, vec, vlen, pos, 0); +} + +SYSCALL_DEFINE6(pwritev2, unsigned long, fd, const struct iovec __user *, vec, + unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h, + int, flags) +{ + loff_t pos = pos_from_hilo(pos_h, pos_l); + + if (pos == -1) + return do_writev(fd, vec, vlen, flags); + + return do_pwritev(fd, vec, vlen, pos, flags); +} + #ifdef CONFIG_COMPAT static ssize_t compat_do_readv_writev(int type, struct file *file, @@ -1046,7 +1096,7 @@ out: static size_t compat_readv(struct file *file, const struct compat_iovec __user *vec, - unsigned long vlen, loff_t *pos) + unsigned long vlen, loff_t *pos, int flags) { ssize_t ret = -EBADF; @@ -1057,7 +1107,7 @@ static size_t compat_readv(struct file *file, if (!(file->f_mode & FMODE_CAN_READ)) goto out; - ret = compat_do_readv_writev(READ, file, vec, vlen, pos, 0); + ret = compat_do_readv_writev(READ, file, vec, vlen, pos, flags); out: if (ret > 0) @@ -1066,9 +1116,9 @@ out: return ret; } -COMPAT_SYSCALL_DEFINE3(readv, compat_ulong_t, fd, - const struct compat_iovec __user *,vec, - compat_ulong_t, vlen) +static size_t do_compat_readv(compat_ulong_t fd, + const struct compat_iovec __user *vec, + compat_ulong_t vlen, int flags) { struct fd f = fdget_pos(fd); ssize_t ret; @@ -1077,16 +1127,24 @@ COMPAT_SYSCALL_DEFINE3(readv, compat_ulong_t, fd, if (!f.file) return -EBADF; pos = f.file->f_pos; - ret = compat_readv(f.file, vec, vlen, &pos); + ret = compat_readv(f.file, vec, vlen, &pos, flags); if (ret >= 0) f.file->f_pos = pos; fdput_pos(f); return ret; + } -static long __compat_sys_preadv64(unsigned long fd, +COMPAT_SYSCALL_DEFINE3(readv, compat_ulong_t, fd, + const struct compat_iovec __user *,vec, + compat_ulong_t, vlen) +{ + return do_compat_readv(fd, vec, vlen, 0); +} + +static long do_compat_preadv64(unsigned long fd, const struct compat_iovec __user *vec, - unsigned long vlen, loff_t pos) + unsigned long vlen, loff_t pos, int flags) { struct fd f; ssize_t ret; @@ -1098,7 +1156,7 @@ static long __compat_sys_preadv64(unsigned long fd, return -EBADF; ret = -ESPIPE; if (f.file->f_mode & FMODE_PREAD) - ret = compat_readv(f.file, vec, vlen, &pos); + ret = compat_readv(f.file, vec, vlen, &pos, flags); fdput(f); return ret; } @@ -1108,7 +1166,7 @@ COMPAT_SYSCALL_DEFINE4(preadv64, unsigned long, fd, const struct compat_iovec __user *,vec, unsigned long, vlen, loff_t, pos) { - return __compat_sys_preadv64(fd, vec, vlen, pos); + return do_compat_preadv64(fd, vec, vlen, pos, 0); } #endif @@ -1118,12 +1176,25 @@ COMPAT_SYSCALL_DEFINE5(preadv, compat_ulong_t, fd, { loff_t pos = ((loff_t)pos_high << 32) | pos_low; - return __compat_sys_preadv64(fd, vec, vlen, pos); + return do_compat_preadv64(fd, 
vec, vlen, pos, 0); +} + +COMPAT_SYSCALL_DEFINE6(preadv2, compat_ulong_t, fd, + const struct compat_iovec __user *,vec, + compat_ulong_t, vlen, u32, pos_low, u32, pos_high, + int, flags) +{ + loff_t pos = ((loff_t)pos_high << 32) | pos_low; + + if (pos == -1) + return do_compat_readv(fd, vec, vlen, flags); + + return do_compat_preadv64(fd, vec, vlen, pos, flags); } static size_t compat_writev(struct file *file, const struct compat_iovec __user *vec, - unsigned long vlen, loff_t *pos) + unsigned long vlen, loff_t *pos, int flags) { ssize_t ret = -EBADF; @@ -1143,9 +1214,9 @@ out: return ret; } -COMPAT_SYSCALL_DEFINE3(writev, compat_ulong_t, fd, - const struct compat_iovec __user *, vec, - compat_ulong_t, vlen) +static size_t do_compat_writev(compat_ulong_t fd, + const struct compat_iovec __user* vec, + compat_ulong_t vlen, int flags) { struct fd f = fdget_pos(fd); ssize_t ret; @@ -1154,16 +1225,23 @@ COMPAT_SYSCALL_DEFINE3(writev, compat_ulong_t, fd, if (!f.file) return -EBADF; pos = f.file->f_pos; - ret = compat_writev(f.file, vec, vlen, &pos); + ret = compat_writev(f.file, vec, vlen, &pos, flags); if (ret >= 0) f.file->f_pos = pos; fdput_pos(f); return ret; } -static long __compat_sys_pwritev64(unsigned long fd, +COMPAT_SYSCALL_DEFINE3(writev, compat_ulong_t, fd, + const struct compat_iovec __user *, vec, + compat_ulong_t, vlen) +{ + return do_compat_writev(fd, vec, vlen, 0); +} + +static long do_compat_pwritev64(unsigned long fd, const struct compat_iovec __user *vec, - unsigned long vlen, loff_t pos) + unsigned long vlen, loff_t pos, int flags) { struct fd f; ssize_t ret; @@ -1175,7 +1253,7 @@ static long __compat_sys_pwritev64(unsigned long fd, return -EBADF; ret = -ESPIPE; if (f.file->f_mode & FMODE_PWRITE) - ret = compat_writev(f.file, vec, vlen, &pos); + ret = compat_writev(f.file, vec, vlen, &pos, flags); fdput(f); return ret; } @@ -1185,7 +1263,7 @@ COMPAT_SYSCALL_DEFINE4(pwritev64, unsigned long, fd, const struct compat_iovec __user *,vec, unsigned long, vlen, loff_t, pos) { - return __compat_sys_pwritev64(fd, vec, vlen, pos); + return do_compat_pwritev64(fd, vec, vlen, pos, 0); } #endif @@ -1195,8 +1273,21 @@ COMPAT_SYSCALL_DEFINE5(pwritev, compat_ulong_t, fd, { loff_t pos = ((loff_t)pos_high << 32) | pos_low; - return __compat_sys_pwritev64(fd, vec, vlen, pos); + return do_compat_pwritev64(fd, vec, vlen, pos, 0); +} + +COMPAT_SYSCALL_DEFINE6(pwritev2, compat_ulong_t, fd, + const struct compat_iovec __user *,vec, + compat_ulong_t, vlen, u32, pos_low, u32, pos_high, int, flags) +{ + loff_t pos = ((loff_t)pos_high << 32) | pos_low; + + if (pos == -1) + return do_compat_writev(fd, vec, vlen, flags); + + return do_compat_pwritev64(fd, vec, vlen, pos, flags); } + #endif static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos, diff --git a/include/linux/compat.h b/include/linux/compat.h index a76c9172b2eb..fe4ccd0c748a 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -340,6 +340,12 @@ asmlinkage ssize_t compat_sys_preadv(compat_ulong_t fd, asmlinkage ssize_t compat_sys_pwritev(compat_ulong_t fd, const struct compat_iovec __user *vec, compat_ulong_t vlen, u32 pos_low, u32 pos_high); +asmlinkage ssize_t compat_sys_preadv2(compat_ulong_t fd, + const struct compat_iovec __user *vec, + compat_ulong_t vlen, u32 pos_low, u32 pos_high, int flags); +asmlinkage ssize_t compat_sys_pwritev2(compat_ulong_t fd, + const struct compat_iovec __user *vec, + compat_ulong_t vlen, u32 pos_low, u32 pos_high, int flags); #ifdef __ARCH_WANT_COMPAT_SYS_PREADV64 asmlinkage long 
compat_sys_preadv64(unsigned long fd, diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 185815c96433..d795472c54d8 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -575,8 +575,14 @@ asmlinkage long sys_pwrite64(unsigned int fd, const char __user *buf, size_t count, loff_t pos); asmlinkage long sys_preadv(unsigned long fd, const struct iovec __user *vec, unsigned long vlen, unsigned long pos_l, unsigned long pos_h); +asmlinkage long sys_preadv2(unsigned long fd, const struct iovec __user *vec, + unsigned long vlen, unsigned long pos_l, unsigned long pos_h, + int flags); asmlinkage long sys_pwritev(unsigned long fd, const struct iovec __user *vec, unsigned long vlen, unsigned long pos_l, unsigned long pos_h); +asmlinkage long sys_pwritev2(unsigned long fd, const struct iovec __user *vec, + unsigned long vlen, unsigned long pos_l, unsigned long pos_h, + int flags); asmlinkage long sys_getcwd(char __user *buf, unsigned long size); asmlinkage long sys_mkdir(const char __user *pathname, umode_t mode); asmlinkage long sys_chdir(const char __user *filename); -- cgit v1.2.3 From 97be7ebe53915af504fb491fb99f064c7cf3cb09 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 3 Mar 2016 16:04:01 +0100 Subject: vfs: add the RWF_HIPRI flag for preadv2/pwritev2 This adds a flag that tells the file system that this is a high priority request for which it is worth polling the hardware. The flag is purely advisory and can be ignored if not supported. Signed-off-by: Christoph Hellwig Reviewed-by: Stephen Bates Tested-by: Stephen Bates Acked-by: Jeff Moyer Signed-off-by: Al Viro --- fs/read_write.c | 6 ++++-- include/linux/fs.h | 1 + include/uapi/linux/fs.h | 3 +++ 3 files changed, 8 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/fs/read_write.c b/fs/read_write.c index e9c9e2a667ce..07c53db04ec1 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -697,10 +697,12 @@ static ssize_t do_iter_readv_writev(struct file *filp, struct iov_iter *iter, struct kiocb kiocb; ssize_t ret; - if (flags) + if (flags & ~RWF_HIPRI) return -EOPNOTSUPP; init_sync_kiocb(&kiocb, filp); + if (flags & RWF_HIPRI) + kiocb.ki_flags |= IOCB_HIPRI; kiocb.ki_pos = *ppos; ret = fn(&kiocb, iter); @@ -715,7 +717,7 @@ static ssize_t do_loop_readv_writev(struct file *filp, struct iov_iter *iter, { ssize_t ret = 0; - if (flags) + if (flags & ~RWF_HIPRI) return -EOPNOTSUPP; while (iov_iter_count(iter)) { diff --git a/include/linux/fs.h b/include/linux/fs.h index 6ec87964644f..337de88ff50f 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -320,6 +320,7 @@ struct writeback_control; #define IOCB_EVENTFD (1 << 0) #define IOCB_APPEND (1 << 1) #define IOCB_DIRECT (1 << 2) +#define IOCB_HIPRI (1 << 3) struct kiocb { struct file *ki_filp; diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h index 41e0433b4a83..847c656729f8 100644 --- a/include/uapi/linux/fs.h +++ b/include/uapi/linux/fs.h @@ -305,4 +305,7 @@ struct fsxattr { #define SYNC_FILE_RANGE_WRITE 2 #define SYNC_FILE_RANGE_WAIT_AFTER 4 +/* flags for preadv2/pwritev2: */ +#define RWF_HIPRI 0x00000001 /* high priority request, poll if possible */ + #endif /* _UAPI_LINUX_FS_H */ -- cgit v1.2.3 From 8e0b60b96ba06d826a2b26e23b1986853a4e5291 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 3 Mar 2016 16:04:03 +0100 Subject: blk-mq: enable polling support by default Now that applications need to explicitly ask for polling we can enable it by default in blk-mq drivers.
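Tying the series together, a sketch of how an application would now opt in (64-bit low/high position split shown; this only has a practical effect on queues with QUEUE_FLAG_POLL set and a driver poll hook):

	/* advisory: poll for completion instead of sleeping */
	n = syscall(__NR_preadv2, fd, &iov, 1, pos, 0, RWF_HIPRI);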
Note that this will only have an effect on drivers that supply a poll function, which currently only includes nvme. Signed-off-by: Christoph Hellwig Reviewed-by: Stephen Bates Tested-by: Stephen Bates Reviewed-by: Jeff Moyer Signed-off-by: Al Viro --- include/linux/blkdev.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 29189aeace19..ff33a1ab58c9 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -499,7 +499,8 @@ struct request_queue { #define QUEUE_FLAG_MQ_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \ (1 << QUEUE_FLAG_STACKABLE) | \ - (1 << QUEUE_FLAG_SAME_COMP)) + (1 << QUEUE_FLAG_SAME_COMP) | \ + (1 << QUEUE_FLAG_POLL)) static inline void queue_lockdep_assert_held(struct request_queue *q) { -- cgit v1.2.3 From b5d3755a22e0cc4c369c0985aef0c52c2477c1e7 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Fri, 4 Mar 2016 11:52:16 +0100 Subject: uapi: define DIV_ROUND_UP for userland DIV_ROUND_UP is defined in linux/kernel.h only for the kernel. When ethtool.h is included by a userland app, we get the following error: include/linux/ethtool.h:1218:8: error: variably modified 'queue_mask' at file scope __u32 queue_mask[DIV_ROUND_UP(MAX_NUM_QUEUE, 32)]; ^ Let's add a common definition in uapi and use it everywhere. Fixes: ac2c7ad0e5d6 ("net/ethtool: introduce a new ioctl for per queue setting") CC: Kan Liang Suggested-by: Ben Hutchings Signed-off-by: Nicolas Dichtel Signed-off-by: David S. Miller --- include/linux/kernel.h | 2 +- include/uapi/linux/ethtool.h | 3 ++- include/uapi/linux/kernel.h | 1 + include/uapi/linux/mroute6.h | 9 ++------- 4 files changed, 6 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index f31638c6e873..ac1923957236 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -64,7 +64,7 @@ #define round_down(x, y) ((x) & ~__round_mask(x, y)) #define FIELD_SIZEOF(t, f) (sizeof(((t*)0)->f)) -#define DIV_ROUND_UP(n,d) (((n) + (d) - 1) / (d)) +#define DIV_ROUND_UP __KERNEL_DIV_ROUND_UP #define DIV_ROUND_UP_ULL(ll,d) \ ({ unsigned long long _tmp = (ll)+(d)-1; do_div(_tmp, d); _tmp; }) diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index 37fd6dc33de4..9c22249ebf35 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -13,6 +13,7 @@ #ifndef _UAPI_LINUX_ETHTOOL_H #define _UAPI_LINUX_ETHTOOL_H +#include #include #include @@ -1215,7 +1216,7 @@ enum ethtool_sfeatures_retval_bits { struct ethtool_per_queue_op { __u32 cmd; __u32 sub_command; - __u32 queue_mask[DIV_ROUND_UP(MAX_NUM_QUEUE, 32)]; + __u32 queue_mask[__KERNEL_DIV_ROUND_UP(MAX_NUM_QUEUE, 32)]; char data[]; }; diff --git a/include/uapi/linux/kernel.h b/include/uapi/linux/kernel.h index 321e399457f5..466073f0ce46 100644 --- a/include/uapi/linux/kernel.h +++ b/include/uapi/linux/kernel.h @@ -9,5 +9,6 @@ #define __ALIGN_KERNEL(x, a) __ALIGN_KERNEL_MASK(x, (typeof(x))(a) - 1) #define __ALIGN_KERNEL_MASK(x, mask) (((x) + (mask)) & ~(mask)) +#define __KERNEL_DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d)) #endif /* _UAPI_LINUX_KERNEL_H */ diff --git a/include/uapi/linux/mroute6.h b/include/uapi/linux/mroute6.h index ce91215cf7e6..5062fb5751e1 100644 --- a/include/uapi/linux/mroute6.h +++ b/include/uapi/linux/mroute6.h @@ -1,6 +1,7 @@ #ifndef _UAPI__LINUX_MROUTE6_H #define _UAPI__LINUX_MROUTE6_H +#include #include #include @@ -46,14 +47,8 @@ typedef unsigned short mifi_t; typedef __u32 if_mask;
#define NIFBITS (sizeof(if_mask) * 8) /* bits per mask */ -#if !defined(__KERNEL__) -#if !defined(DIV_ROUND_UP) -#define DIV_ROUND_UP(x,y) (((x) + ((y) - 1)) / (y)) -#endif -#endif - typedef struct if_set { - if_mask ifs_bits[DIV_ROUND_UP(IF_SETSIZE, NIFBITS)]; + if_mask ifs_bits[__KERNEL_DIV_ROUND_UP(IF_SETSIZE, NIFBITS)]; } if_set; #define IF_SET(n, p) ((p)->ifs_bits[(n)/NIFBITS] |= (1 << ((n) % NIFBITS))) -- cgit v1.2.3 From b821957a5ae76994eebf9eed3247be0ba5775c30 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Thu, 3 Mar 2016 00:48:30 +0000 Subject: regmap: replace regmap_write_bits() commit 23b92e4cf5fd ("regmap: remove regmap_write_bits()") removed regmap_write_bits(), but an MFD driver was still using it, so commit e30fccd6771d ("regmap: Keep regmap_write_bits()") brought it back in its original form. This patch reimplements regmap_write_bits() on top of regmap_update_bits_base(). Signed-off-by: Kuninori Morimoto Signed-off-by: Mark Brown --- drivers/base/regmap/regmap.c | 23 ----------------------- include/linux/regmap.h | 12 +++--------- 2 files changed, 3 insertions(+), 32 deletions(-) (limited to 'include/linux') diff --git a/drivers/base/regmap/regmap.c b/drivers/base/regmap/regmap.c index c7d4a636778d..3fb04c36ae5e 100644 --- a/drivers/base/regmap/regmap.c +++ b/drivers/base/regmap/regmap.c @@ -2650,29 +2650,6 @@ int regmap_update_bits_base(struct regmap *map, unsigned int reg, } EXPORT_SYMBOL_GPL(regmap_update_bits_base); -/** - * regmap_write_bits: Perform a read/modify/write cycle on the register map - * - * @map: Register map to update - * @reg: Register to update - * @mask: Bitmask to change - * @val: New value for bitmask - * - * Returns zero for success, a negative number on error. - */ -int regmap_write_bits(struct regmap *map, unsigned int reg, - unsigned int mask, unsigned int val) -{ - int ret; - - map->lock(map->lock_arg); - ret = _regmap_update_bits(map, reg, mask, val, NULL, true); - map->unlock(map->lock_arg); - - return ret; -} -EXPORT_SYMBOL_GPL(regmap_write_bits); - void regmap_async_complete_cb(struct regmap_async *async, int ret) { struct regmap *map = async->map; diff --git a/include/linux/regmap.h b/include/linux/regmap.h index e0960b3ff290..0744c9fea24c 100644 --- a/include/linux/regmap.h +++ b/include/linux/regmap.h @@ -74,6 +74,9 @@ struct reg_sequence { #define regmap_update_bits_check_async(map, reg, mask, val, change)\ regmap_update_bits_base(map, reg, mask, val, change, true, false) +#define regmap_write_bits(map, reg, mask, val) \ + regmap_update_bits_base(map, reg, mask, val, NULL, false, true) + #define regmap_field_write(field, val) \ regmap_field_update_bits_base(field, ~0, val, NULL, false, false) #define regmap_field_force_write(field, val) \ @@ -721,8 +724,6 @@ int regmap_bulk_read(struct regmap *map, unsigned int reg, void *val, int regmap_update_bits_base(struct regmap *map, unsigned int reg, unsigned int mask, unsigned int val, bool *change, bool async, bool force); -int regmap_write_bits(struct regmap *map, unsigned int reg, - unsigned int mask, unsigned int val); int regmap_get_val_bytes(struct regmap *map); int regmap_get_max_register(struct regmap *map); int regmap_get_reg_stride(struct regmap *map); @@ -961,13 +962,6 @@ static inline int regmap_update_bits_base(struct regmap *map, unsigned int reg, return -EINVAL; } -static inline int regmap_write_bits(struct regmap *map, unsigned int reg, - unsigned int mask, unsigned int val) -{ - WARN_ONCE(1, "regmap API is disabled"); - return -EINVAL; -} - static inline int
regmap_field_update_bits_base(struct regmap_field *field, unsigned int mask, unsigned int val, bool *change, bool async, bool force) -- cgit v1.2.3 From 045b98480cbe9d8dfe80983013591769f8a112cc Mon Sep 17 00:00:00 2001 From: Laxman Dewangan Date: Wed, 10 Feb 2016 14:29:50 +0530 Subject: regmap: irq: add devm apis for regmap_{add,del}_irq_chip Add device-managed APIs for regmap_add_irq_chip() and regmap_del_irq_chip() so that the device framework can free these resources automatically. This helps in two ways: 1. It maintains the ordering of resource allocation and deallocation. With the unmanaged APIs: regmap_add_irq_chip(&d); devm_request_threaded_irq(virq); on the free path regmap_del_irq_chip(d) runs first and the IRQ registration is removed afterwards, i.e. the regmap irq chip is deleted while the interrupt is still registered, which forces callers to use non-devm irq registration. With the devm APIs the sequence is maintained properly: devm_regmap_add_irq_chip(&d); devm_request_threaded_irq(virq); and deallocation happens in the reverse order via the device framework. 2. There is no need to delete the regmap_irq_chip in the error path or the remove callback, so less code is needed on those paths. Signed-off-by: Laxman Dewangan Signed-off-by: Mark Brown --- drivers/base/regmap/regmap-irq.c | 82 ++++++++++++++++++++++++++++++++++++++++ include/linux/regmap.h | 8 ++++ 2 files changed, 90 insertions(+) (limited to 'include/linux') diff --git a/drivers/base/regmap/regmap-irq.c b/drivers/base/regmap/regmap-irq.c index 9b0d202414d0..0b0ea3b88a91 100644 --- a/drivers/base/regmap/regmap-irq.c +++ b/drivers/base/regmap/regmap-irq.c @@ -674,6 +674,88 @@ void regmap_del_irq_chip(int irq, struct regmap_irq_chip_data *d) } EXPORT_SYMBOL_GPL(regmap_del_irq_chip); +static void devm_regmap_irq_chip_release(struct device *dev, void *res) +{ + struct regmap_irq_chip_data *d = *(struct regmap_irq_chip_data **)res; + + regmap_del_irq_chip(d->irq, d); +} + +static int devm_regmap_irq_chip_match(struct device *dev, void *res, void *data) + +{ + struct regmap_irq_chip_data **r = res; + + if (!r || !*r) { + WARN_ON(!r || !*r); + return 0; + } + return *r == data; } + +/** + * devm_regmap_add_irq_chip(): Resource managed regmap_add_irq_chip() + * + * @dev: The device the irq_chip belongs to. + * @map: The regmap for the device. + * @irq: The IRQ the device uses to signal interrupts + * @irq_flags: The IRQF_ flags to use for the primary interrupt. + * @chip: Configuration for the interrupt controller. + * @data: Runtime data structure for the controller, allocated on success + * + * Returns 0 on success or an errno on failure. + * + * The regmap_irq_chip data will automatically be released when the device is + * unbound. + */ +int devm_regmap_add_irq_chip(struct device *dev, struct regmap *map, int irq, + int irq_flags, int irq_base, + const struct regmap_irq_chip *chip, + struct regmap_irq_chip_data **data) +{ + struct regmap_irq_chip_data **ptr, *d; + int ret; + + ptr = devres_alloc(devm_regmap_irq_chip_release, sizeof(*ptr), + GFP_KERNEL); + if (!ptr) + return -ENOMEM; + + ret = regmap_add_irq_chip(map, irq, irq_flags, irq_base, + chip, &d); + if (ret < 0) { + devres_free(ptr); + return ret; + } + + *ptr = d; + devres_add(dev, ptr); + *data = d; + return 0; +} +EXPORT_SYMBOL_GPL(devm_regmap_add_irq_chip); + +/** + * devm_regmap_del_irq_chip(): Resource managed regmap_del_irq_chip() + * + * @dev: Device for which the resource was allocated.
+ * @irq: Primary IRQ for the device + * @d: regmap_irq_chip_data allocated by regmap_add_irq_chip() + */ +void devm_regmap_del_irq_chip(struct device *dev, int irq, + struct regmap_irq_chip_data *data) +{ + int rc; + + WARN_ON(irq != data->irq); + rc = devres_release(dev, devm_regmap_irq_chip_release, + devm_regmap_irq_chip_match, data); + + if (rc != 0) + WARN_ON(rc); +} +EXPORT_SYMBOL_GPL(devm_regmap_del_irq_chip); + /** * regmap_irq_chip_get_base(): Retrieve interrupt base for a regmap IRQ chip * diff --git a/include/linux/regmap.h b/include/linux/regmap.h index 18394343f489..de428962bfe6 100644 --- a/include/linux/regmap.h +++ b/include/linux/regmap.h @@ -868,6 +868,14 @@ int regmap_add_irq_chip(struct regmap *map, int irq, int irq_flags, int irq_base, const struct regmap_irq_chip *chip, struct regmap_irq_chip_data **data); void regmap_del_irq_chip(int irq, struct regmap_irq_chip_data *data); + +int devm_regmap_add_irq_chip(struct device *dev, struct regmap *map, int irq, + int irq_flags, int irq_base, + const struct regmap_irq_chip *chip, + struct regmap_irq_chip_data **data); +void devm_regmap_del_irq_chip(struct device *dev, int irq, + struct regmap_irq_chip_data *data); + int regmap_irq_chip_get_base(struct regmap_irq_chip_data *data); int regmap_irq_get_virq(struct regmap_irq_chip_data *data, int irq); struct irq_domain *regmap_irq_get_domain(struct regmap_irq_chip_data *data); -- cgit v1.2.3 From d17799f9c10e283cccd4d598d3416e6fac336ab9 Mon Sep 17 00:00:00 2001 From: chenhui zhao Date: Fri, 20 Nov 2015 17:13:59 +0800 Subject: powerpc/rcpm: add RCPM driver There is a RCPM (Run Control/Power Management) in Freescale QorIQ series processors. The device performs tasks associated with device run control and power management. The driver implements some features: mask/unmask irq, enter/exit low power states, freeze time base, etc. Signed-off-by: Chenhui Zhao Signed-off-by: Tang Yuantian [scottwood: remove __KERNEL__ ifdef] Signed-off-by: Scott Wood --- arch/powerpc/include/asm/cputhreads.h | 1 + arch/powerpc/include/asm/fsl_pm.h | 51 +++++ arch/powerpc/kernel/head_64.S | 19 ++ arch/powerpc/platforms/85xx/Kconfig | 1 + arch/powerpc/platforms/85xx/common.c | 3 + arch/powerpc/sysdev/Kconfig | 5 + arch/powerpc/sysdev/Makefile | 1 + arch/powerpc/sysdev/fsl_rcpm.c | 385 ++++++++++++++++++++++++++++++++++ include/linux/fsl/guts.h | 105 ++++++++++ 9 files changed, 571 insertions(+) create mode 100644 arch/powerpc/include/asm/fsl_pm.h create mode 100644 arch/powerpc/sysdev/fsl_rcpm.c (limited to 'include/linux') diff --git a/arch/powerpc/include/asm/cputhreads.h b/arch/powerpc/include/asm/cputhreads.h index ea9623147b87..81f5b338c76d 100644 --- a/arch/powerpc/include/asm/cputhreads.h +++ b/arch/powerpc/include/asm/cputhreads.h @@ -103,6 +103,7 @@ static inline u32 get_tensr(void) return 1; } +void book3e_stop_thread(int thread); #endif /* _ASM_POWERPC_CPUTHREADS_H */ diff --git a/arch/powerpc/include/asm/fsl_pm.h b/arch/powerpc/include/asm/fsl_pm.h new file mode 100644 index 000000000000..47df55e36d4f --- /dev/null +++ b/arch/powerpc/include/asm/fsl_pm.h @@ -0,0 +1,51 @@ +/* + * Support Power Management + * + * Copyright 2014-2015 Freescale Semiconductor Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. 
+ */ +#ifndef __PPC_FSL_PM_H +#define __PPC_FSL_PM_H + +#define E500_PM_PH10 1 +#define E500_PM_PH15 2 +#define E500_PM_PH20 3 +#define E500_PM_PH30 4 +#define E500_PM_DOZE E500_PM_PH10 +#define E500_PM_NAP E500_PM_PH15 + +#define PLAT_PM_SLEEP 20 +#define PLAT_PM_LPM20 30 + +#define FSL_PM_SLEEP (1 << 0) +#define FSL_PM_DEEP_SLEEP (1 << 1) + +struct fsl_pm_ops { + /* mask pending interrupts to the RCPM from MPIC */ + void (*irq_mask)(int cpu); + + /* unmask pending interrupts to the RCPM from MPIC */ + void (*irq_unmask)(int cpu); + void (*cpu_enter_state)(int cpu, int state); + void (*cpu_exit_state)(int cpu, int state); + void (*cpu_up_prepare)(int cpu); + void (*cpu_die)(int cpu); + int (*plat_enter_sleep)(void); + void (*freeze_time_base)(bool freeze); + + /* keep the power of IP blocks during sleep/deep sleep */ + void (*set_ip_power)(bool enable, u32 mask); + + /* get platform supported power management modes */ + unsigned int (*get_pm_modes)(void); +}; + +extern const struct fsl_pm_ops *qoriq_pm_ops; + +int __init fsl_rcpm_init(void); + +#endif /* __PPC_FSL_PM_H */ diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S index 1b779560728f..603625368ceb 100644 --- a/arch/powerpc/kernel/head_64.S +++ b/arch/powerpc/kernel/head_64.S @@ -181,6 +181,25 @@ exception_marker: #endif #ifdef CONFIG_PPC_BOOK3E +/* + * stop a thread in the same core + * input parameter: + * r3 = the thread physical id + */ +_GLOBAL(book3e_stop_thread) + cmpi 0, r3, 0 + beq 10f + cmpi 0, r3, 1 + beq 10f + /* If the thread id is invalid, just exit. */ + b 13f +10: + li r4, 1 + sld r4, r4, r3 + mtspr SPRN_TENC, r4 +13: + blr + _GLOBAL(fsl_secondary_thread_init) mfspr r4,SPRN_BUCSR diff --git a/arch/powerpc/platforms/85xx/Kconfig b/arch/powerpc/platforms/85xx/Kconfig index 97915feffd42..e626461a63bd 100644 --- a/arch/powerpc/platforms/85xx/Kconfig +++ b/arch/powerpc/platforms/85xx/Kconfig @@ -8,6 +8,7 @@ menuconfig FSL_SOC_BOOKE select FSL_PCI if PCI select SERIAL_8250_EXTENDED if SERIAL_8250 select SERIAL_8250_SHARE_IRQ if SERIAL_8250 + select FSL_CORENET_RCPM if PPC_E500MC default y if FSL_SOC_BOOKE diff --git a/arch/powerpc/platforms/85xx/common.c b/arch/powerpc/platforms/85xx/common.c index 949f22c86e61..28720a4ded7b 100644 --- a/arch/powerpc/platforms/85xx/common.c +++ b/arch/powerpc/platforms/85xx/common.c @@ -9,11 +9,14 @@ #include #include +#include #include #include #include "mpc85xx.h" +const struct fsl_pm_ops *qoriq_pm_ops; + static const struct of_device_id mpc85xx_common_ids[] __initconst = { { .type = "soc", }, { .compatible = "soc", }, diff --git a/arch/powerpc/sysdev/Kconfig b/arch/powerpc/sysdev/Kconfig index a19332a38715..52dc165c0efb 100644 --- a/arch/powerpc/sysdev/Kconfig +++ b/arch/powerpc/sysdev/Kconfig @@ -40,3 +40,8 @@ config SCOM_DEBUGFS config GE_FPGA bool default n + +config FSL_CORENET_RCPM + bool + help + This option enables support for RCPM (Run Control/Power Management). 
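To make the fsl_pm_ops table above concrete, a hypothetical platform-side caller could park a core in nap and wake it again through the hooks and state constants this patch defines (qoriq_pm_ops, E500_PM_NAP and the ops members come from the patch; the idle hooks themselves are illustrative, not part of the series):

	#include <asm/fsl_pm.h>

	/* Illustrative only: drive one CPU into PH15 (nap) via the RCPM ops. */
	static void example_core_nap_enter(int cpu)
	{
		if (!qoriq_pm_ops)
			return;	/* fsl_rcpm_init() found no RCPM node */

		/* mask interrupts reaching the RCPM for this core */
		qoriq_pm_ops->irq_mask(cpu);
		qoriq_pm_ops->cpu_enter_state(cpu, E500_PM_NAP);
	}

	static void example_core_nap_exit(int cpu)
	{
		if (!qoriq_pm_ops)
			return;

		qoriq_pm_ops->cpu_exit_state(cpu, E500_PM_NAP);
		qoriq_pm_ops->irq_unmask(cpu);
	}

The null check matters because qoriq_pm_ops stays unset on platforms whose device tree has no fsl,qoriq-rcpm-* node.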
diff --git a/arch/powerpc/sysdev/Makefile b/arch/powerpc/sysdev/Makefile index bd6bd729969c..a254824719f1 100644 --- a/arch/powerpc/sysdev/Makefile +++ b/arch/powerpc/sysdev/Makefile @@ -20,6 +20,7 @@ obj-$(CONFIG_MMIO_NVRAM) += mmio_nvram.o obj-$(CONFIG_FSL_SOC) += fsl_soc.o fsl_mpic_err.o obj-$(CONFIG_FSL_PCI) += fsl_pci.o $(fsl-msi-obj-y) obj-$(CONFIG_FSL_PMC) += fsl_pmc.o +obj-$(CONFIG_FSL_CORENET_RCPM) += fsl_rcpm.o obj-$(CONFIG_FSL_LBC) += fsl_lbc.o obj-$(CONFIG_FSL_GTM) += fsl_gtm.o obj-$(CONFIG_FSL_85XX_CACHE_SRAM) += fsl_85xx_l2ctlr.o fsl_85xx_cache_sram.o diff --git a/arch/powerpc/sysdev/fsl_rcpm.c b/arch/powerpc/sysdev/fsl_rcpm.c new file mode 100644 index 000000000000..656d9ca057e5 --- /dev/null +++ b/arch/powerpc/sysdev/fsl_rcpm.c @@ -0,0 +1,385 @@ +/* + * RCPM(Run Control/Power Management) support + * + * Copyright 2012-2015 Freescale Semiconductor Inc. + * + * Author: Chenhui Zhao + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ + +#define pr_fmt(fmt) "%s: " fmt, __func__ + +#include +#include +#include +#include + +#include +#include +#include +#include + +static struct ccsr_rcpm_v1 __iomem *rcpm_v1_regs; +static struct ccsr_rcpm_v2 __iomem *rcpm_v2_regs; +static unsigned int fsl_supported_pm_modes; + +static void rcpm_v1_irq_mask(int cpu) +{ + int hw_cpu = get_hard_smp_processor_id(cpu); + unsigned int mask = 1 << hw_cpu; + + setbits32(&rcpm_v1_regs->cpmimr, mask); + setbits32(&rcpm_v1_regs->cpmcimr, mask); + setbits32(&rcpm_v1_regs->cpmmcmr, mask); + setbits32(&rcpm_v1_regs->cpmnmimr, mask); +} + +static void rcpm_v2_irq_mask(int cpu) +{ + int hw_cpu = get_hard_smp_processor_id(cpu); + unsigned int mask = 1 << hw_cpu; + + setbits32(&rcpm_v2_regs->tpmimr0, mask); + setbits32(&rcpm_v2_regs->tpmcimr0, mask); + setbits32(&rcpm_v2_regs->tpmmcmr0, mask); + setbits32(&rcpm_v2_regs->tpmnmimr0, mask); +} + +static void rcpm_v1_irq_unmask(int cpu) +{ + int hw_cpu = get_hard_smp_processor_id(cpu); + unsigned int mask = 1 << hw_cpu; + + clrbits32(&rcpm_v1_regs->cpmimr, mask); + clrbits32(&rcpm_v1_regs->cpmcimr, mask); + clrbits32(&rcpm_v1_regs->cpmmcmr, mask); + clrbits32(&rcpm_v1_regs->cpmnmimr, mask); +} + +static void rcpm_v2_irq_unmask(int cpu) +{ + int hw_cpu = get_hard_smp_processor_id(cpu); + unsigned int mask = 1 << hw_cpu; + + clrbits32(&rcpm_v2_regs->tpmimr0, mask); + clrbits32(&rcpm_v2_regs->tpmcimr0, mask); + clrbits32(&rcpm_v2_regs->tpmmcmr0, mask); + clrbits32(&rcpm_v2_regs->tpmnmimr0, mask); +} + +static void rcpm_v1_set_ip_power(bool enable, u32 mask) +{ + if (enable) + setbits32(&rcpm_v1_regs->ippdexpcr, mask); + else + clrbits32(&rcpm_v1_regs->ippdexpcr, mask); +} + +static void rcpm_v2_set_ip_power(bool enable, u32 mask) +{ + if (enable) + setbits32(&rcpm_v2_regs->ippdexpcr[0], mask); + else + clrbits32(&rcpm_v2_regs->ippdexpcr[0], mask); +} + +static void rcpm_v1_cpu_enter_state(int cpu, int state) +{ + int hw_cpu = get_hard_smp_processor_id(cpu); + unsigned int mask = 1 << hw_cpu; + + switch (state) { + case E500_PM_PH10: + setbits32(&rcpm_v1_regs->cdozcr, mask); + break; + case E500_PM_PH15: + setbits32(&rcpm_v1_regs->cnapcr, mask); + break; + default: + pr_warn("Unknown cpu PM state (%d)\n", state); + break; + } +} + +static void rcpm_v2_cpu_enter_state(int cpu, int state) +{ + int hw_cpu = get_hard_smp_processor_id(cpu); + u32 mask = 1 << 
cpu_core_index_of_thread(cpu); + + switch (state) { + case E500_PM_PH10: + /* one bit corresponds to one thread for PH10 of 6500 */ + setbits32(&rcpm_v2_regs->tph10setr0, 1 << hw_cpu); + break; + case E500_PM_PH15: + setbits32(&rcpm_v2_regs->pcph15setr, mask); + break; + case E500_PM_PH20: + setbits32(&rcpm_v2_regs->pcph20setr, mask); + break; + case E500_PM_PH30: + setbits32(&rcpm_v2_regs->pcph30setr, mask); + break; + default: + pr_warn("Unknown cpu PM state (%d)\n", state); + } +} + +static void rcpm_v1_cpu_die(int cpu) +{ + rcpm_v1_cpu_enter_state(cpu, E500_PM_PH15); +} + +#ifdef CONFIG_PPC64 +static void qoriq_disable_thread(int cpu) +{ + int thread = cpu_thread_in_core(cpu); + + book3e_stop_thread(thread); +} +#endif + +static void rcpm_v2_cpu_die(int cpu) +{ +#ifdef CONFIG_PPC64 + int primary; + + if (threads_per_core == 2) { + primary = cpu_first_thread_sibling(cpu); + if (cpu_is_offline(primary) && cpu_is_offline(primary + 1)) { + /* if both threads are offline, put the cpu in PH20 */ + rcpm_v2_cpu_enter_state(cpu, E500_PM_PH20); + } else { + /* if only one thread is offline, disable the thread */ + qoriq_disable_thread(cpu); + } + } +#endif + + if (threads_per_core == 1) + rcpm_v2_cpu_enter_state(cpu, E500_PM_PH20); +} + +static void rcpm_v1_cpu_exit_state(int cpu, int state) +{ + int hw_cpu = get_hard_smp_processor_id(cpu); + unsigned int mask = 1 << hw_cpu; + + switch (state) { + case E500_PM_PH10: + clrbits32(&rcpm_v1_regs->cdozcr, mask); + break; + case E500_PM_PH15: + clrbits32(&rcpm_v1_regs->cnapcr, mask); + break; + default: + pr_warn("Unknown cpu PM state (%d)\n", state); + break; + } +} + +static void rcpm_v1_cpu_up_prepare(int cpu) +{ + rcpm_v1_cpu_exit_state(cpu, E500_PM_PH15); + rcpm_v1_irq_unmask(cpu); +} + +static void rcpm_v2_cpu_exit_state(int cpu, int state) +{ + int hw_cpu = get_hard_smp_processor_id(cpu); + u32 mask = 1 << cpu_core_index_of_thread(cpu); + + switch (state) { + case E500_PM_PH10: + setbits32(&rcpm_v2_regs->tph10clrr0, 1 << hw_cpu); + break; + case E500_PM_PH15: + setbits32(&rcpm_v2_regs->pcph15clrr, mask); + break; + case E500_PM_PH20: + setbits32(&rcpm_v2_regs->pcph20clrr, mask); + break; + case E500_PM_PH30: + setbits32(&rcpm_v2_regs->pcph30clrr, mask); + break; + default: + pr_warn("Unknown cpu PM state (%d)\n", state); + } +} + +static void rcpm_v2_cpu_up_prepare(int cpu) +{ + rcpm_v2_cpu_exit_state(cpu, E500_PM_PH20); + rcpm_v2_irq_unmask(cpu); +} + +static int rcpm_v1_plat_enter_state(int state) +{ + u32 *pmcsr_reg = &rcpm_v1_regs->powmgtcsr; + int ret = 0; + int result; + + switch (state) { + case PLAT_PM_SLEEP: + setbits32(pmcsr_reg, RCPM_POWMGTCSR_SLP); + + /* Upon resume, wait for RCPM_POWMGTCSR_SLP bit to be clear. */ + result = spin_event_timeout( + !(in_be32(pmcsr_reg) & RCPM_POWMGTCSR_SLP), 10000, 10); + if (!result) { + pr_err("timeout waiting for SLP bit to be cleared\n"); + ret = -ETIMEDOUT; + } + break; + default: + pr_warn("Unknown platform PM state (%d)", state); + ret = -EINVAL; + } + + return ret; +} + +static int rcpm_v2_plat_enter_state(int state) +{ + u32 *pmcsr_reg = &rcpm_v2_regs->powmgtcsr; + int ret = 0; + int result; + + switch (state) { + case PLAT_PM_LPM20: + /* clear previous LPM20 status */ + setbits32(pmcsr_reg, RCPM_POWMGTCSR_P_LPM20_ST); + /* enter LPM20 status */ + setbits32(pmcsr_reg, RCPM_POWMGTCSR_LPM20_RQ); + + /* At this point, the device is in LPM20 status. */ + + /* resume ... 
*/ + result = spin_event_timeout( + !(in_be32(pmcsr_reg) & RCPM_POWMGTCSR_LPM20_ST), 10000, 10); + if (!result) { + pr_err("timeout waiting for LPM20 bit to be cleared\n"); + ret = -ETIMEDOUT; + } + break; + default: + pr_warn("Unknown platform PM state (%d)\n", state); + ret = -EINVAL; + } + + return ret; +} + +static int rcpm_v1_plat_enter_sleep(void) +{ + return rcpm_v1_plat_enter_state(PLAT_PM_SLEEP); +} + +static int rcpm_v2_plat_enter_sleep(void) +{ + return rcpm_v2_plat_enter_state(PLAT_PM_LPM20); +} + +static void rcpm_common_freeze_time_base(u32 *tben_reg, int freeze) +{ + static u32 mask; + + if (freeze) { + mask = in_be32(tben_reg); + clrbits32(tben_reg, mask); + } else { + setbits32(tben_reg, mask); + } + + /* read back to push the previous write */ + in_be32(tben_reg); +} + +static void rcpm_v1_freeze_time_base(bool freeze) +{ + rcpm_common_freeze_time_base(&rcpm_v1_regs->ctbenr, freeze); +} + +static void rcpm_v2_freeze_time_base(bool freeze) +{ + rcpm_common_freeze_time_base(&rcpm_v2_regs->pctbenr, freeze); +} + +static unsigned int rcpm_get_pm_modes(void) +{ + return fsl_supported_pm_modes; +} + +static const struct fsl_pm_ops qoriq_rcpm_v1_ops = { + .irq_mask = rcpm_v1_irq_mask, + .irq_unmask = rcpm_v1_irq_unmask, + .cpu_enter_state = rcpm_v1_cpu_enter_state, + .cpu_exit_state = rcpm_v1_cpu_exit_state, + .cpu_up_prepare = rcpm_v1_cpu_up_prepare, + .cpu_die = rcpm_v1_cpu_die, + .plat_enter_sleep = rcpm_v1_plat_enter_sleep, + .set_ip_power = rcpm_v1_set_ip_power, + .freeze_time_base = rcpm_v1_freeze_time_base, + .get_pm_modes = rcpm_get_pm_modes, +}; + +static const struct fsl_pm_ops qoriq_rcpm_v2_ops = { + .irq_mask = rcpm_v2_irq_mask, + .irq_unmask = rcpm_v2_irq_unmask, + .cpu_enter_state = rcpm_v2_cpu_enter_state, + .cpu_exit_state = rcpm_v2_cpu_exit_state, + .cpu_up_prepare = rcpm_v2_cpu_up_prepare, + .cpu_die = rcpm_v2_cpu_die, + .plat_enter_sleep = rcpm_v2_plat_enter_sleep, + .set_ip_power = rcpm_v2_set_ip_power, + .freeze_time_base = rcpm_v2_freeze_time_base, + .get_pm_modes = rcpm_get_pm_modes, +}; + +static const struct of_device_id rcpm_matches[] = { + { + .compatible = "fsl,qoriq-rcpm-1.0", + .data = &qoriq_rcpm_v1_ops, + }, + { + .compatible = "fsl,qoriq-rcpm-2.0", + .data = &qoriq_rcpm_v2_ops, + }, + { + .compatible = "fsl,qoriq-rcpm-2.1", + .data = &qoriq_rcpm_v2_ops, + }, + {}, +}; + +int __init fsl_rcpm_init(void) +{ + struct device_node *np; + const struct of_device_id *match; + void __iomem *base; + + np = of_find_matching_node_and_match(NULL, rcpm_matches, &match); + if (!np) + return 0; + + base = of_iomap(np, 0); + of_node_put(np); + if (!base) { + pr_err("of_iomap() error.\n"); + return -ENOMEM; + } + + rcpm_v1_regs = base; + rcpm_v2_regs = base; + + /* support sleep by default */ + fsl_supported_pm_modes = FSL_PM_SLEEP; + + qoriq_pm_ops = match->data; + + return 0; +} diff --git a/include/linux/fsl/guts.h b/include/linux/fsl/guts.h index 84d971ff3fba..649e9171a9b3 100644 --- a/include/linux/fsl/guts.h +++ b/include/linux/fsl/guts.h @@ -189,4 +189,109 @@ static inline void guts_set_pmuxcr_dma(struct ccsr_guts __iomem *guts, #endif +struct ccsr_rcpm_v1 { + u8 res0000[4]; + __be32 cdozsr; /* 0x0004 Core Doze Status Register */ + u8 res0008[4]; + __be32 cdozcr; /* 0x000c Core Doze Control Register */ + u8 res0010[4]; + __be32 cnapsr; /* 0x0014 Core Nap Status Register */ + u8 res0018[4]; + __be32 cnapcr; /* 0x001c Core Nap Control Register */ + u8 res0020[4]; + __be32 cdozpsr; /* 0x0024 Core Doze Previous Status Register */ + u8 res0028[4]; + __be32 
cnappsr; /* 0x002c Core Nap Previous Status Register */ + u8 res0030[4]; + __be32 cwaitsr; /* 0x0034 Core Wait Status Register */ + u8 res0038[4]; + __be32 cwdtdsr; /* 0x003c Core Watchdog Detect Status Register */ + __be32 powmgtcsr; /* 0x0040 PM Control&Status Register */ +#define RCPM_POWMGTCSR_SLP 0x00020000 + u8 res0044[12]; + __be32 ippdexpcr; /* 0x0050 IP Powerdown Exception Control Register */ + u8 res0054[16]; + __be32 cpmimr; /* 0x0064 Core PM IRQ Mask Register */ + u8 res0068[4]; + __be32 cpmcimr; /* 0x006c Core PM Critical IRQ Mask Register */ + u8 res0070[4]; + __be32 cpmmcmr; /* 0x0074 Core PM Machine Check Mask Register */ + u8 res0078[4]; + __be32 cpmnmimr; /* 0x007c Core PM NMI Mask Register */ + u8 res0080[4]; + __be32 ctbenr; /* 0x0084 Core Time Base Enable Register */ + u8 res0088[4]; + __be32 ctbckselr; /* 0x008c Core Time Base Clock Select Register */ + u8 res0090[4]; + __be32 ctbhltcr; /* 0x0094 Core Time Base Halt Control Register */ + u8 res0098[4]; + __be32 cmcpmaskcr; /* 0x00a4 Core Machine Check Mask Register */ +}; + +struct ccsr_rcpm_v2 { + u8 res_00[12]; + __be32 tph10sr0; /* Thread PH10 Status Register */ + u8 res_10[12]; + __be32 tph10setr0; /* Thread PH10 Set Control Register */ + u8 res_20[12]; + __be32 tph10clrr0; /* Thread PH10 Clear Control Register */ + u8 res_30[12]; + __be32 tph10psr0; /* Thread PH10 Previous Status Register */ + u8 res_40[12]; + __be32 twaitsr0; /* Thread Wait Status Register */ + u8 res_50[96]; + __be32 pcph15sr; /* Physical Core PH15 Status Register */ + __be32 pcph15setr; /* Physical Core PH15 Set Control Register */ + __be32 pcph15clrr; /* Physical Core PH15 Clear Control Register */ + __be32 pcph15psr; /* Physical Core PH15 Prev Status Register */ + u8 res_c0[16]; + __be32 pcph20sr; /* Physical Core PH20 Status Register */ + __be32 pcph20setr; /* Physical Core PH20 Set Control Register */ + __be32 pcph20clrr; /* Physical Core PH20 Clear Control Register */ + __be32 pcph20psr; /* Physical Core PH20 Prev Status Register */ + __be32 pcpw20sr; /* Physical Core PW20 Status Register */ + u8 res_e0[12]; + __be32 pcph30sr; /* Physical Core PH30 Status Register */ + __be32 pcph30setr; /* Physical Core PH30 Set Control Register */ + __be32 pcph30clrr; /* Physical Core PH30 Clear Control Register */ + __be32 pcph30psr; /* Physical Core PH30 Prev Status Register */ + u8 res_100[32]; + __be32 ippwrgatecr; /* IP Power Gating Control Register */ + u8 res_124[12]; + __be32 powmgtcsr; /* Power Management Control & Status Reg */ +#define RCPM_POWMGTCSR_LPM20_RQ 0x00100000 +#define RCPM_POWMGTCSR_LPM20_ST 0x00000200 +#define RCPM_POWMGTCSR_P_LPM20_ST 0x00000100 + u8 res_134[12]; + __be32 ippdexpcr[4]; /* IP Powerdown Exception Control Reg */ + u8 res_150[12]; + __be32 tpmimr0; /* Thread PM Interrupt Mask Reg */ + u8 res_160[12]; + __be32 tpmcimr0; /* Thread PM Crit Interrupt Mask Reg */ + u8 res_170[12]; + __be32 tpmmcmr0; /* Thread PM Machine Check Interrupt Mask Reg */ + u8 res_180[12]; + __be32 tpmnmimr0; /* Thread PM NMI Mask Reg */ + u8 res_190[12]; + __be32 tmcpmaskcr0; /* Thread Machine Check Mask Control Reg */ + __be32 pctbenr; /* Physical Core Time Base Enable Reg */ + __be32 pctbclkselr; /* Physical Core Time Base Clock Select */ + __be32 tbclkdivr; /* Time Base Clock Divider Register */ + u8 res_1ac[4]; + __be32 ttbhltcr[4]; /* Thread Time Base Halt Control Register */ + __be32 clpcl10sr; /* Cluster PCL10 Status Register */ + __be32 clpcl10setr; /* Cluster PCL30 Set Control Register */ + __be32 clpcl10clrr; /* Cluster PCL30 Clear 
Control Register */ + __be32 clpcl10psr; /* Cluster PCL30 Prev Status Register */ + __be32 cddslpsetr; /* Core Domain Deep Sleep Set Register */ + __be32 cddslpclrr; /* Core Domain Deep Sleep Clear Register */ + __be32 cdpwroksetr; /* Core Domain Power OK Set Register */ + __be32 cdpwrokclrr; /* Core Domain Power OK Clear Register */ + __be32 cdpwrensr; /* Core Domain Power Enable Status Register */ + __be32 cddslsr; /* Core Domain Deep Sleep Status Register */ + u8 res_1e8[8]; + __be32 dslpcntcr[8]; /* Deep Sleep Counter Cfg Register */ + u8 res_300[3568]; +}; + #endif -- cgit v1.2.3 From 54ce3a0d801142c96935122736a46c08d15d83b5 Mon Sep 17 00:00:00 2001 From: Joseph McNally Date: Sun, 28 Feb 2016 22:31:23 +0000 Subject: hwmon: (ntc_thermistor) Add support for ncpXXxh103 This patch adds support for the Murata NCP15XH103 thermistor series. Signed-off-by: Joseph McNally Signed-off-by: Guenter Roeck --- .../devicetree/bindings/hwmon/ntc_thermistor.txt | 1 + Documentation/hwmon/ntc_thermistor | 4 +- drivers/hwmon/Kconfig | 2 +- drivers/hwmon/ntc_thermistor.c | 44 ++++++++++++++++++++++ include/linux/platform_data/ntc_thermistor.h | 1 + 5 files changed, 49 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/Documentation/devicetree/bindings/hwmon/ntc_thermistor.txt b/Documentation/devicetree/bindings/hwmon/ntc_thermistor.txt index a04a80f9cc70..c3b9c4cfe8df 100644 --- a/Documentation/devicetree/bindings/hwmon/ntc_thermistor.txt +++ b/Documentation/devicetree/bindings/hwmon/ntc_thermistor.txt @@ -10,6 +10,7 @@ Requires node properties: "murata,ncp03wb473" "murata,ncp15wl333" "murata,ncp03wf104" + "murata,ncp15xh103" /* Usage of vendor name "ntc" is deprecated */ "ntc,ncp15wb473" diff --git a/Documentation/hwmon/ntc_thermistor b/Documentation/hwmon/ntc_thermistor index 1d4cc847c6fe..8b9ff23edc32 100644 --- a/Documentation/hwmon/ntc_thermistor +++ b/Documentation/hwmon/ntc_thermistor @@ -3,9 +3,9 @@ Kernel driver ntc_thermistor Supported thermistors from Murata: * Murata NTC Thermistors NCP15WB473, NCP18WB473, NCP21WB473, NCP03WB473, - NCP15WL333, NCP03WF104 + NCP15WL333, NCP03WF104, NCP15XH103 Prefixes: 'ncp15wb473', 'ncp18wb473', 'ncp21wb473', 'ncp03wb473', - 'ncp15wl333', 'ncp03wf104' + 'ncp15wl333', 'ncp03wf104', 'ncp15xh103' Datasheet: Publicly available at Murata Supported thermistors from EPCOS: diff --git a/drivers/hwmon/Kconfig b/drivers/hwmon/Kconfig index 852c8a85e1e8..7a437ce99cf2 100644 --- a/drivers/hwmon/Kconfig +++ b/drivers/hwmon/Kconfig @@ -1141,7 +1141,7 @@ config SENSORS_NTC_THERMISTOR Currently, this driver supports NCP15WB473, NCP18WB473, NCP21WB473, NCP03WB473, NCP15WL333, - and NCP03WF104 from Murata and B57330V2103 from EPCOS. + NCP03WF104 and NCP15XH103 from Murata and B57330V2103 from EPCOS. This driver can also be built as a module. If so, the module will be called ntc-thermistor. 
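The driver converts a measured resistance to a temperature by interpolating between entries of per-part compensation tables such as the ncpXXxh103 table added below. A standalone sketch of that style of lookup, using a handful of (temp_c, ohm) pairs taken from the NCP15XH103 table in this patch (the helper itself is illustrative, not the driver's code):

	#include <stdio.h>

	struct ntc_point { int temp_c; int ohm; };

	/* Representative points from the NCP15XH103 table in the patch
	 * (the full table runs from -40 C / 247565 ohm to 125 C / 556 ohm). */
	static const struct ntc_point pts[] = {
		{ 0, 28674 }, { 25, 10000 }, { 50, 4105 }, { 75, 1915 }, { 100, 990 },
	};

	/* Linearly interpolate temperature (milli-degrees C) from resistance;
	 * resistance falls monotonically as temperature rises. */
	static int ntc_lookup_mc(int ohm)
	{
		int i, n = sizeof(pts) / sizeof(pts[0]);

		for (i = 1; i < n; i++) {
			if (ohm >= pts[i].ohm) {
				int dt = pts[i].temp_c - pts[i - 1].temp_c;
				int dr = pts[i].ohm - pts[i - 1].ohm;
				return 1000 * pts[i - 1].temp_c +
				       1000 * dt * (ohm - pts[i - 1].ohm) / dr;
			}
		}
		return 1000 * pts[n - 1].temp_c;	/* clamp past the hot end */
	}

	int main(void)
	{
		printf("%d mC\n", ntc_lookup_mc(10000));	/* prints 25000 */
		return 0;
	}
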
diff --git a/drivers/hwmon/ntc_thermistor.c b/drivers/hwmon/ntc_thermistor.c index feed30646d91..faa6e8dfbaaf 100644 --- a/drivers/hwmon/ntc_thermistor.c +++ b/drivers/hwmon/ntc_thermistor.c @@ -54,6 +54,7 @@ static const struct platform_device_id ntc_thermistor_id[] = { { "ncp15wl333", TYPE_NCPXXWL333 }, { "b57330v2103", TYPE_B57330V2103}, { "ncp03wf104", TYPE_NCPXXWF104 }, + { "ncp15xh103", TYPE_NCPXXXH103 }, { }, }; @@ -173,6 +174,43 @@ static const struct ntc_compensation ncpXXwf104[] = { { .temp_c = 125, .ohm = 2522 }, }; +static const struct ntc_compensation ncpXXxh103[] = { + { .temp_c = -40, .ohm = 247565 }, + { .temp_c = -35, .ohm = 181742 }, + { .temp_c = -30, .ohm = 135128 }, + { .temp_c = -25, .ohm = 101678 }, + { .temp_c = -20, .ohm = 77373 }, + { .temp_c = -15, .ohm = 59504 }, + { .temp_c = -10, .ohm = 46222 }, + { .temp_c = -5, .ohm = 36244 }, + { .temp_c = 0, .ohm = 28674 }, + { .temp_c = 5, .ohm = 22878 }, + { .temp_c = 10, .ohm = 18399 }, + { .temp_c = 15, .ohm = 14910 }, + { .temp_c = 20, .ohm = 12169 }, + { .temp_c = 25, .ohm = 10000 }, + { .temp_c = 30, .ohm = 8271 }, + { .temp_c = 35, .ohm = 6883 }, + { .temp_c = 40, .ohm = 5762 }, + { .temp_c = 45, .ohm = 4851 }, + { .temp_c = 50, .ohm = 4105 }, + { .temp_c = 55, .ohm = 3492 }, + { .temp_c = 60, .ohm = 2985 }, + { .temp_c = 65, .ohm = 2563 }, + { .temp_c = 70, .ohm = 2211 }, + { .temp_c = 75, .ohm = 1915 }, + { .temp_c = 80, .ohm = 1666 }, + { .temp_c = 85, .ohm = 1454 }, + { .temp_c = 90, .ohm = 1275 }, + { .temp_c = 95, .ohm = 1121 }, + { .temp_c = 100, .ohm = 990 }, + { .temp_c = 105, .ohm = 876 }, + { .temp_c = 110, .ohm = 779 }, + { .temp_c = 115, .ohm = 694 }, + { .temp_c = 120, .ohm = 620 }, + { .temp_c = 125, .ohm = 556 }, +}; + /* * The following compensation table is from the specification of EPCOS NTC * Thermistors Datasheet @@ -260,6 +298,8 @@ static const struct of_device_id ntc_match[] = { .data = &ntc_thermistor_id[5]}, { .compatible = "murata,ncp03wf104", .data = &ntc_thermistor_id[6] }, + { .compatible = "murata,ncp15xh103", + .data = &ntc_thermistor_id[7] }, /* Usage of vendor name "ntc" is deprecated */ { .compatible = "ntc,ncp15wb473", @@ -609,6 +649,10 @@ static int ntc_thermistor_probe(struct platform_device *pdev) data->comp = ncpXXwf104; data->n_comp = ARRAY_SIZE(ncpXXwf104); break; + case TYPE_NCPXXXH103: + data->comp = ncpXXxh103; + data->n_comp = ARRAY_SIZE(ncpXXxh103); + break; default: dev_err(&pdev->dev, "Unknown device type: %lu(%s)\n", pdev_id->driver_data, pdev_id->name); diff --git a/include/linux/platform_data/ntc_thermistor.h b/include/linux/platform_data/ntc_thermistor.h index aed170588b74..698d0d59db76 100644 --- a/include/linux/platform_data/ntc_thermistor.h +++ b/include/linux/platform_data/ntc_thermistor.h @@ -28,6 +28,7 @@ enum ntc_thermistor_type { TYPE_NCPXXWL333, TYPE_B57330V2103, TYPE_NCPXXWF104, + TYPE_NCPXXXH103, }; struct ntc_thermistor_platform_data { -- cgit v1.2.3 From 69bec725985324e79b1c47ea287815ac4ddb0521 Mon Sep 17 00:00:00 2001 From: Peter Chen Date: Fri, 19 Feb 2016 17:26:15 +0800 Subject: USB: core: let USB device know device node Although most of USB devices are hot-plug's, there are still some devices are hard wired on the board, eg, for HSIC and SSIC interface USB devices. If these kinds of USB devices are multiple functions, and they can supply other interfaces like i2c, gpios for other devices, we may need to describe these at device tree. 
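As a hypothetical illustration of the compatible-string rule this binding introduces, a hard-wired device with VID 0x0424 and PID 0x2514 behind port 2 would be written with lower-case hex and leading zeroes suppressed (the device, label and controller here are made up; only the usbVID,PID and reg rules come from the patch):

	&usb2 {
		#address-cells = <1>;
		#size-cells = <0>;

		device@2 {
			compatible = "usb424,2514";	/* usbVID,PID */
			reg = <2>;
		};
	};
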
In this commit, the "reg" property in the DTS is used as the physical port number and matched against the physical port number decided by the USB core; if the two match, that device node is the one for the USB device being created. Signed-off-by: Peter Chen Acked-by: Philipp Zabel Acked-by: Alan Stern Acked-by: Rob Herring Acked-by: Arnd Bergmann Signed-off-by: Greg Kroah-Hartman --- .../devicetree/bindings/usb/usb-device.txt | 28 +++++++++++++ drivers/usb/core/Makefile | 2 +- drivers/usb/core/of.c | 47 ++++++++++++++++++++++ drivers/usb/core/usb.c | 10 +++++ include/linux/usb/of.h | 7 ++++ 5 files changed, 93 insertions(+), 1 deletion(-) create mode 100644 Documentation/devicetree/bindings/usb/usb-device.txt create mode 100644 drivers/usb/core/of.c (limited to 'include/linux') diff --git a/Documentation/devicetree/bindings/usb/usb-device.txt b/Documentation/devicetree/bindings/usb/usb-device.txt new file mode 100644 index 000000000000..1c35e7b665e1 --- /dev/null +++ b/Documentation/devicetree/bindings/usb/usb-device.txt @@ -0,0 +1,28 @@ +Generic USB Device Properties + +Usually, we only use device tree for hard wired USB device. +The reference binding doc is from: +http://www.firmware.org/1275/bindings/usb/usb-1_0.ps + +Required properties: +- compatible: usbVID,PID. The textual representation of VID, PID shall + be in lower case hexadecimal with leading zeroes suppressed. The + other compatible strings from the above standard binding could also + be used, but a device adhering to this binding may leave out all except + for usbVID,PID. +- reg: the port number which this device is connecting to, the range + is 1-31. + +Example: + +&usb1 { + status = "okay"; + + #address-cells = <1>; + #size-cells = <0>; + + hub: genesys@1 { + compatible = "usb5e3,608"; + reg = <1>; + }; +} diff --git a/drivers/usb/core/Makefile b/drivers/usb/core/Makefile index 2f6f93220046..9780877010b4 100644 --- a/drivers/usb/core/Makefile +++ b/drivers/usb/core/Makefile @@ -5,7 +5,7 @@ usbcore-y := usb.o hub.o hcd.o urb.o message.o driver.o usbcore-y += config.o file.o buffer.o sysfs.o endpoint.o usbcore-y += devio.o notify.o generic.o quirks.o devices.o -usbcore-y += port.o +usbcore-y += port.o of.o usbcore-$(CONFIG_PCI) += hcd-pci.o usbcore-$(CONFIG_ACPI) += usb-acpi.o diff --git a/drivers/usb/core/of.c b/drivers/usb/core/of.c new file mode 100644 index 000000000000..2289700c31d6 --- /dev/null +++ b/drivers/usb/core/of.c @@ -0,0 +1,47 @@ +/* + * of.c The helpers for hcd device tree support + * + * Copyright (C) 2016 Freescale Semiconductor, Inc. + * Author: Peter Chen + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 of + * the License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include + +/** + * usb_of_get_child_node - Find the device node match port number + * @parent: the parent device node + * @portnum: the port number which device is connecting + * + * Find the node from device tree according to its port number. + * + * Return: On success, a pointer to the device node, %NULL on failure.
+ */ +struct device_node *usb_of_get_child_node(struct device_node *parent, + int portnum) +{ + struct device_node *node; + u32 port; + + for_each_child_of_node(parent, node) { + if (!of_property_read_u32(node, "reg", &port)) { + if (port == portnum) + return node; + } + } + + return NULL; +} +EXPORT_SYMBOL_GPL(usb_of_get_child_node); + diff --git a/drivers/usb/core/usb.c b/drivers/usb/core/usb.c index 524c9822d2bb..ffa5cf13ffe1 100644 --- a/drivers/usb/core/usb.c +++ b/drivers/usb/core/usb.c @@ -36,6 +36,7 @@ #include #include #include +#include #include #include @@ -470,6 +471,7 @@ struct usb_device *usb_alloc_dev(struct usb_device *parent, dev->route = 0; dev->dev.parent = bus->controller; + dev->dev.of_node = bus->controller->of_node; dev_set_name(&dev->dev, "usb%d", bus->busnum); root_hub = 1; } else { @@ -494,6 +496,14 @@ struct usb_device *usb_alloc_dev(struct usb_device *parent, dev->dev.parent = &parent->dev; dev_set_name(&dev->dev, "%d-%s", bus->busnum, dev->devpath); + if (!parent->parent) { + /* device under root hub's port */ + port1 = usb_hcd_find_raw_port_number(usb_hcd, + port1); + } + dev->dev.of_node = usb_of_get_child_node(parent->dev.of_node, + port1); + /* hub driver sets up TT records */ } diff --git a/include/linux/usb/of.h b/include/linux/usb/of.h index 974bce93aa28..de3237fce6b2 100644 --- a/include/linux/usb/of.h +++ b/include/linux/usb/of.h @@ -16,6 +16,8 @@ enum usb_dr_mode of_usb_get_dr_mode_by_phy(struct device_node *phy_np); bool of_usb_host_tpl_support(struct device_node *np); int of_usb_update_otg_caps(struct device_node *np, struct usb_otg_caps *otg_caps); +struct device_node *usb_of_get_child_node(struct device_node *parent, + int portnum); #else static inline enum usb_dr_mode of_usb_get_dr_mode_by_phy(struct device_node *phy_np) @@ -31,6 +33,11 @@ static inline int of_usb_update_otg_caps(struct device_node *np, { return 0; } +static inline struct device_node *usb_of_get_child_node + (struct device_node *parent, int portnum) +{ + return NULL; +} #endif #if IS_ENABLED(CONFIG_OF) && IS_ENABLED(CONFIG_USB_SUPPORT) -- cgit v1.2.3 From aef25338226660cdd4df908c2eff1abdcfca65e5 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 12 Feb 2016 17:01:11 -0800 Subject: libnvdimm, nfit: centralize command status translation The return value from an 'ndctl_fn' reports the command execution status, i.e. was the command properly formatted and was it successfully submitted to the bus provider. The new 'cmd_rc' parameter allows the bus provider to communicate command specific results, translated into common error codes. 
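In practice this means a caller now checks two results: the ndctl return value for submission and transport failures, and the new *cmd_rc for the translated, command-specific status. A minimal sketch of the resulting convention, distilled from the ARS helpers reworked below:

	int rc, cmd_rc;

	rc = nd_desc->ndctl(nd_desc, NULL, ND_CMD_ARS_START, cmd,
			sizeof(*cmd), &cmd_rc);
	if (rc < 0)		/* the command was never executed */
		return rc;
	if (cmd_rc == -EBUSY)	/* translated status: ARS already in progress */
		msleep(1000);	/* ...then retry, as ars_do_start() does */
	else if (cmd_rc < 0)	/* the command ran, but failed */
		return cmd_rc;
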
Convert the ARS commands to this scheme to: 1/ Consolidate status reporting 2/ Prepare for expanding ars unit test cases 3/ Make the implementation more generic Cc: Vishal Verma Signed-off-by: Dan Williams --- drivers/acpi/nfit.c | 179 ++++++++++++++++++++++++++------------- drivers/acpi/nfit.h | 11 ++- drivers/nvdimm/bus.c | 2 +- drivers/nvdimm/dimm_devs.c | 6 +- include/linux/libnvdimm.h | 2 +- tools/testing/nvdimm/test/nfit.c | 5 +- 6 files changed, 138 insertions(+), 67 deletions(-) (limited to 'include/linux') diff --git a/drivers/acpi/nfit.c b/drivers/acpi/nfit.c index 35947ac87644..4dd2b6808df5 100644 --- a/drivers/acpi/nfit.c +++ b/drivers/acpi/nfit.c @@ -72,9 +72,80 @@ static struct acpi_device *to_acpi_dev(struct acpi_nfit_desc *acpi_desc) return to_acpi_device(acpi_desc->dev); } +static int xlat_status(void *buf, unsigned int cmd) +{ + struct nd_cmd_ars_status *ars_status; + struct nd_cmd_ars_start *ars_start; + struct nd_cmd_ars_cap *ars_cap; + u16 flags; + + switch (cmd) { + case ND_CMD_ARS_CAP: + ars_cap = buf; + if ((ars_cap->status & 0xffff) == NFIT_ARS_CAP_NONE) + return -ENOTTY; + + /* Command failed */ + if (ars_cap->status & 0xffff) + return -EIO; + + /* No supported scan types for this range */ + flags = ND_ARS_PERSISTENT | ND_ARS_VOLATILE; + if ((ars_cap->status >> 16 & flags) == 0) + return -ENOTTY; + break; + case ND_CMD_ARS_START: + ars_start = buf; + /* ARS is in progress */ + if ((ars_start->status & 0xffff) == NFIT_ARS_START_BUSY) + return -EBUSY; + + /* Command failed */ + if (ars_start->status & 0xffff) + return -EIO; + break; + case ND_CMD_ARS_STATUS: + ars_status = buf; + /* Command failed */ + if (ars_status->status & 0xffff) + return -EIO; + /* Check extended status (Upper two bytes) */ + if (ars_status->status == NFIT_ARS_STATUS_DONE) + return 0; + + /* ARS is in progress */ + if (ars_status->status == NFIT_ARS_STATUS_BUSY) + return -EBUSY; + + /* No ARS performed for the current boot */ + if (ars_status->status == NFIT_ARS_STATUS_NONE) + return -EAGAIN; + + /* + * ARS interrupted, either we overflowed or some other + * agent wants the scan to stop. If we didn't overflow + * then just continue with the returned results.
+ */ + if (ars_status->status == NFIT_ARS_STATUS_INTR) { + if (ars_status->flags & NFIT_ARS_F_OVERFLOW) + return -ENOSPC; + return 0; + } + + /* Unknown status */ + if (ars_status->status >> 16) + return -EIO; + break; + default: + break; + } + + return 0; +} + static int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm, unsigned int cmd, void *buf, - unsigned int buf_len) + unsigned int buf_len, int *cmd_rc) { struct acpi_nfit_desc *acpi_desc = to_acpi_nfit_desc(nd_desc); const struct nd_cmd_desc *desc = NULL; @@ -185,6 +256,8 @@ static int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, * unfilled in the output buffer */ rc = buf_len - offset - in_buf.buffer.length; + if (cmd_rc) + *cmd_rc = xlat_status(buf, cmd); } else { dev_err(dev, "%s:%s underrun cmd: %s buf_len: %d out_len: %d\n", __func__, dimm_name, cmd_name, buf_len, @@ -1105,7 +1178,7 @@ static void write_blk_ctl(struct nfit_blk *nfit_blk, unsigned int bw, writeq(cmd, mmio->addr.base + offset); wmb_blk(nfit_blk); - if (nfit_blk->dimm_flags & ND_BLK_DCR_LATCH) + if (nfit_blk->dimm_flags & NFIT_BLK_DCR_LATCH) readq(mmio->addr.base + offset); } @@ -1141,7 +1214,7 @@ static int acpi_nfit_blk_single_io(struct nfit_blk *nfit_blk, memcpy_to_pmem(mmio->addr.aperture + offset, iobuf + copied, c); else { - if (nfit_blk->dimm_flags & ND_BLK_READ_FLUSH) + if (nfit_blk->dimm_flags & NFIT_BLK_READ_FLUSH) mmio_flush_range((void __force *) mmio->addr.aperture + offset, c); @@ -1328,13 +1401,13 @@ static int acpi_nfit_blk_get_flags(struct nvdimm_bus_descriptor *nd_desc, memset(&flags, 0, sizeof(flags)); rc = nd_desc->ndctl(nd_desc, nvdimm, ND_CMD_DIMM_FLAGS, &flags, - sizeof(flags)); + sizeof(flags), NULL); if (rc >= 0 && flags.status == 0) nfit_blk->dimm_flags = flags.flags; else if (rc == -ENOTTY) { /* fall back to a conservative default */ - nfit_blk->dimm_flags = ND_BLK_DCR_LATCH | ND_BLK_READ_FLUSH; + nfit_blk->dimm_flags = NFIT_BLK_DCR_LATCH | NFIT_BLK_READ_FLUSH; rc = 0; } else rc = -ENXIO; @@ -1473,19 +1546,27 @@ static void acpi_nfit_blk_region_disable(struct nvdimm_bus *nvdimm_bus, /* devm will free nfit_blk */ } -static int ars_get_cap(struct nvdimm_bus_descriptor *nd_desc, +static int ars_get_cap(struct acpi_nfit_desc *acpi_desc, struct nd_cmd_ars_cap *cmd, u64 addr, u64 length) { + struct nvdimm_bus_descriptor *nd_desc = &acpi_desc->nd_desc; + int cmd_rc, rc; + cmd->address = addr; cmd->length = length; - - return nd_desc->ndctl(nd_desc, NULL, ND_CMD_ARS_CAP, cmd, - sizeof(*cmd)); + rc = nd_desc->ndctl(nd_desc, NULL, ND_CMD_ARS_CAP, cmd, + sizeof(*cmd), &cmd_rc); + if (rc < 0) + return rc; + if (cmd_rc < 0) + return cmd_rc; + return 0; } static int ars_do_start(struct nvdimm_bus_descriptor *nd_desc, struct nd_cmd_ars_start *cmd, u64 addr, u64 length) { + int cmd_rc; int rc; cmd->address = addr; @@ -1494,52 +1575,49 @@ static int ars_do_start(struct nvdimm_bus_descriptor *nd_desc, while (1) { rc = nd_desc->ndctl(nd_desc, NULL, ND_CMD_ARS_START, cmd, - sizeof(*cmd)); - if (rc) + sizeof(*cmd), &cmd_rc); + + if (rc < 0) return rc; - switch (cmd->status) { - case 0: - return 0; - case 1: - /* ARS unsupported, but we should never get here */ - return 0; - case 6: + + if (cmd_rc == -EBUSY) { /* ARS is in progress */ msleep(1000); - break; - default: - return -ENXIO; + continue; } + + if (cmd_rc < 0) + return cmd_rc; + + return 0; } } static int ars_get_status(struct nvdimm_bus_descriptor *nd_desc, struct nd_cmd_ars_status *cmd, u32 size) { - int rc; + int rc, cmd_rc; while (1) { rc = nd_desc->ndctl(nd_desc, 
NULL, ND_CMD_ARS_STATUS, cmd, - size); - if (rc || cmd->status & 0xffff) - return -ENXIO; + size, &cmd_rc); + if (rc < 0) + return rc; - /* Check extended status (Upper two bytes) */ - switch (cmd->status >> 16) { - case 0: - return 0; - case 1: - /* ARS is in progress */ + /* FIXME make async and have a timeout */ + if (cmd_rc == -EBUSY) { msleep(1000); - break; - case 2: - /* No ARS performed for the current boot */ + continue; + } + + if (cmd_rc == -EAGAIN || cmd_rc == 0) return 0; - case 3: - /* TODO: error list overflow support */ - default: + + /* TODO: error list overflow support */ + if (cmd_rc == -ENOSPC) return -ENXIO; - } + + return cmd_rc; } } @@ -1590,28 +1668,11 @@ static int acpi_nfit_find_poison(struct acpi_nfit_desc *acpi_desc, start = ndr_desc->res->start; len = ndr_desc->res->end - ndr_desc->res->start + 1; - /* - * If ARS is unimplemented, unsupported, or if the 'Persistent Memory - * Scrub' flag in extended status is not set, skip this but continue - * initialization - */ - rc = ars_get_cap(nd_desc, ars_cap, start, len); + rc = ars_get_cap(acpi_desc, ars_cap, start, len); if (rc == -ENOTTY) { - dev_dbg(acpi_desc->dev, - "Address Range Scrub is not implemented, won't create an error list\n"); rc = 0; goto out; } - if (rc) - goto out; - - if ((ars_cap->status & 0xffff) || - !(ars_cap->status >> 16 & ND_ARS_PERSISTENT)) { - dev_warn(acpi_desc->dev, - "ARS unsupported (status: 0x%x), won't create an error list\n", - ars_cap->status); - goto out; - } /* * Check if a full-range ARS has been run. If so, use those results @@ -1651,15 +1712,15 @@ static int acpi_nfit_find_poison(struct acpi_nfit_desc *acpi_desc, u64 done, end; rc = ars_do_start(nd_desc, ars_start, cur, remaining); - if (rc) + if (rc < 0) goto out; rc = ars_get_status(nd_desc, ars_status, ars_status_size); - if (rc) + if (rc < 0) goto out; rc = ars_status_process_records(nvdimm_bus, ars_status, cur); - if (rc) + if (rc < 0) goto out; end = min(cur + remaining, diff --git a/drivers/acpi/nfit.h b/drivers/acpi/nfit.h index 6689b0aaf194..f45b7d9b1d9e 100644 --- a/drivers/acpi/nfit.h +++ b/drivers/acpi/nfit.h @@ -47,8 +47,15 @@ enum nfit_fic { }; enum { - ND_BLK_READ_FLUSH = 1, - ND_BLK_DCR_LATCH = 2, + NFIT_BLK_READ_FLUSH = 1, + NFIT_BLK_DCR_LATCH = 2, + NFIT_ARS_STATUS_DONE = 0, + NFIT_ARS_STATUS_BUSY = 1 << 16, + NFIT_ARS_STATUS_NONE = 2 << 16, + NFIT_ARS_STATUS_INTR = 3 << 16, + NFIT_ARS_START_BUSY = 6, + NFIT_ARS_CAP_NONE = 1, + NFIT_ARS_F_OVERFLOW = 1, }; struct nfit_spa { diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c index 5d28e9405f32..c3ba888e3e3a 100644 --- a/drivers/nvdimm/bus.c +++ b/drivers/nvdimm/bus.c @@ -587,7 +587,7 @@ static int __nd_ioctl(struct nvdimm_bus *nvdimm_bus, struct nvdimm *nvdimm, if (rc) goto out_unlock; - rc = nd_desc->ndctl(nd_desc, nvdimm, cmd, buf, buf_len); + rc = nd_desc->ndctl(nd_desc, nvdimm, cmd, buf, buf_len, NULL); if (rc < 0) goto out_unlock; if (copy_to_user(p, buf, buf_len)) diff --git a/drivers/nvdimm/dimm_devs.c b/drivers/nvdimm/dimm_devs.c index 651b8d19d324..c56f88217924 100644 --- a/drivers/nvdimm/dimm_devs.c +++ b/drivers/nvdimm/dimm_devs.c @@ -75,7 +75,7 @@ int nvdimm_init_nsarea(struct nvdimm_drvdata *ndd) memset(cmd, 0, sizeof(*cmd)); nd_desc = nvdimm_bus->nd_desc; return nd_desc->ndctl(nd_desc, to_nvdimm(ndd->dev), - ND_CMD_GET_CONFIG_SIZE, cmd, sizeof(*cmd)); + ND_CMD_GET_CONFIG_SIZE, cmd, sizeof(*cmd), NULL); } int nvdimm_init_config_data(struct nvdimm_drvdata *ndd) @@ -120,7 +120,7 @@ int nvdimm_init_config_data(struct nvdimm_drvdata *ndd) 
cmd->in_offset = offset; rc = nd_desc->ndctl(nd_desc, to_nvdimm(ndd->dev), ND_CMD_GET_CONFIG_DATA, cmd, - cmd->in_length + sizeof(*cmd)); + cmd->in_length + sizeof(*cmd), NULL); if (rc || cmd->status) { rc = -ENXIO; break; @@ -171,7 +171,7 @@ int nvdimm_set_config_data(struct nvdimm_drvdata *ndd, size_t offset, status = ((void *) cmd) + cmd_size - sizeof(u32); rc = nd_desc->ndctl(nd_desc, to_nvdimm(ndd->dev), - ND_CMD_SET_CONFIG_DATA, cmd, cmd_size); + ND_CMD_SET_CONFIG_DATA, cmd, cmd_size, NULL); if (rc || *status) { rc = rc ? rc : -ENXIO; break; diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h index 141ffdd59960..f398953270d1 100644 --- a/include/linux/libnvdimm.h +++ b/include/linux/libnvdimm.h @@ -48,7 +48,7 @@ struct nvdimm; struct nvdimm_bus_descriptor; typedef int (*ndctl_fn)(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm, unsigned int cmd, void *buf, - unsigned int buf_len); + unsigned int buf_len, int *cmd_rc); struct nd_namespace_label; struct nvdimm_drvdata; diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c index 27b808e0489a..9ead77970546 100644 --- a/tools/testing/nvdimm/test/nfit.c +++ b/tools/testing/nvdimm/test/nfit.c @@ -261,7 +261,7 @@ static int nfit_test_cmd_ars_status(struct nd_cmd_ars_status *nd_cmd, static int nfit_test_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm, unsigned int cmd, void *buf, - unsigned int buf_len) + unsigned int buf_len, int *cmd_rc) { struct acpi_nfit_desc *acpi_desc = to_acpi_desc(nd_desc); struct nfit_test *t = container_of(acpi_desc, typeof(*t), acpi_desc); @@ -315,6 +315,9 @@ static int nfit_test_ctl(struct nvdimm_bus_descriptor *nd_desc, } } + /* TODO: error status tests */ + if (cmd_rc) + *cmd_rc = 0; return rc; } -- cgit v1.2.3 From 719994660c249a086a7493205c7f1562e30c38cb Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 18 Feb 2016 10:29:49 -0800 Subject: libnvdimm: async notification support In preparation for asynchronous address range scrub support add an ability for the pmem driver to dynamically consume address range scrub results. 
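The shape of the mechanism, condensed from the changes below: a bus provider signals a region, and the region fans the event out to each child driver's new notify hook:

	/* provider side, once fresh scrub results are available */
	nvdimm_region_notify(nd_region, NVDIMM_REVALIDATE_POISON);

	/* consumer side: pmem re-reads the poison list on notification */
	static struct nd_device_driver nd_pmem_driver = {
		.probe = nd_pmem_probe,
		.remove = nd_pmem_remove,
		.notify = nd_pmem_notify,	/* re-runs nvdimm_namespace_add_poison() */
		.drv = { .name = "nd_pmem" },
	};
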
Signed-off-by: Dan Williams --- drivers/nvdimm/bus.c | 26 ++++++++++++++++++++++++++ drivers/nvdimm/nd.h | 2 ++ drivers/nvdimm/pmem.c | 15 +++++++++++++++ drivers/nvdimm/region.c | 12 ++++++++++++ include/linux/nd.h | 7 +++++++ 5 files changed, 62 insertions(+) (limited to 'include/linux') diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c index c3ba888e3e3a..2508251439e7 100644 --- a/drivers/nvdimm/bus.c +++ b/drivers/nvdimm/bus.c @@ -133,6 +133,32 @@ static int nvdimm_bus_remove(struct device *dev) return rc; } +void nd_device_notify(struct device *dev, enum nvdimm_event event) +{ + device_lock(dev); + if (dev->driver) { + struct nd_device_driver *nd_drv; + + nd_drv = to_nd_device_driver(dev->driver); + if (nd_drv->notify) + nd_drv->notify(dev, event); + } + device_unlock(dev); +} +EXPORT_SYMBOL(nd_device_notify); + +void nvdimm_region_notify(struct nd_region *nd_region, enum nvdimm_event event) +{ + struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(&nd_region->dev); + + if (!nvdimm_bus) + return; + + /* caller is responsible for holding a reference on the device */ + nd_device_notify(&nd_region->dev, event); +} +EXPORT_SYMBOL_GPL(nvdimm_region_notify); + static struct bus_type nvdimm_bus_type = { .name = "nd", .uevent = nvdimm_bus_uevent, diff --git a/drivers/nvdimm/nd.h b/drivers/nvdimm/nd.h index ba1633b9da31..78b82f6dd191 100644 --- a/drivers/nvdimm/nd.h +++ b/drivers/nvdimm/nd.h @@ -18,6 +18,7 @@ #include #include #include +#include #include "label.h" enum { @@ -168,6 +169,7 @@ int nd_integrity_init(struct gendisk *disk, unsigned long meta_size); void wait_nvdimm_bus_probe_idle(struct device *dev); void nd_device_register(struct device *dev); void nd_device_unregister(struct device *dev, enum nd_async_mode mode); +void nd_device_notify(struct device *dev, enum nvdimm_event event); int nd_uuid_store(struct device *dev, u8 **uuid_out, const char *buf, size_t len); ssize_t nd_sector_size_show(unsigned long current_lbasize, diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index 8d0b54670184..efc2a5e671c6 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -488,12 +488,27 @@ static int nd_pmem_remove(struct device *dev) return 0; } +static void nd_pmem_notify(struct device *dev, enum nvdimm_event event) +{ + struct pmem_device *pmem = dev_get_drvdata(dev); + struct nd_namespace_common *ndns = pmem->ndns; + + if (event != NVDIMM_REVALIDATE_POISON) + return; + + if (is_nd_btt(dev)) + nvdimm_namespace_add_poison(ndns, &pmem->bb, 0); + else + nvdimm_namespace_add_poison(ndns, &pmem->bb, pmem->data_offset); +} + MODULE_ALIAS("pmem"); MODULE_ALIAS_ND_DEVICE(ND_DEVICE_NAMESPACE_IO); MODULE_ALIAS_ND_DEVICE(ND_DEVICE_NAMESPACE_PMEM); static struct nd_device_driver nd_pmem_driver = { .probe = nd_pmem_probe, .remove = nd_pmem_remove, + .notify = nd_pmem_notify, .drv = { .name = "nd_pmem", }, diff --git a/drivers/nvdimm/region.c b/drivers/nvdimm/region.c index 7da63eac78ee..4b7715e29cff 100644 --- a/drivers/nvdimm/region.c +++ b/drivers/nvdimm/region.c @@ -93,9 +93,21 @@ static int nd_region_remove(struct device *dev) return 0; } +static int child_notify(struct device *dev, void *data) +{ + nd_device_notify(dev, *(enum nvdimm_event *) data); + return 0; +} + +static void nd_region_notify(struct device *dev, enum nvdimm_event event) +{ + device_for_each_child(dev, &event, child_notify); +} + static struct nd_device_driver nd_region_driver = { .probe = nd_region_probe, .remove = nd_region_remove, + .notify = nd_region_notify, .drv = { .name = "nd_region", }, diff --git 
a/include/linux/nd.h b/include/linux/nd.h index 507e47c86737..5489ab756d1a 100644 --- a/include/linux/nd.h +++ b/include/linux/nd.h @@ -16,11 +16,16 @@ #include #include +enum nvdimm_event { + NVDIMM_REVALIDATE_POISON, +}; + struct nd_device_driver { struct device_driver drv; unsigned long type; int (*probe)(struct device *dev); int (*remove)(struct device *dev); + void (*notify)(struct device *dev, enum nvdimm_event event); }; static inline struct nd_device_driver *to_nd_device_driver( @@ -144,6 +149,8 @@ static inline int nvdimm_write_bytes(struct nd_namespace_common *ndns, MODULE_ALIAS("nd:t" __stringify(type) "*") #define ND_DEVICE_MODALIAS_FMT "nd:t%d" +struct nd_region; +void nvdimm_region_notify(struct nd_region *nd_region, enum nvdimm_event event); int __must_check __nd_driver_register(struct nd_device_driver *nd_drv, struct module *module, const char *mod_name); #define nd_driver_register(driver) \ -- cgit v1.2.3 From 7ae0fa439faff000744b234d04cb470bfd83593b Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 19 Feb 2016 12:16:34 -0800 Subject: nfit, libnvdimm: async region scrub workqueue Introduce a workqueue that will be used to run address range scrub asynchronously with the rest of nvdimm device probing. Userspace still wants notification when probing operations complete, so introduce a new callback to flush this workqueue when userspace is awaiting probe completion. Signed-off-by: Dan Williams --- drivers/acpi/nfit.c | 49 +++++++++++++++++++++++++++++++++++++++++++++++ drivers/acpi/nfit.h | 3 +++ drivers/nvdimm/core.c | 9 +++++++++ include/linux/libnvdimm.h | 1 + 4 files changed, 62 insertions(+) (limited to 'include/linux') diff --git a/drivers/acpi/nfit.c b/drivers/acpi/nfit.c index 76c9444c9c60..4aa2bd54fc1d 100644 --- a/drivers/acpi/nfit.c +++ b/drivers/acpi/nfit.c @@ -34,6 +34,8 @@ static bool force_enable_dimms; module_param(force_enable_dimms, bool, S_IRUGO|S_IWUSR); MODULE_PARM_DESC(force_enable_dimms, "Ignore _STA (ACPI DIMM device) status"); +static struct workqueue_struct *nfit_wq; + struct nfit_table_prev { struct list_head spas; struct list_head memdevs; @@ -1961,6 +1963,39 @@ int acpi_nfit_init(struct acpi_nfit_desc *acpi_desc, acpi_size sz) } EXPORT_SYMBOL_GPL(acpi_nfit_init); +struct acpi_nfit_flush_work { + struct work_struct work; + struct completion cmp; +}; + +static void flush_probe(struct work_struct *work) +{ + struct acpi_nfit_flush_work *flush; + + flush = container_of(work, typeof(*flush), work); + complete(&flush->cmp); +} + +static int acpi_nfit_flush_probe(struct nvdimm_bus_descriptor *nd_desc) +{ + struct acpi_nfit_desc *acpi_desc = to_acpi_nfit_desc(nd_desc); + struct device *dev = acpi_desc->dev; + struct acpi_nfit_flush_work flush; + + /* bounce the device lock to flush acpi_nfit_add / acpi_nfit_notify */ + device_lock(dev); + device_unlock(dev); + + /* + * Scrub work could take 10s of seconds, userspace may give up so we + * need to be interruptible while waiting. 
+ */ + INIT_WORK_ONSTACK(&flush.work, flush_probe); + COMPLETION_INITIALIZER_ONSTACK(flush.cmp); + queue_work(nfit_wq, &flush.work); + return wait_for_completion_interruptible(&flush.cmp); +} + void acpi_nfit_desc_init(struct acpi_nfit_desc *acpi_desc, struct device *dev) { struct nvdimm_bus_descriptor *nd_desc; @@ -1971,6 +2006,7 @@ void acpi_nfit_desc_init(struct acpi_nfit_desc *acpi_desc, struct device *dev) nd_desc = &acpi_desc->nd_desc; nd_desc->provider_name = "ACPI.NFIT"; nd_desc->ndctl = acpi_nfit_ctl; + nd_desc->flush_probe = acpi_nfit_flush_probe; nd_desc->attr_groups = acpi_nfit_attribute_groups; INIT_LIST_HEAD(&acpi_desc->spa_maps); @@ -2048,6 +2084,8 @@ static int acpi_nfit_remove(struct acpi_device *adev) { struct acpi_nfit_desc *acpi_desc = dev_get_drvdata(&adev->dev); + acpi_desc->cancel = 1; + flush_workqueue(nfit_wq); nvdimm_bus_unregister(acpi_desc->nvdimm_bus); return 0; } @@ -2079,6 +2117,12 @@ static void acpi_nfit_notify(struct acpi_device *adev, u32 event) acpi_desc->nvdimm_bus = nvdimm_bus_register(dev, &acpi_desc->nd_desc); if (!acpi_desc->nvdimm_bus) goto out_unlock; + } else { + /* + * Finish previous registration before considering new + * regions. + */ + flush_workqueue(nfit_wq); } /* Evaluate _FIT */ @@ -2146,12 +2190,17 @@ static __init int nfit_init(void) acpi_str_to_uuid(UUID_NFIT_BUS, nfit_uuid[NFIT_DEV_BUS]); acpi_str_to_uuid(UUID_NFIT_DIMM, nfit_uuid[NFIT_DEV_DIMM]); + nfit_wq = create_singlethread_workqueue("nfit"); + if (!nfit_wq) + return -ENOMEM; + return acpi_bus_register_driver(&acpi_nfit_driver); } static __exit void nfit_exit(void) { acpi_bus_unregister_driver(&acpi_nfit_driver); + destroy_workqueue(nfit_wq); } module_init(nfit_init); diff --git a/drivers/acpi/nfit.h b/drivers/acpi/nfit.h index 524dec0a3adb..e8388fee4cc6 100644 --- a/drivers/acpi/nfit.h +++ b/drivers/acpi/nfit.h @@ -14,6 +14,7 @@ */ #ifndef __NFIT_H__ #define __NFIT_H__ +#include #include #include #include @@ -123,6 +124,8 @@ struct acpi_nfit_desc { struct list_head idts; struct nvdimm_bus *nvdimm_bus; struct device *dev; + struct work_struct work; + unsigned int cancel:1; unsigned long dimm_dsm_force_en; unsigned long bus_dsm_force_en; int (*blk_do_io)(struct nd_blk_region *ndbr, resource_size_t dpa, diff --git a/drivers/nvdimm/core.c b/drivers/nvdimm/core.c index f309e6bf6833..79646d0c3277 100644 --- a/drivers/nvdimm/core.c +++ b/drivers/nvdimm/core.c @@ -298,6 +298,15 @@ static int flush_regions_dimms(struct device *dev, void *data) static ssize_t wait_probe_show(struct device *dev, struct device_attribute *attr, char *buf) { + struct nvdimm_bus *nvdimm_bus = to_nvdimm_bus(dev); + struct nvdimm_bus_descriptor *nd_desc = nvdimm_bus->nd_desc; + int rc; + + if (nd_desc->flush_probe) { + rc = nd_desc->flush_probe(nd_desc); + if (rc) + return rc; + } nd_synchronize(); device_for_each_child(dev, NULL, flush_regions_dimms); return sprintf(buf, "1\n"); diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h index f398953270d1..2299f8742f7f 100644 --- a/include/linux/libnvdimm.h +++ b/include/linux/libnvdimm.h @@ -71,6 +71,7 @@ struct nvdimm_bus_descriptor { unsigned long dsm_mask; char *provider_name; ndctl_fn ndctl; + int (*flush_probe)(struct nvdimm_bus_descriptor *nd_desc); }; struct nd_cmd_desc { -- cgit v1.2.3 From 87bf572e19a092cc9cc77d5a00d543a2b628c142 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 22 Feb 2016 21:50:31 -0800 Subject: nfit: disable userspace initiated ars during scrub While the nfit driver is issuing address range scrub commands and reaping 
the results, do not permit an ars_start command issued from userspace. The scrub thread assumes that all ars completions are for scrubs initiated by platform firmware at boot, or by the nfit driver. Signed-off-by: Dan Williams --- drivers/acpi/nfit.c | 23 +++++++++++++++++++++++ drivers/nvdimm/bus.c | 18 +++++++++++++----- include/linux/libnvdimm.h | 2 ++ 3 files changed, 38 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/drivers/acpi/nfit.c b/drivers/acpi/nfit.c index 3646501b01d7..0def4ebf5d43 100644 --- a/drivers/acpi/nfit.c +++ b/drivers/acpi/nfit.c @@ -2186,6 +2186,28 @@ static int acpi_nfit_flush_probe(struct nvdimm_bus_descriptor *nd_desc) return wait_for_completion_interruptible(&flush.cmp); } +static int acpi_nfit_clear_to_send(struct nvdimm_bus_descriptor *nd_desc, + struct nvdimm *nvdimm, unsigned int cmd) +{ + struct acpi_nfit_desc *acpi_desc = to_acpi_nfit_desc(nd_desc); + + if (nvdimm) + return 0; + if (cmd != ND_CMD_ARS_START) + return 0; + + /* + * The kernel and userspace may race to initiate a scrub, but + * the scrub thread is prepared to lose that initial race. It + * just needs guarantees that any ars it initiates are not + * interrupted by any intervening start requests from userspace. + */ + if (work_busy(&acpi_desc->work)) + return -EBUSY; + + return 0; +} + void acpi_nfit_desc_init(struct acpi_nfit_desc *acpi_desc, struct device *dev) { struct nvdimm_bus_descriptor *nd_desc; @@ -2197,6 +2219,7 @@ void acpi_nfit_desc_init(struct acpi_nfit_desc *acpi_desc, struct device *dev) nd_desc->provider_name = "ACPI.NFIT"; nd_desc->ndctl = acpi_nfit_ctl; nd_desc->flush_probe = acpi_nfit_flush_probe; + nd_desc->clear_to_send = acpi_nfit_clear_to_send; nd_desc->attr_groups = acpi_nfit_attribute_groups; INIT_LIST_HEAD(&acpi_desc->spa_maps); diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c index 2508251439e7..228c0e9f430e 100644 --- a/drivers/nvdimm/bus.c +++ b/drivers/nvdimm/bus.c @@ -490,16 +490,24 @@ void wait_nvdimm_bus_probe_idle(struct device *dev) } /* set_config requires an idle interleave set */ -static int nd_cmd_clear_to_send(struct nvdimm *nvdimm, unsigned int cmd) +static int nd_cmd_clear_to_send(struct nvdimm_bus *nvdimm_bus, + struct nvdimm *nvdimm, unsigned int cmd) { - struct nvdimm_bus *nvdimm_bus; + struct nvdimm_bus_descriptor *nd_desc = nvdimm_bus->nd_desc; + + /* ask the bus provider if it would like to block this request */ + if (nd_desc->clear_to_send) { + int rc = nd_desc->clear_to_send(nd_desc, nvdimm, cmd); + + if (rc) + return rc; + } if (!nvdimm || cmd != ND_CMD_SET_CONFIG_DATA) return 0; - nvdimm_bus = walk_to_nvdimm_bus(&nvdimm->dev); + /* prevent label manipulation while the kernel owns label updates */ wait_nvdimm_bus_probe_idle(&nvdimm_bus->dev); - if (atomic_read(&nvdimm->busy)) return -EBUSY; return 0; @@ -609,7 +617,7 @@ static int __nd_ioctl(struct nvdimm_bus *nvdimm_bus, struct nvdimm *nvdimm, } nvdimm_bus_lock(&nvdimm_bus->dev); - rc = nd_cmd_clear_to_send(nvdimm, cmd); + rc = nd_cmd_clear_to_send(nvdimm_bus, nvdimm, cmd); if (rc) goto out_unlock; diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h index 2299f8742f7f..833867b9ddc2 100644 --- a/include/linux/libnvdimm.h +++ b/include/linux/libnvdimm.h @@ -72,6 +72,8 @@ struct nvdimm_bus_descriptor { char *provider_name; ndctl_fn ndctl; int (*flush_probe)(struct nvdimm_bus_descriptor *nd_desc); + int (*clear_to_send)(struct nvdimm_bus_descriptor *nd_desc, + struct nvdimm *nvdimm, unsigned int cmd); }; struct nd_cmd_desc { -- cgit v1.2.3
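The flush_probe/clear_to_send pair above leans on a property of the single-threaded "nfit" workqueue: items execute one at a time in queueing order, so a no-op sentinel item only completes after everything queued before it has run. A minimal self-contained sketch of that idiom, with hypothetical names (sentinel, my_flush) that appear in neither patch, and mirroring the patch in returning straight from the interruptible wait:

#include <linux/workqueue.h>
#include <linux/completion.h>

struct sentinel {
	struct work_struct work;
	struct completion cmp;
};

static void sentinel_fn(struct work_struct *work)
{
	struct sentinel *s = container_of(work, struct sentinel, work);

	complete(&s->cmp);
}

/* Returns 0 once all work queued on @wq before this call has finished. */
static int my_flush(struct workqueue_struct *wq)
{
	struct sentinel s;

	INIT_WORK_ONSTACK(&s.work, sentinel_fn);
	init_completion(&s.cmp);
	queue_work(wq, &s.work);
	/* interruptible: a scrub can take tens of seconds and userspace may give up */
	return wait_for_completion_interruptible(&s.cmp);
}

Because nfit_wq is created with create_singlethread_workqueue(), this ordering is guaranteed; on a workqueue with max_active greater than one, the sentinel would not order against concurrently running items.

From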
1ae1602de028acaa42a0f6ff18d19756f8e825c6 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 26 Feb 2016 11:02:14 +0100 Subject: configfs: switch ->default groups to a linked list Replace the current NULL-terminated array of default groups with a linked list. This gets rid of lots of nasty code to size and/or dynamically allocate the array. While we're at it, also provide a convenient helper to remove the default groups. Signed-off-by: Christoph Hellwig Acked-by: Felipe Balbi [drivers/usb/gadget] Acked-by: Joel Becker Acked-by: Nicholas Bellinger Reviewed-by: Sagi Grimberg --- Documentation/filesystems/configfs/configfs.txt | 11 +- drivers/infiniband/core/cma_configfs.c | 31 ++-- drivers/target/iscsi/iscsi_target_configfs.c | 75 +++------ drivers/target/target_core_configfs.c | 203 +++++------------------- drivers/target/target_core_fabric_configfs.c | 194 ++++++---------------- drivers/target/target_core_internal.h | 1 - drivers/target/target_core_stat.c | 41 ++--- drivers/usb/gadget/configfs.c | 36 ++--- drivers/usb/gadget/function/f_mass_storage.c | 6 +- drivers/usb/gadget/function/f_rndis.c | 5 +- drivers/usb/gadget/function/uvc_configfs.c | 198 +++++++++-------------- fs/configfs/dir.c | 44 +++-- fs/configfs/item.c | 1 + fs/dlm/config.c | 38 +---- fs/ocfs2/cluster/nodemanager.c | 22 +-- include/linux/configfs.h | 11 +- include/target/target_core_base.h | 3 - 17 files changed, 286 insertions(+), 634 deletions(-) (limited to 'include/linux') diff --git a/Documentation/filesystems/configfs/configfs.txt b/Documentation/filesystems/configfs/configfs.txt index e5fe521eea1d..8ec9136aae56 100644 --- a/Documentation/filesystems/configfs/configfs.txt +++ b/Documentation/filesystems/configfs/configfs.txt @@ -250,7 +250,8 @@ child item. struct config_item cg_item; struct list_head cg_children; struct configfs_subsystem *cg_subsys; - struct config_group **default_groups; + struct list_head default_groups; + struct list_head group_entry; }; void config_group_init(struct config_group *group); @@ -420,15 +421,15 @@ These automatic subgroups, or default groups, do not preclude other children of the parent group. If ct_group_ops->make_group() exists, other child groups can be created on the parent group directly. -A configfs subsystem specifies default groups by filling in the -NULL-terminated array default_groups on the config_group structure. -Each group in that array is populated in the configfs tree at the same +A configfs subsystem specifies default groups by adding them using the +configfs_add_default_group() function to the parent config_group +structure. Each added group is populated in the configfs tree at the same time as the parent group. Similarly, they are removed at the same time as the parent. No extra notification is provided. When a ->drop_item() method call notifies the subsystem the parent group is going away, it also means every default group child associated with that parent group. -As a consequence of this, default_groups cannot be removed directly via +As a consequence of this, default groups cannot be removed directly via rmdir(2). They also are not considered when rmdir(2) on the parent group is checking for children.
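To make the conversion concrete before the per-driver hunks below, here is a minimal usage sketch of the new API with hypothetical names (my_subsys, my_group_type, my_stats_type) that occur nowhere in this series: a subsystem links each default child to its parent with configfs_add_default_group() at creation time, and tears the whole set down with the new configfs_remove_default_groups() helper instead of walking and freeing a NULL-terminated array.

#include <linux/configfs.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/err.h>

static struct config_item_type my_group_type = {
	.ct_owner = THIS_MODULE,
};

static struct config_item_type my_stats_type = {
	.ct_owner = THIS_MODULE,
};

struct my_subsys {
	struct config_group group;		/* parent */
	struct config_group stats_group;	/* default child, linked, no array */
};

static struct config_group *my_make_group(struct config_group *parent,
		const char *name)
{
	struct my_subsys *s = kzalloc(sizeof(*s), GFP_KERNEL);

	if (!s)
		return ERR_PTR(-ENOMEM);

	config_group_init_type_name(&s->group, name, &my_group_type);
	config_group_init_type_name(&s->stats_group, "stats", &my_stats_type);
	/* was: default_groups = kcalloc(2, ...); [0] = &stats; [1] = NULL */
	configfs_add_default_group(&s->stats_group, &s->group);
	return &s->group;
}

static void my_drop_group(struct config_group *parent, struct config_item *item)
{
	/* was: loop of config_item_put() over the array, then kfree(array) */
	configfs_remove_default_groups(to_config_group(item));
	config_item_put(item);
}

Note the argument order, which every converted call site below follows: the new child group comes first, its parent second. The helper appends to the parent's list, so default children populate the tree in the order of the add calls.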
diff --git a/drivers/infiniband/core/cma_configfs.c b/drivers/infiniband/core/cma_configfs.c index 18b112aa577e..41573df1d9fc 100644 --- a/drivers/infiniband/core/cma_configfs.c +++ b/drivers/infiniband/core/cma_configfs.c @@ -49,8 +49,6 @@ struct cma_dev_group { char name[IB_DEVICE_NAME_MAX]; struct config_group device_group; struct config_group ports_group; - struct config_group *default_dev_group[2]; - struct config_group **default_ports_group; struct cma_dev_port_group *ports; }; @@ -158,7 +156,6 @@ static int make_cma_ports(struct cma_dev_group *cma_dev_group, unsigned int i; unsigned int ports_num; struct cma_dev_port_group *ports; - struct config_group **ports_group; int err; ibdev = cma_get_ib_dev(cma_dev); @@ -169,9 +166,8 @@ static int make_cma_ports(struct cma_dev_group *cma_dev_group, ports_num = ibdev->phys_port_cnt; ports = kcalloc(ports_num, sizeof(*cma_dev_group->ports), GFP_KERNEL); - ports_group = kcalloc(ports_num + 1, sizeof(*ports_group), GFP_KERNEL); - if (!ports || !ports_group) { + if (!ports) { err = -ENOMEM; goto free; } @@ -185,18 +181,16 @@ static int make_cma_ports(struct cma_dev_group *cma_dev_group, config_group_init_type_name(&ports[i].group, port_str, &cma_port_group_type); - ports_group[i] = &ports[i].group; + configfs_add_default_group(&ports[i].group, + &cma_dev_group->ports_group); + } - ports_group[i] = NULL; - cma_dev_group->default_ports_group = ports_group; cma_dev_group->ports = ports; return 0; free: kfree(ports); - kfree(ports_group); cma_dev_group->ports = NULL; - cma_dev_group->default_ports_group = NULL; return err; } @@ -220,9 +214,7 @@ static void release_cma_ports_group(struct config_item *item) ports_group); kfree(cma_dev_group->ports); - kfree(cma_dev_group->default_ports_group); cma_dev_group->ports = NULL; - cma_dev_group->default_ports_group = NULL; }; static struct configfs_item_operations cma_ports_item_ops = { @@ -263,22 +255,17 @@ static struct config_group *make_cma_dev(struct config_group *group, strncpy(cma_dev_group->name, name, sizeof(cma_dev_group->name)); - err = make_cma_ports(cma_dev_group, cma_dev); - if (err) - goto fail; - - cma_dev_group->ports_group.default_groups = - cma_dev_group->default_ports_group; config_group_init_type_name(&cma_dev_group->ports_group, "ports", &cma_ports_group_type); - cma_dev_group->device_group.default_groups - = cma_dev_group->default_dev_group; - cma_dev_group->default_dev_group[0] = &cma_dev_group->ports_group; - cma_dev_group->default_dev_group[1] = NULL; + err = make_cma_ports(cma_dev_group, cma_dev); + if (err) + goto fail; config_group_init_type_name(&cma_dev_group->device_group, name, &cma_device_group_type); + configfs_add_default_group(&cma_dev_group->ports_group, + &cma_dev_group->device_group); cma_deref_dev(cma_dev); return &cma_dev_group->device_group; diff --git a/drivers/target/iscsi/iscsi_target_configfs.c b/drivers/target/iscsi/iscsi_target_configfs.c index 2f821de63049..a24443ba59ea 100644 --- a/drivers/target/iscsi/iscsi_target_configfs.c +++ b/drivers/target/iscsi/iscsi_target_configfs.c @@ -771,21 +771,11 @@ static int lio_target_init_nodeacl(struct se_node_acl *se_nacl, { struct iscsi_node_acl *acl = container_of(se_nacl, struct iscsi_node_acl, se_node_acl); - struct config_group *stats_cg = &se_nacl->acl_fabric_stat_group; - - stats_cg->default_groups = kmalloc(sizeof(struct config_group *) * 2, - GFP_KERNEL); - if (!stats_cg->default_groups) { - pr_err("Unable to allocate memory for" - " stats_cg->default_groups\n"); - return -ENOMEM; - } - 
stats_cg->default_groups[0] = &acl->node_stat_grps.iscsi_sess_stats_group; - stats_cg->default_groups[1] = NULL; config_group_init_type_name(&acl->node_stat_grps.iscsi_sess_stats_group, "iscsi_sess_stats", &iscsi_stat_sess_cit); - + configfs_add_default_group(&acl->node_stat_grps.iscsi_sess_stats_group, + &se_nacl->acl_fabric_stat_group); return 0; } @@ -793,17 +783,8 @@ static void lio_target_cleanup_nodeacl( struct se_node_acl *se_nacl) { struct iscsi_node_acl *acl = container_of(se_nacl, struct iscsi_node_acl, se_node_acl); - struct config_item *df_item; - struct config_group *stats_cg; - int i; - - stats_cg = &acl->se_node_acl.acl_fabric_stat_group; - for (i = 0; stats_cg->default_groups[i]; i++) { - df_item = &stats_cg->default_groups[i]->cg_item; - stats_cg->default_groups[i] = NULL; - config_item_put(df_item); - } - kfree(stats_cg->default_groups); + + configfs_remove_default_groups(&acl->se_node_acl.acl_fabric_stat_group); } /* End items for lio_target_acl_cit */ @@ -1260,42 +1241,37 @@ static struct se_wwn *lio_target_call_coreaddtiqn( struct config_group *group, const char *name) { - struct config_group *stats_cg; struct iscsi_tiqn *tiqn; tiqn = iscsit_add_tiqn((unsigned char *)name); if (IS_ERR(tiqn)) return ERR_CAST(tiqn); - /* - * Setup struct iscsi_wwn_stat_grps for se_wwn->fabric_stat_group. - */ - stats_cg = &tiqn->tiqn_wwn.fabric_stat_group; - - stats_cg->default_groups = kmalloc(sizeof(struct config_group *) * 6, - GFP_KERNEL); - if (!stats_cg->default_groups) { - pr_err("Unable to allocate memory for" - " stats_cg->default_groups\n"); - iscsit_del_tiqn(tiqn); - return ERR_PTR(-ENOMEM); - } - stats_cg->default_groups[0] = &tiqn->tiqn_stat_grps.iscsi_instance_group; - stats_cg->default_groups[1] = &tiqn->tiqn_stat_grps.iscsi_sess_err_group; - stats_cg->default_groups[2] = &tiqn->tiqn_stat_grps.iscsi_tgt_attr_group; - stats_cg->default_groups[3] = &tiqn->tiqn_stat_grps.iscsi_login_stats_group; - stats_cg->default_groups[4] = &tiqn->tiqn_stat_grps.iscsi_logout_stats_group; - stats_cg->default_groups[5] = NULL; config_group_init_type_name(&tiqn->tiqn_stat_grps.iscsi_instance_group, "iscsi_instance", &iscsi_stat_instance_cit); + configfs_add_default_group(&tiqn->tiqn_stat_grps.iscsi_instance_group, + &tiqn->tiqn_wwn.fabric_stat_group); + config_group_init_type_name(&tiqn->tiqn_stat_grps.iscsi_sess_err_group, "iscsi_sess_err", &iscsi_stat_sess_err_cit); + configfs_add_default_group(&tiqn->tiqn_stat_grps.iscsi_sess_err_group, + &tiqn->tiqn_wwn.fabric_stat_group); + config_group_init_type_name(&tiqn->tiqn_stat_grps.iscsi_tgt_attr_group, "iscsi_tgt_attr", &iscsi_stat_tgt_attr_cit); + configfs_add_default_group(&tiqn->tiqn_stat_grps.iscsi_tgt_attr_group, + &tiqn->tiqn_wwn.fabric_stat_group); + config_group_init_type_name(&tiqn->tiqn_stat_grps.iscsi_login_stats_group, "iscsi_login_stats", &iscsi_stat_login_cit); + configfs_add_default_group(&tiqn->tiqn_stat_grps.iscsi_login_stats_group, + &tiqn->tiqn_wwn.fabric_stat_group); + config_group_init_type_name(&tiqn->tiqn_stat_grps.iscsi_logout_stats_group, "iscsi_logout_stats", &iscsi_stat_logout_cit); + configfs_add_default_group(&tiqn->tiqn_stat_grps.iscsi_logout_stats_group, + &tiqn->tiqn_wwn.fabric_stat_group); + pr_debug("LIO_Target_ConfigFS: REGISTER -> %s\n", tiqn->tiqn); pr_debug("LIO_Target_ConfigFS: REGISTER -> Allocated Node:" @@ -1307,17 +1283,8 @@ static void lio_target_call_coredeltiqn( struct se_wwn *wwn) { struct iscsi_tiqn *tiqn = container_of(wwn, struct iscsi_tiqn, tiqn_wwn); - struct config_item *df_item; - struct 
config_group *stats_cg; - int i; - - stats_cg = &tiqn->tiqn_wwn.fabric_stat_group; - for (i = 0; stats_cg->default_groups[i]; i++) { - df_item = &stats_cg->default_groups[i]->cg_item; - stats_cg->default_groups[i] = NULL; - config_item_put(df_item); - } - kfree(stats_cg->default_groups); + + configfs_remove_default_groups(&tiqn->tiqn_wwn.fabric_stat_group); pr_debug("LIO_Target_ConfigFS: DEREGISTER -> %s\n", tiqn->tiqn); diff --git a/drivers/target/target_core_configfs.c b/drivers/target/target_core_configfs.c index 713c63d9681b..d498533f09ee 100644 --- a/drivers/target/target_core_configfs.c +++ b/drivers/target/target_core_configfs.c @@ -194,13 +194,11 @@ static struct config_group *target_core_register_fabric( pr_debug("Target_Core_ConfigFS: REGISTER tfc_wwn_cit -> %p\n", &tf->tf_wwn_cit); - tf->tf_group.default_groups = tf->tf_default_groups; - tf->tf_group.default_groups[0] = &tf->tf_disc_group; - tf->tf_group.default_groups[1] = NULL; - config_group_init_type_name(&tf->tf_group, name, &tf->tf_wwn_cit); + config_group_init_type_name(&tf->tf_disc_group, "discovery_auth", &tf->tf_discovery_cit); + configfs_add_default_group(&tf->tf_disc_group, &tf->tf_group); pr_debug("Target_Core_ConfigFS: REGISTER -> Allocated Fabric:" " %s\n", tf->tf_group.cg_item.ci_name); @@ -216,9 +214,6 @@ static void target_core_deregister_fabric( { struct target_fabric_configfs *tf = container_of( to_config_group(item), struct target_fabric_configfs, tf_group); - struct config_group *tf_group; - struct config_item *df_item; - int i; pr_debug("Target_Core_ConfigFS: DEREGISTER -> Looking up %s in" " tf list\n", config_item_name(item)); @@ -230,12 +225,7 @@ static void target_core_deregister_fabric( pr_debug("Target_Core_ConfigFS: DEREGISTER -> Releasing ci" " %s\n", config_item_name(item)); - tf_group = &tf->tf_group; - for (i = 0; tf_group->default_groups[i]; i++) { - df_item = &tf_group->default_groups[i]->cg_item; - tf_group->default_groups[i] = NULL; - config_item_put(df_item); - } + configfs_remove_default_groups(&tf->tf_group); config_item_put(item); } @@ -2151,7 +2141,6 @@ static void target_core_dev_release(struct config_item *item) struct se_device *dev = container_of(dev_cg, struct se_device, dev_group); - kfree(dev_cg->default_groups); target_free_device(dev); } @@ -2819,8 +2808,6 @@ static struct config_group *target_core_make_subdev( struct se_hba *hba = item_to_hba(hba_ci); struct target_backend *tb = hba->backend; struct se_device *dev; - struct config_group *dev_cg = NULL, *tg_pt_gp_cg = NULL; - struct config_group *dev_stat_grp = NULL; int errno = -ENOMEM, ret; ret = mutex_lock_interruptible(&hba->hba_access_mutex); @@ -2831,73 +2818,52 @@ static struct config_group *target_core_make_subdev( if (!dev) goto out_unlock; - dev_cg = &dev->dev_group; - - dev_cg->default_groups = kmalloc(sizeof(struct config_group *) * 6, - GFP_KERNEL); - if (!dev_cg->default_groups) - goto out_free_device; + config_group_init_type_name(&dev->dev_group, name, &tb->tb_dev_cit); - config_group_init_type_name(dev_cg, name, &tb->tb_dev_cit); config_group_init_type_name(&dev->dev_attrib.da_group, "attrib", &tb->tb_dev_attrib_cit); + configfs_add_default_group(&dev->dev_attrib.da_group, &dev->dev_group); + config_group_init_type_name(&dev->dev_pr_group, "pr", &tb->tb_dev_pr_cit); + configfs_add_default_group(&dev->dev_pr_group, &dev->dev_group); + config_group_init_type_name(&dev->t10_wwn.t10_wwn_group, "wwn", &tb->tb_dev_wwn_cit); + configfs_add_default_group(&dev->t10_wwn.t10_wwn_group, + &dev->dev_group); + 
config_group_init_type_name(&dev->t10_alua.alua_tg_pt_gps_group, "alua", &tb->tb_dev_alua_tg_pt_gps_cit); + configfs_add_default_group(&dev->t10_alua.alua_tg_pt_gps_group, + &dev->dev_group); + config_group_init_type_name(&dev->dev_stat_grps.stat_group, "statistics", &tb->tb_dev_stat_cit); + configfs_add_default_group(&dev->dev_stat_grps.stat_group, + &dev->dev_group); - dev_cg->default_groups[0] = &dev->dev_attrib.da_group; - dev_cg->default_groups[1] = &dev->dev_pr_group; - dev_cg->default_groups[2] = &dev->t10_wwn.t10_wwn_group; - dev_cg->default_groups[3] = &dev->t10_alua.alua_tg_pt_gps_group; - dev_cg->default_groups[4] = &dev->dev_stat_grps.stat_group; - dev_cg->default_groups[5] = NULL; /* * Add core/$HBA/$DEV/alua/default_tg_pt_gp */ tg_pt_gp = core_alua_allocate_tg_pt_gp(dev, "default_tg_pt_gp", 1); if (!tg_pt_gp) - goto out_free_dev_cg_default_groups; + goto out_free_device; dev->t10_alua.default_tg_pt_gp = tg_pt_gp; - tg_pt_gp_cg = &dev->t10_alua.alua_tg_pt_gps_group; - tg_pt_gp_cg->default_groups = kmalloc(sizeof(struct config_group *) * 2, - GFP_KERNEL); - if (!tg_pt_gp_cg->default_groups) { - pr_err("Unable to allocate tg_pt_gp_cg->" - "default_groups\n"); - goto out_free_tg_pt_gp; - } - config_group_init_type_name(&tg_pt_gp->tg_pt_gp_group, "default_tg_pt_gp", &target_core_alua_tg_pt_gp_cit); - tg_pt_gp_cg->default_groups[0] = &tg_pt_gp->tg_pt_gp_group; - tg_pt_gp_cg->default_groups[1] = NULL; + configfs_add_default_group(&tg_pt_gp->tg_pt_gp_group, + &dev->t10_alua.alua_tg_pt_gps_group); + /* * Add core/$HBA/$DEV/statistics/ default groups */ - dev_stat_grp = &dev->dev_stat_grps.stat_group; - dev_stat_grp->default_groups = kmalloc(sizeof(struct config_group *) * 4, - GFP_KERNEL); - if (!dev_stat_grp->default_groups) { - pr_err("Unable to allocate dev_stat_grp->default_groups\n"); - goto out_free_tg_pt_gp_cg_default_groups; - } target_stat_setup_dev_default_groups(dev); mutex_unlock(&hba->hba_access_mutex); - return dev_cg; + return &dev->dev_group; -out_free_tg_pt_gp_cg_default_groups: - kfree(tg_pt_gp_cg->default_groups); -out_free_tg_pt_gp: - core_alua_free_tg_pt_gp(tg_pt_gp); -out_free_dev_cg_default_groups: - kfree(dev_cg->default_groups); out_free_device: target_free_device(dev); out_unlock: @@ -2913,40 +2879,22 @@ static void target_core_drop_subdev( struct se_device *dev = container_of(dev_cg, struct se_device, dev_group); struct se_hba *hba; - struct config_item *df_item; - struct config_group *tg_pt_gp_cg, *dev_stat_grp; - int i; hba = item_to_hba(&dev->se_hba->hba_group.cg_item); mutex_lock(&hba->hba_access_mutex); - dev_stat_grp = &dev->dev_stat_grps.stat_group; - for (i = 0; dev_stat_grp->default_groups[i]; i++) { - df_item = &dev_stat_grp->default_groups[i]->cg_item; - dev_stat_grp->default_groups[i] = NULL; - config_item_put(df_item); - } - kfree(dev_stat_grp->default_groups); + configfs_remove_default_groups(&dev->dev_stat_grps.stat_group); + configfs_remove_default_groups(&dev->t10_alua.alua_tg_pt_gps_group); - tg_pt_gp_cg = &dev->t10_alua.alua_tg_pt_gps_group; - for (i = 0; tg_pt_gp_cg->default_groups[i]; i++) { - df_item = &tg_pt_gp_cg->default_groups[i]->cg_item; - tg_pt_gp_cg->default_groups[i] = NULL; - config_item_put(df_item); - } - kfree(tg_pt_gp_cg->default_groups); /* * core_alua_free_tg_pt_gp() is called from ->default_tg_pt_gp * directly from target_core_alua_tg_pt_gp_release(). 
*/ dev->t10_alua.default_tg_pt_gp = NULL; - for (i = 0; dev_cg->default_groups[i]; i++) { - df_item = &dev_cg->default_groups[i]->cg_item; - dev_cg->default_groups[i] = NULL; - config_item_put(df_item); - } + configfs_remove_default_groups(dev_cg); + /* * se_dev is released from target_core_dev_item_ops->release() */ @@ -3141,8 +3089,6 @@ void target_setup_backend_cits(struct target_backend *tb) static int __init target_core_init_configfs(void) { - struct config_group *target_cg, *hba_cg = NULL, *alua_cg = NULL; - struct config_group *lu_gp_cg = NULL; struct configfs_subsystem *subsys = &target_core_fabrics; struct t10_alua_lu_gp *lu_gp; int ret; @@ -3161,51 +3107,24 @@ static int __init target_core_init_configfs(void) * Create $CONFIGFS/target/core default group for HBA <-> Storage Object * and ALUA Logical Unit Group and Target Port Group infrastructure. */ - target_cg = &subsys->su_group; - target_cg->default_groups = kmalloc(sizeof(struct config_group *) * 2, - GFP_KERNEL); - if (!target_cg->default_groups) { - pr_err("Unable to allocate target_cg->default_groups\n"); - ret = -ENOMEM; - goto out_global; - } + config_group_init_type_name(&target_core_hbagroup, "core", + &target_core_cit); + configfs_add_default_group(&target_core_hbagroup, &subsys->su_group); - config_group_init_type_name(&target_core_hbagroup, - "core", &target_core_cit); - target_cg->default_groups[0] = &target_core_hbagroup; - target_cg->default_groups[1] = NULL; /* * Create ALUA infrastructure under /sys/kernel/config/target/core/alua/ */ - hba_cg = &target_core_hbagroup; - hba_cg->default_groups = kmalloc(sizeof(struct config_group *) * 2, - GFP_KERNEL); - if (!hba_cg->default_groups) { - pr_err("Unable to allocate hba_cg->default_groups\n"); - ret = -ENOMEM; - goto out_global; - } - config_group_init_type_name(&alua_group, - "alua", &target_core_alua_cit); - hba_cg->default_groups[0] = &alua_group; - hba_cg->default_groups[1] = NULL; + config_group_init_type_name(&alua_group, "alua", &target_core_alua_cit); + configfs_add_default_group(&alua_group, &target_core_hbagroup); + /* * Add ALUA Logical Unit Group and Target Port Group ConfigFS * groups under /sys/kernel/config/target/core/alua/ */ - alua_cg = &alua_group; - alua_cg->default_groups = kmalloc(sizeof(struct config_group *) * 2, - GFP_KERNEL); - if (!alua_cg->default_groups) { - pr_err("Unable to allocate alua_cg->default_groups\n"); - ret = -ENOMEM; - goto out_global; - } + config_group_init_type_name(&alua_lu_gps_group, "lu_gps", + &target_core_alua_lu_gps_cit); + configfs_add_default_group(&alua_lu_gps_group, &alua_group); - config_group_init_type_name(&alua_lu_gps_group, - "lu_gps", &target_core_alua_lu_gps_cit); - alua_cg->default_groups[0] = &alua_lu_gps_group; - alua_cg->default_groups[1] = NULL; /* * Add core/alua/lu_gps/default_lu_gp */ @@ -3215,20 +3134,12 @@ static int __init target_core_init_configfs(void) goto out_global; } - lu_gp_cg = &alua_lu_gps_group; - lu_gp_cg->default_groups = kmalloc(sizeof(struct config_group *) * 2, - GFP_KERNEL); - if (!lu_gp_cg->default_groups) { - pr_err("Unable to allocate lu_gp_cg->default_groups\n"); - ret = -ENOMEM; - goto out_global; - } - config_group_init_type_name(&lu_gp->lu_gp_group, "default_lu_gp", &target_core_alua_lu_gp_cit); - lu_gp_cg->default_groups[0] = &lu_gp->lu_gp_group; - lu_gp_cg->default_groups[1] = NULL; + configfs_add_default_group(&lu_gp->lu_gp_group, &alua_lu_gps_group); + default_lu_gp = lu_gp; + /* * Register the target_core_mod subsystem with configfs. 
*/ @@ -3267,55 +3178,21 @@ out_global: core_alua_free_lu_gp(default_lu_gp); default_lu_gp = NULL; } - if (lu_gp_cg) - kfree(lu_gp_cg->default_groups); - if (alua_cg) - kfree(alua_cg->default_groups); - if (hba_cg) - kfree(hba_cg->default_groups); - kfree(target_cg->default_groups); release_se_kmem_caches(); return ret; } static void __exit target_core_exit_configfs(void) { - struct config_group *hba_cg, *alua_cg, *lu_gp_cg; - struct config_item *item; - int i; + configfs_remove_default_groups(&alua_lu_gps_group); + configfs_remove_default_groups(&alua_group); + configfs_remove_default_groups(&target_core_hbagroup); - lu_gp_cg = &alua_lu_gps_group; - for (i = 0; lu_gp_cg->default_groups[i]; i++) { - item = &lu_gp_cg->default_groups[i]->cg_item; - lu_gp_cg->default_groups[i] = NULL; - config_item_put(item); - } - kfree(lu_gp_cg->default_groups); - lu_gp_cg->default_groups = NULL; - - alua_cg = &alua_group; - for (i = 0; alua_cg->default_groups[i]; i++) { - item = &alua_cg->default_groups[i]->cg_item; - alua_cg->default_groups[i] = NULL; - config_item_put(item); - } - kfree(alua_cg->default_groups); - alua_cg->default_groups = NULL; - - hba_cg = &target_core_hbagroup; - for (i = 0; hba_cg->default_groups[i]; i++) { - item = &hba_cg->default_groups[i]->cg_item; - hba_cg->default_groups[i] = NULL; - config_item_put(item); - } - kfree(hba_cg->default_groups); - hba_cg->default_groups = NULL; /* * We expect subsys->su_group.default_groups to be released * by configfs subsystem provider logic.. */ configfs_unregister_subsystem(&target_core_fabrics); - kfree(target_core_fabrics.su_group.default_groups); core_alua_free_lu_gp(default_lu_gp); default_lu_gp = NULL; diff --git a/drivers/target/target_core_fabric_configfs.c b/drivers/target/target_core_fabric_configfs.c index f916d18ccb48..8caef31da415 100644 --- a/drivers/target/target_core_fabric_configfs.c +++ b/drivers/target/target_core_fabric_configfs.c @@ -273,18 +273,10 @@ static struct config_group *target_fabric_make_mappedlun( struct se_portal_group *se_tpg = se_nacl->se_tpg; struct target_fabric_configfs *tf = se_tpg->se_tpg_wwn->wwn_tf; struct se_lun_acl *lacl = NULL; - struct config_item *acl_ci; - struct config_group *lacl_cg = NULL, *ml_stat_grp = NULL; char *buf; unsigned long long mapped_lun; int ret = 0; - acl_ci = &group->cg_item; - if (!acl_ci) { - pr_err("Unable to locatel acl_ci\n"); - return NULL; - } - buf = kzalloc(strlen(name) + 1, GFP_KERNEL); if (!buf) { pr_err("Unable to allocate memory for name buf\n"); @@ -315,37 +307,19 @@ static struct config_group *target_fabric_make_mappedlun( goto out; } - lacl_cg = &lacl->se_lun_group; - lacl_cg->default_groups = kmalloc(sizeof(struct config_group *) * 2, - GFP_KERNEL); - if (!lacl_cg->default_groups) { - pr_err("Unable to allocate lacl_cg->default_groups\n"); - ret = -ENOMEM; - goto out; - } - config_group_init_type_name(&lacl->se_lun_group, name, &tf->tf_tpg_mappedlun_cit); + config_group_init_type_name(&lacl->ml_stat_grps.stat_group, "statistics", &tf->tf_tpg_mappedlun_stat_cit); - lacl_cg->default_groups[0] = &lacl->ml_stat_grps.stat_group; - lacl_cg->default_groups[1] = NULL; - - ml_stat_grp = &lacl->ml_stat_grps.stat_group; - ml_stat_grp->default_groups = kmalloc(sizeof(struct config_group *) * 3, - GFP_KERNEL); - if (!ml_stat_grp->default_groups) { - pr_err("Unable to allocate ml_stat_grp->default_groups\n"); - ret = -ENOMEM; - goto out; - } + configfs_add_default_group(&lacl->ml_stat_grps.stat_group, + &lacl->se_lun_group); + target_stat_setup_mappedlun_default_groups(lacl); 
kfree(buf); return &lacl->se_lun_group; out: - if (lacl_cg) - kfree(lacl_cg->default_groups); kfree(lacl); kfree(buf); return ERR_PTR(ret); @@ -357,25 +331,9 @@ static void target_fabric_drop_mappedlun( { struct se_lun_acl *lacl = container_of(to_config_group(item), struct se_lun_acl, se_lun_group); - struct config_item *df_item; - struct config_group *lacl_cg = NULL, *ml_stat_grp = NULL; - int i; - - ml_stat_grp = &lacl->ml_stat_grps.stat_group; - for (i = 0; ml_stat_grp->default_groups[i]; i++) { - df_item = &ml_stat_grp->default_groups[i]->cg_item; - ml_stat_grp->default_groups[i] = NULL; - config_item_put(df_item); - } - kfree(ml_stat_grp->default_groups); - lacl_cg = &lacl->se_lun_group; - for (i = 0; lacl_cg->default_groups[i]; i++) { - df_item = &lacl_cg->default_groups[i]->cg_item; - lacl_cg->default_groups[i] = NULL; - config_item_put(df_item); - } - kfree(lacl_cg->default_groups); + configfs_remove_default_groups(&lacl->ml_stat_grps.stat_group); + configfs_remove_default_groups(&lacl->se_lun_group); config_item_put(item); } @@ -424,7 +382,6 @@ static struct config_group *target_fabric_make_nodeacl( struct se_portal_group, tpg_acl_group); struct target_fabric_configfs *tf = se_tpg->se_tpg_wwn->wwn_tf; struct se_node_acl *se_nacl; - struct config_group *nacl_cg; se_nacl = core_tpg_add_initiator_node_acl(se_tpg, name); if (IS_ERR(se_nacl)) @@ -438,24 +395,28 @@ static struct config_group *target_fabric_make_nodeacl( } } - nacl_cg = &se_nacl->acl_group; - nacl_cg->default_groups = se_nacl->acl_default_groups; - nacl_cg->default_groups[0] = &se_nacl->acl_attrib_group; - nacl_cg->default_groups[1] = &se_nacl->acl_auth_group; - nacl_cg->default_groups[2] = &se_nacl->acl_param_group; - nacl_cg->default_groups[3] = &se_nacl->acl_fabric_stat_group; - nacl_cg->default_groups[4] = NULL; - config_group_init_type_name(&se_nacl->acl_group, name, &tf->tf_tpg_nacl_base_cit); + config_group_init_type_name(&se_nacl->acl_attrib_group, "attrib", &tf->tf_tpg_nacl_attrib_cit); + configfs_add_default_group(&se_nacl->acl_attrib_group, + &se_nacl->acl_group); + config_group_init_type_name(&se_nacl->acl_auth_group, "auth", &tf->tf_tpg_nacl_auth_cit); + configfs_add_default_group(&se_nacl->acl_auth_group, + &se_nacl->acl_group); + config_group_init_type_name(&se_nacl->acl_param_group, "param", &tf->tf_tpg_nacl_param_cit); + configfs_add_default_group(&se_nacl->acl_param_group, + &se_nacl->acl_group); + config_group_init_type_name(&se_nacl->acl_fabric_stat_group, "fabric_statistics", &tf->tf_tpg_nacl_stat_cit); + configfs_add_default_group(&se_nacl->acl_fabric_stat_group, + &se_nacl->acl_group); return &se_nacl->acl_group; } @@ -466,16 +427,9 @@ static void target_fabric_drop_nodeacl( { struct se_node_acl *se_nacl = container_of(to_config_group(item), struct se_node_acl, acl_group); - struct config_item *df_item; - struct config_group *nacl_cg; - int i; - - nacl_cg = &se_nacl->acl_group; - for (i = 0; nacl_cg->default_groups[i]; i++) { - df_item = &nacl_cg->default_groups[i]->cg_item; - nacl_cg->default_groups[i] = NULL; - config_item_put(df_item); - } + + configfs_remove_default_groups(&se_nacl->acl_group); + /* * struct se_node_acl free is done in target_fabric_nacl_base_release() */ @@ -795,7 +749,6 @@ static struct config_group *target_fabric_make_lun( struct se_portal_group *se_tpg = container_of(group, struct se_portal_group, tpg_lun_group); struct target_fabric_configfs *tf = se_tpg->se_tpg_wwn->wwn_tf; - struct config_group *lun_cg = NULL, *port_stat_grp = NULL; unsigned long long unpacked_lun; int 
errno; @@ -812,31 +765,14 @@ static struct config_group *target_fabric_make_lun( if (IS_ERR(lun)) return ERR_CAST(lun); - lun_cg = &lun->lun_group; - lun_cg->default_groups = kmalloc(sizeof(struct config_group *) * 2, - GFP_KERNEL); - if (!lun_cg->default_groups) { - pr_err("Unable to allocate lun_cg->default_groups\n"); - kfree(lun); - return ERR_PTR(-ENOMEM); - } - config_group_init_type_name(&lun->lun_group, name, &tf->tf_tpg_port_cit); + config_group_init_type_name(&lun->port_stat_grps.stat_group, "statistics", &tf->tf_tpg_port_stat_cit); - lun_cg->default_groups[0] = &lun->port_stat_grps.stat_group; - lun_cg->default_groups[1] = NULL; - - port_stat_grp = &lun->port_stat_grps.stat_group; - port_stat_grp->default_groups = kzalloc(sizeof(struct config_group *) * 4, - GFP_KERNEL); - if (!port_stat_grp->default_groups) { - pr_err("Unable to allocate port_stat_grp->default_groups\n"); - kfree(lun_cg->default_groups); - kfree(lun); - return ERR_PTR(-ENOMEM); - } + configfs_add_default_group(&lun->port_stat_grps.stat_group, + &lun->lun_group); + target_stat_setup_port_default_groups(lun); return &lun->lun_group; @@ -848,25 +784,9 @@ static void target_fabric_drop_lun( { struct se_lun *lun = container_of(to_config_group(item), struct se_lun, lun_group); - struct config_item *df_item; - struct config_group *lun_cg, *port_stat_grp; - int i; - - port_stat_grp = &lun->port_stat_grps.stat_group; - for (i = 0; port_stat_grp->default_groups[i]; i++) { - df_item = &port_stat_grp->default_groups[i]->cg_item; - port_stat_grp->default_groups[i] = NULL; - config_item_put(df_item); - } - kfree(port_stat_grp->default_groups); - lun_cg = &lun->lun_group; - for (i = 0; lun_cg->default_groups[i]; i++) { - df_item = &lun_cg->default_groups[i]->cg_item; - lun_cg->default_groups[i] = NULL; - config_item_put(df_item); - } - kfree(lun_cg->default_groups); + configfs_remove_default_groups(&lun->port_stat_grps.stat_group); + configfs_remove_default_groups(&lun->lun_group); config_item_put(item); } @@ -922,32 +842,39 @@ static struct config_group *target_fabric_make_tpg( se_tpg = tf->tf_ops->fabric_make_tpg(wwn, group, name); if (!se_tpg || IS_ERR(se_tpg)) return ERR_PTR(-EINVAL); - /* - * Setup default groups from pre-allocated se_tpg->tpg_default_groups - */ - se_tpg->tpg_group.default_groups = se_tpg->tpg_default_groups; - se_tpg->tpg_group.default_groups[0] = &se_tpg->tpg_lun_group; - se_tpg->tpg_group.default_groups[1] = &se_tpg->tpg_np_group; - se_tpg->tpg_group.default_groups[2] = &se_tpg->tpg_acl_group; - se_tpg->tpg_group.default_groups[3] = &se_tpg->tpg_attrib_group; - se_tpg->tpg_group.default_groups[4] = &se_tpg->tpg_auth_group; - se_tpg->tpg_group.default_groups[5] = &se_tpg->tpg_param_group; - se_tpg->tpg_group.default_groups[6] = NULL; config_group_init_type_name(&se_tpg->tpg_group, name, &tf->tf_tpg_base_cit); + config_group_init_type_name(&se_tpg->tpg_lun_group, "lun", &tf->tf_tpg_lun_cit); + configfs_add_default_group(&se_tpg->tpg_lun_group, + &se_tpg->tpg_group); + config_group_init_type_name(&se_tpg->tpg_np_group, "np", &tf->tf_tpg_np_cit); + configfs_add_default_group(&se_tpg->tpg_np_group, + &se_tpg->tpg_group); + config_group_init_type_name(&se_tpg->tpg_acl_group, "acls", &tf->tf_tpg_nacl_cit); + configfs_add_default_group(&se_tpg->tpg_acl_group, + &se_tpg->tpg_group); + config_group_init_type_name(&se_tpg->tpg_attrib_group, "attrib", &tf->tf_tpg_attrib_cit); + configfs_add_default_group(&se_tpg->tpg_attrib_group, + &se_tpg->tpg_group); + config_group_init_type_name(&se_tpg->tpg_auth_group, 
"auth", &tf->tf_tpg_auth_cit); + configfs_add_default_group(&se_tpg->tpg_auth_group, + &se_tpg->tpg_group); + config_group_init_type_name(&se_tpg->tpg_param_group, "param", &tf->tf_tpg_param_cit); + configfs_add_default_group(&se_tpg->tpg_param_group, + &se_tpg->tpg_group); return &se_tpg->tpg_group; } @@ -958,19 +885,8 @@ static void target_fabric_drop_tpg( { struct se_portal_group *se_tpg = container_of(to_config_group(item), struct se_portal_group, tpg_group); - struct config_group *tpg_cg = &se_tpg->tpg_group; - struct config_item *df_item; - int i; - /* - * Release default groups, but do not release tpg_cg->default_groups - * memory as it is statically allocated at se_tpg->tpg_default_groups. - */ - for (i = 0; tpg_cg->default_groups[i]; i++) { - df_item = &tpg_cg->default_groups[i]->cg_item; - tpg_cg->default_groups[i] = NULL; - config_item_put(df_item); - } + configfs_remove_default_groups(&se_tpg->tpg_group); config_item_put(item); } @@ -1026,16 +942,12 @@ static struct config_group *target_fabric_make_wwn( return ERR_PTR(-EINVAL); wwn->wwn_tf = tf; - /* - * Setup default groups from pre-allocated wwn->wwn_default_groups - */ - wwn->wwn_group.default_groups = wwn->wwn_default_groups; - wwn->wwn_group.default_groups[0] = &wwn->fabric_stat_group; - wwn->wwn_group.default_groups[1] = NULL; config_group_init_type_name(&wwn->wwn_group, name, &tf->tf_tpg_cit); + config_group_init_type_name(&wwn->fabric_stat_group, "fabric_statistics", &tf->tf_wwn_fabric_stats_cit); + configfs_add_default_group(&wwn->fabric_stat_group, &wwn->wwn_group); return &wwn->wwn_group; } @@ -1046,16 +958,8 @@ static void target_fabric_drop_wwn( { struct se_wwn *wwn = container_of(to_config_group(item), struct se_wwn, wwn_group); - struct config_item *df_item; - struct config_group *cg = &wwn->wwn_group; - int i; - - for (i = 0; cg->default_groups[i]; i++) { - df_item = &cg->default_groups[i]->cg_item; - cg->default_groups[i] = NULL; - config_item_put(df_item); - } + configfs_remove_default_groups(&wwn->wwn_group); config_item_put(item); } diff --git a/drivers/target/target_core_internal.h b/drivers/target/target_core_internal.h index db4412fe6b8a..4a7cf499cdfa 100644 --- a/drivers/target/target_core_internal.h +++ b/drivers/target/target_core_internal.h @@ -22,7 +22,6 @@ struct target_fabric_configfs { struct list_head tf_list; struct config_group tf_group; struct config_group tf_disc_group; - struct config_group *tf_default_groups[2]; const struct target_core_fabric_ops *tf_ops; struct config_item_type tf_discovery_cit; diff --git a/drivers/target/target_core_stat.c b/drivers/target/target_core_stat.c index 81a6b3e07687..1a39033d2bff 100644 --- a/drivers/target/target_core_stat.c +++ b/drivers/target/target_core_stat.c @@ -407,19 +407,20 @@ static struct config_item_type target_stat_scsi_lu_cit = { */ void target_stat_setup_dev_default_groups(struct se_device *dev) { - struct config_group *dev_stat_grp = &dev->dev_stat_grps.stat_group; - config_group_init_type_name(&dev->dev_stat_grps.scsi_dev_group, "scsi_dev", &target_stat_scsi_dev_cit); + configfs_add_default_group(&dev->dev_stat_grps.scsi_dev_group, + &dev->dev_stat_grps.stat_group); + config_group_init_type_name(&dev->dev_stat_grps.scsi_tgt_dev_group, "scsi_tgt_dev", &target_stat_scsi_tgt_dev_cit); + configfs_add_default_group(&dev->dev_stat_grps.scsi_tgt_dev_group, + &dev->dev_stat_grps.stat_group); + config_group_init_type_name(&dev->dev_stat_grps.scsi_lu_group, "scsi_lu", &target_stat_scsi_lu_cit); - - dev_stat_grp->default_groups[0] = 
&dev->dev_stat_grps.scsi_dev_group; - dev_stat_grp->default_groups[1] = &dev->dev_stat_grps.scsi_tgt_dev_group; - dev_stat_grp->default_groups[2] = &dev->dev_stat_grps.scsi_lu_group; - dev_stat_grp->default_groups[3] = NULL; + configfs_add_default_group(&dev->dev_stat_grps.scsi_lu_group, + &dev->dev_stat_grps.stat_group); } /* @@ -818,19 +819,20 @@ static struct config_item_type target_stat_scsi_transport_cit = { */ void target_stat_setup_port_default_groups(struct se_lun *lun) { - struct config_group *port_stat_grp = &lun->port_stat_grps.stat_group; - config_group_init_type_name(&lun->port_stat_grps.scsi_port_group, "scsi_port", &target_stat_scsi_port_cit); + configfs_add_default_group(&lun->port_stat_grps.scsi_port_group, + &lun->port_stat_grps.stat_group); + config_group_init_type_name(&lun->port_stat_grps.scsi_tgt_port_group, "scsi_tgt_port", &target_stat_scsi_tgt_port_cit); + configfs_add_default_group(&lun->port_stat_grps.scsi_tgt_port_group, + &lun->port_stat_grps.stat_group); + config_group_init_type_name(&lun->port_stat_grps.scsi_transport_group, "scsi_transport", &target_stat_scsi_transport_cit); - - port_stat_grp->default_groups[0] = &lun->port_stat_grps.scsi_port_group; - port_stat_grp->default_groups[1] = &lun->port_stat_grps.scsi_tgt_port_group; - port_stat_grp->default_groups[2] = &lun->port_stat_grps.scsi_transport_group; - port_stat_grp->default_groups[3] = NULL; + configfs_add_default_group(&lun->port_stat_grps.scsi_transport_group, + &lun->port_stat_grps.stat_group); } /* @@ -1351,14 +1353,13 @@ static struct config_item_type target_stat_scsi_att_intr_port_cit = { */ void target_stat_setup_mappedlun_default_groups(struct se_lun_acl *lacl) { - struct config_group *ml_stat_grp = &lacl->ml_stat_grps.stat_group; - config_group_init_type_name(&lacl->ml_stat_grps.scsi_auth_intr_group, "scsi_auth_intr", &target_stat_scsi_auth_intr_cit); + configfs_add_default_group(&lacl->ml_stat_grps.scsi_auth_intr_group, + &lacl->ml_stat_grps.stat_group); + config_group_init_type_name(&lacl->ml_stat_grps.scsi_att_intr_port_group, "scsi_att_intr_port", &target_stat_scsi_att_intr_port_cit); - - ml_stat_grp->default_groups[0] = &lacl->ml_stat_grps.scsi_auth_intr_group; - ml_stat_grp->default_groups[1] = &lacl->ml_stat_grps.scsi_att_intr_port_group; - ml_stat_grp->default_groups[2] = NULL; + configfs_add_default_group(&lacl->ml_stat_grps.scsi_att_intr_port_group, + &lacl->ml_stat_grps.stat_group); } diff --git a/drivers/usb/gadget/configfs.c b/drivers/usb/gadget/configfs.c index 590c44989e5e..2f8081f0f795 100644 --- a/drivers/usb/gadget/configfs.c +++ b/drivers/usb/gadget/configfs.c @@ -49,7 +49,6 @@ struct gadget_info { struct config_group configs_group; struct config_group strings_group; struct config_group os_desc_group; - struct config_group *default_groups[5]; struct mutex lock; struct usb_gadget_strings *gstrings[MAX_USB_STRING_LANGS + 1]; @@ -71,7 +70,6 @@ static inline struct gadget_info *to_gadget_info(struct config_item *item) struct config_usb_cfg { struct config_group group; struct config_group strings_group; - struct config_group *default_groups[2]; struct list_head string_list; struct usb_configuration c; struct list_head func_list; @@ -666,13 +664,12 @@ static struct config_group *config_desc_make( INIT_LIST_HEAD(&cfg->string_list); INIT_LIST_HEAD(&cfg->func_list); - cfg->group.default_groups = cfg->default_groups; - cfg->default_groups[0] = &cfg->strings_group; - config_group_init_type_name(&cfg->group, name, &gadget_config_type); + config_group_init_type_name(&cfg->strings_group, 
"strings", &gadget_config_name_strings_type); + configfs_add_default_group(&cfg->strings_group, &cfg->group); ret = usb_add_config_only(&gi->cdev, &cfg->c); if (ret) @@ -1149,15 +1146,11 @@ int usb_os_desc_prepare_interf_dir(struct config_group *parent, char **names, struct module *owner) { - struct config_group **f_default_groups, *os_desc_group, - **interface_groups; + struct config_group *os_desc_group; struct config_item_type *os_desc_type, *interface_type; vla_group(data_chunk); - vla_item(data_chunk, struct config_group *, f_default_groups, 2); vla_item(data_chunk, struct config_group, os_desc_group, 1); - vla_item(data_chunk, struct config_group *, interface_groups, - n_interf + 1); vla_item(data_chunk, struct config_item_type, os_desc_type, 1); vla_item(data_chunk, struct config_item_type, interface_type, 1); @@ -1165,18 +1158,14 @@ int usb_os_desc_prepare_interf_dir(struct config_group *parent, if (!vlabuf) return -ENOMEM; - f_default_groups = vla_ptr(vlabuf, data_chunk, f_default_groups); os_desc_group = vla_ptr(vlabuf, data_chunk, os_desc_group); os_desc_type = vla_ptr(vlabuf, data_chunk, os_desc_type); - interface_groups = vla_ptr(vlabuf, data_chunk, interface_groups); interface_type = vla_ptr(vlabuf, data_chunk, interface_type); - parent->default_groups = f_default_groups; os_desc_type->ct_owner = owner; config_group_init_type_name(os_desc_group, "os_desc", os_desc_type); - f_default_groups[0] = os_desc_group; + configfs_add_default_group(os_desc_group, parent); - os_desc_group->default_groups = interface_groups; interface_type->ct_group_ops = &interf_grp_ops; interface_type->ct_attrs = interf_grp_attrs; interface_type->ct_owner = owner; @@ -1189,7 +1178,7 @@ int usb_os_desc_prepare_interf_dir(struct config_group *parent, config_group_init_type_name(&d->group, "", interface_type); config_item_set_name(&d->group.cg_item, "interface.%s", names[n_interf]); - interface_groups[n_interf] = &d->group; + configfs_add_default_group(&d->group, os_desc_group); } return 0; @@ -1423,20 +1412,23 @@ static struct config_group *gadgets_make( if (!gi) return ERR_PTR(-ENOMEM); - gi->group.default_groups = gi->default_groups; - gi->group.default_groups[0] = &gi->functions_group; - gi->group.default_groups[1] = &gi->configs_group; - gi->group.default_groups[2] = &gi->strings_group; - gi->group.default_groups[3] = &gi->os_desc_group; + config_group_init_type_name(&gi->group, name, &gadget_root_type); config_group_init_type_name(&gi->functions_group, "functions", &functions_type); + configfs_add_default_group(&gi->functions_group, &gi->group); + config_group_init_type_name(&gi->configs_group, "configs", &config_desc_type); + configfs_add_default_group(&gi->configs_group, &gi->group); + config_group_init_type_name(&gi->strings_group, "strings", &gadget_strings_strings_type); + configfs_add_default_group(&gi->strings_group, &gi->group); + config_group_init_type_name(&gi->os_desc_group, "os_desc", &os_desc_type); + configfs_add_default_group(&gi->os_desc_group, &gi->group); gi->composite.bind = configfs_do_nothing; gi->composite.unbind = configfs_do_nothing; @@ -1461,8 +1453,6 @@ static struct config_group *gadgets_make( if (!gi->composite.gadget_driver.function) goto err; - config_group_init_type_name(&gi->group, name, - &gadget_root_type); return &gi->group; err: kfree(gi); diff --git a/drivers/usb/gadget/function/f_mass_storage.c b/drivers/usb/gadget/function/f_mass_storage.c index 223ccf89d226..142bb7763f2a 100644 --- a/drivers/usb/gadget/function/f_mass_storage.c +++ 
b/drivers/usb/gadget/function/f_mass_storage.c @@ -3484,12 +3484,12 @@ static struct usb_function_instance *fsg_alloc_inst(void) opts->lun0.lun = opts->common->luns[0]; opts->lun0.lun_id = 0; - config_group_init_type_name(&opts->lun0.group, "lun.0", &fsg_lun_type); - opts->default_groups[0] = &opts->lun0.group; - opts->func_inst.group.default_groups = opts->default_groups; config_group_init_type_name(&opts->func_inst.group, "", &fsg_func_type); + config_group_init_type_name(&opts->lun0.group, "lun.0", &fsg_lun_type); + configfs_add_default_group(&opts->lun0.group, &opts->func_inst.group); + return &opts->func_inst; release_buffers: diff --git a/drivers/usb/gadget/function/f_rndis.c b/drivers/usb/gadget/function/f_rndis.c index e587767e374c..f577eec88f8c 100644 --- a/drivers/usb/gadget/function/f_rndis.c +++ b/drivers/usb/gadget/function/f_rndis.c @@ -889,7 +889,6 @@ static void rndis_free_inst(struct usb_function_instance *f) free_netdev(opts->net); } - kfree(opts->rndis_os_desc.group.default_groups); /* single VLA chunk */ kfree(opts); } @@ -916,10 +915,10 @@ static struct usb_function_instance *rndis_alloc_inst(void) descs[0] = &opts->rndis_os_desc; names[0] = "rndis"; - usb_os_desc_prepare_interf_dir(&opts->func_inst.group, 1, descs, - names, THIS_MODULE); config_group_init_type_name(&opts->func_inst.group, "", &rndis_func_type); + usb_os_desc_prepare_interf_dir(&opts->func_inst.group, 1, descs, + names, THIS_MODULE); return &opts->func_inst; } diff --git a/drivers/usb/gadget/function/uvc_configfs.c b/drivers/usb/gadget/function/uvc_configfs.c index ad8c9b05572d..66753ba7a42e 100644 --- a/drivers/usb/gadget/function/uvc_configfs.c +++ b/drivers/usb/gadget/function/uvc_configfs.c @@ -272,11 +272,6 @@ static struct config_item_type uvcg_default_processing_type = { /* struct uvcg_processing {}; */ -static struct config_group *uvcg_processing_default_groups[] = { - &uvcg_default_processing.group, - NULL, -}; - /* control/processing */ static struct uvcg_processing_grp { struct config_group group; @@ -394,11 +389,6 @@ static struct config_item_type uvcg_default_camera_type = { /* struct uvcg_camera {}; */ -static struct config_group *uvcg_camera_default_groups[] = { - &uvcg_default_camera.group, - NULL, -}; - /* control/terminal/camera */ static struct uvcg_camera_grp { struct config_group group; @@ -477,11 +467,6 @@ static struct config_item_type uvcg_default_output_type = { /* struct uvcg_output {}; */ -static struct config_group *uvcg_output_default_groups[] = { - &uvcg_default_output.group, - NULL, -}; - /* control/terminal/output */ static struct uvcg_output_grp { struct config_group group; @@ -491,12 +476,6 @@ static struct config_item_type uvcg_output_grp_type = { .ct_owner = THIS_MODULE, }; -static struct config_group *uvcg_terminal_default_groups[] = { - &uvcg_camera_grp.group, - &uvcg_output_grp.group, - NULL, -}; - /* control/terminal */ static struct uvcg_terminal_grp { struct config_group group; @@ -619,12 +598,6 @@ static struct config_item_type uvcg_control_class_type = { .ct_owner = THIS_MODULE, }; -static struct config_group *uvcg_control_class_default_groups[] = { - &uvcg_control_class_fs.group, - &uvcg_control_class_ss.group, - NULL, -}; - /* control/class */ static struct uvcg_control_class_grp { struct config_group group; @@ -634,14 +607,6 @@ static struct config_item_type uvcg_control_class_grp_type = { .ct_owner = THIS_MODULE, }; -static struct config_group *uvcg_control_default_groups[] = { - &uvcg_control_header_grp.group, - &uvcg_processing_grp.group, - 
&uvcg_terminal_grp.group, - &uvcg_control_class_grp.group, - NULL, -}; - /* control */ static struct uvcg_control_grp { struct config_group group; @@ -1780,11 +1745,6 @@ static struct config_item_type uvcg_default_color_matching_type = { /* struct uvcg_color_matching {}; */ -static struct config_group *uvcg_color_matching_default_groups[] = { - &uvcg_default_color_matching.group, - NULL, -}; - /* streaming/color_matching */ static struct uvcg_color_matching_grp { struct config_group group; @@ -2145,13 +2105,6 @@ static struct config_item_type uvcg_streaming_class_type = { .ct_owner = THIS_MODULE, }; -static struct config_group *uvcg_streaming_class_default_groups[] = { - &uvcg_streaming_class_fs.group, - &uvcg_streaming_class_hs.group, - &uvcg_streaming_class_ss.group, - NULL, -}; - /* streaming/class */ static struct uvcg_streaming_class_grp { struct config_group group; @@ -2161,15 +2114,6 @@ static struct config_item_type uvcg_streaming_class_grp_type = { .ct_owner = THIS_MODULE, }; -static struct config_group *uvcg_streaming_default_groups[] = { - &uvcg_streaming_header_grp.group, - &uvcg_uncompressed_grp.group, - &uvcg_mjpeg_grp.group, - &uvcg_color_matching_grp.group, - &uvcg_streaming_class_grp.group, - NULL, -}; - /* streaming */ static struct uvcg_streaming_grp { struct config_group group; @@ -2179,12 +2123,6 @@ static struct config_item_type uvcg_streaming_grp_type = { .ct_owner = THIS_MODULE, }; -static struct config_group *uvcg_default_groups[] = { - &uvcg_control_grp.group, - &uvcg_streaming_grp.group, - NULL, -}; - static inline struct f_uvc_opts *to_f_uvc_opts(struct config_item *item) { return container_of(to_config_group(item), struct f_uvc_opts, @@ -2273,59 +2211,64 @@ static struct config_item_type uvc_func_type = { .ct_owner = THIS_MODULE, }; -static inline void uvcg_init_group(struct config_group *g, - struct config_group **default_groups, - const char *name, - struct config_item_type *type) -{ - g->default_groups = default_groups; - config_group_init_type_name(g, name, type); -} - int uvcg_attach_configfs(struct f_uvc_opts *opts) { config_group_init_type_name(&uvcg_control_header_grp.group, "header", &uvcg_control_header_grp_type); + config_group_init_type_name(&uvcg_default_processing.group, - "default", - &uvcg_default_processing_type); - uvcg_init_group(&uvcg_processing_grp.group, - uvcg_processing_default_groups, - "processing", - &uvcg_processing_grp_type); + "default", &uvcg_default_processing_type); + config_group_init_type_name(&uvcg_processing_grp.group, + "processing", &uvcg_processing_grp_type); + configfs_add_default_group(&uvcg_default_processing.group, + &uvcg_processing_grp.group); + config_group_init_type_name(&uvcg_default_camera.group, - "default", - &uvcg_default_camera_type); - uvcg_init_group(&uvcg_camera_grp.group, - uvcg_camera_default_groups, - "camera", - &uvcg_camera_grp_type); + "default", &uvcg_default_camera_type); + config_group_init_type_name(&uvcg_camera_grp.group, + "camera", &uvcg_camera_grp_type); + configfs_add_default_group(&uvcg_default_camera.group, + &uvcg_camera_grp.group); + config_group_init_type_name(&uvcg_default_output.group, - "default", - &uvcg_default_output_type); - uvcg_init_group(&uvcg_output_grp.group, - uvcg_output_default_groups, - "output", - &uvcg_output_grp_type); - uvcg_init_group(&uvcg_terminal_grp.group, - uvcg_terminal_default_groups, - "terminal", - &uvcg_terminal_grp_type); + "default", &uvcg_default_output_type); + config_group_init_type_name(&uvcg_output_grp.group, + "output", &uvcg_output_grp_type); + 
configfs_add_default_group(&uvcg_default_output.group, + &uvcg_output_grp.group); + + config_group_init_type_name(&uvcg_terminal_grp.group, + "terminal", &uvcg_terminal_grp_type); + configfs_add_default_group(&uvcg_camera_grp.group, + &uvcg_terminal_grp.group); + configfs_add_default_group(&uvcg_output_grp.group, + &uvcg_terminal_grp.group); + config_group_init_type_name(&uvcg_control_class_fs.group, - "fs", - &uvcg_control_class_type); + "fs", &uvcg_control_class_type); config_group_init_type_name(&uvcg_control_class_ss.group, - "ss", - &uvcg_control_class_type); - uvcg_init_group(&uvcg_control_class_grp.group, - uvcg_control_class_default_groups, + "ss", &uvcg_control_class_type); + config_group_init_type_name(&uvcg_control_class_grp.group, "class", &uvcg_control_class_grp_type); - uvcg_init_group(&uvcg_control_grp.group, - uvcg_control_default_groups, + configfs_add_default_group(&uvcg_control_class_fs.group, + &uvcg_control_class_grp.group); + configfs_add_default_group(&uvcg_control_class_ss.group, + &uvcg_control_class_grp.group); + + config_group_init_type_name(&uvcg_control_grp.group, "control", &uvcg_control_grp_type); + configfs_add_default_group(&uvcg_control_header_grp.group, + &uvcg_control_grp.group); + configfs_add_default_group(&uvcg_processing_grp.group, + &uvcg_control_grp.group); + configfs_add_default_group(&uvcg_terminal_grp.group, + &uvcg_control_grp.group); + configfs_add_default_group(&uvcg_control_class_grp.group, + &uvcg_control_grp.group); + config_group_init_type_name(&uvcg_streaming_header_grp.group, "header", &uvcg_streaming_header_grp_type); @@ -2338,30 +2281,47 @@ int uvcg_attach_configfs(struct f_uvc_opts *opts) config_group_init_type_name(&uvcg_default_color_matching.group, "default", &uvcg_default_color_matching_type); - uvcg_init_group(&uvcg_color_matching_grp.group, - uvcg_color_matching_default_groups, + config_group_init_type_name(&uvcg_color_matching_grp.group, "color_matching", &uvcg_color_matching_grp_type); + configfs_add_default_group(&uvcg_default_color_matching.group, + &uvcg_color_matching_grp.group); + config_group_init_type_name(&uvcg_streaming_class_fs.group, - "fs", - &uvcg_streaming_class_type); + "fs", &uvcg_streaming_class_type); config_group_init_type_name(&uvcg_streaming_class_hs.group, - "hs", - &uvcg_streaming_class_type); + "hs", &uvcg_streaming_class_type); config_group_init_type_name(&uvcg_streaming_class_ss.group, - "ss", - &uvcg_streaming_class_type); - uvcg_init_group(&uvcg_streaming_class_grp.group, - uvcg_streaming_class_default_groups, - "class", - &uvcg_streaming_class_grp_type); - uvcg_init_group(&uvcg_streaming_grp.group, - uvcg_streaming_default_groups, - "streaming", - &uvcg_streaming_grp_type); - uvcg_init_group(&opts->func_inst.group, - uvcg_default_groups, + "ss", &uvcg_streaming_class_type); + config_group_init_type_name(&uvcg_streaming_class_grp.group, + "class", &uvcg_streaming_class_grp_type); + configfs_add_default_group(&uvcg_streaming_class_fs.group, + &uvcg_streaming_class_grp.group); + configfs_add_default_group(&uvcg_streaming_class_hs.group, + &uvcg_streaming_class_grp.group); + configfs_add_default_group(&uvcg_streaming_class_ss.group, + &uvcg_streaming_class_grp.group); + + config_group_init_type_name(&uvcg_streaming_grp.group, + "streaming", &uvcg_streaming_grp_type); + configfs_add_default_group(&uvcg_streaming_header_grp.group, + &uvcg_streaming_grp.group); + configfs_add_default_group(&uvcg_uncompressed_grp.group, + &uvcg_streaming_grp.group); + configfs_add_default_group(&uvcg_mjpeg_grp.group, + 
&uvcg_streaming_grp.group); + configfs_add_default_group(&uvcg_color_matching_grp.group, + &uvcg_streaming_grp.group); + configfs_add_default_group(&uvcg_streaming_class_grp.group, + &uvcg_streaming_grp.group); + + config_group_init_type_name(&opts->func_inst.group, "", &uvc_func_type); + configfs_add_default_group(&uvcg_control_grp.group, + &opts->func_inst.group); + configfs_add_default_group(&uvcg_streaming_grp.group, + &opts->func_inst.group); + return 0; } diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c index f419519ec41f..b51ce6778145 100644 --- a/fs/configfs/dir.c +++ b/fs/configfs/dir.c @@ -701,23 +701,29 @@ static int populate_groups(struct config_group *group) { struct config_group *new_group; int ret = 0; - int i; - - if (group->default_groups) { - for (i = 0; group->default_groups[i]; i++) { - new_group = group->default_groups[i]; - ret = create_default_group(group, new_group); - if (ret) { - detach_groups(group); - break; - } + list_for_each_entry(new_group, &group->default_groups, group_entry) { + ret = create_default_group(group, new_group); + if (ret) { + detach_groups(group); + break; } } return ret; } +void configfs_remove_default_groups(struct config_group *group) +{ + struct config_group *g, *n; + + list_for_each_entry_safe(g, n, &group->default_groups, group_entry) { + list_del(&g->group_entry); + config_item_put(&g->cg_item); + } +} +EXPORT_SYMBOL(configfs_remove_default_groups); + /* * All of link_obj/unlink_obj/link_group/unlink_group require that * subsys->su_mutex is held. @@ -766,15 +772,10 @@ static void link_obj(struct config_item *parent_item, struct config_item *item) static void unlink_group(struct config_group *group) { - int i; struct config_group *new_group; - if (group->default_groups) { - for (i = 0; group->default_groups[i]; i++) { - new_group = group->default_groups[i]; - unlink_group(new_group); - } - } + list_for_each_entry(new_group, &group->default_groups, group_entry) + unlink_group(new_group); group->cg_subsys = NULL; unlink_obj(&group->cg_item); @@ -782,7 +783,6 @@ static void unlink_group(struct config_group *group) static void link_group(struct config_group *parent_group, struct config_group *group) { - int i; struct config_group *new_group; struct configfs_subsystem *subsys = NULL; /* gcc is a turd */ @@ -796,12 +796,8 @@ static void link_group(struct config_group *parent_group, struct config_group *g BUG(); group->cg_subsys = subsys; - if (group->default_groups) { - for (i = 0; group->default_groups[i]; i++) { - new_group = group->default_groups[i]; - link_group(group, new_group); - } - } + list_for_each_entry(new_group, &group->default_groups, group_entry) + link_group(group, new_group); } /* diff --git a/fs/configfs/item.c b/fs/configfs/item.c index b863a09cd2f1..8b2a994042dd 100644 --- a/fs/configfs/item.c +++ b/fs/configfs/item.c @@ -182,6 +182,7 @@ void config_group_init(struct config_group *group) { config_item_init(&group->cg_item); INIT_LIST_HEAD(&group->cg_children); + INIT_LIST_HEAD(&group->default_groups); } EXPORT_SYMBOL(config_group_init); diff --git a/fs/dlm/config.c b/fs/dlm/config.c index 8e294fbbac39..519112168a9e 100644 --- a/fs/dlm/config.c +++ b/fs/dlm/config.c @@ -346,7 +346,6 @@ static struct config_group *make_cluster(struct config_group *g, void *gps = NULL; cl = kzalloc(sizeof(struct dlm_cluster), GFP_NOFS); - gps = kcalloc(3, sizeof(struct config_group *), GFP_NOFS); sps = kzalloc(sizeof(struct dlm_spaces), GFP_NOFS); cms = kzalloc(sizeof(struct dlm_comms), GFP_NOFS); @@ -357,10 +356,8 @@ static struct 
config_group *make_cluster(struct config_group *g, config_group_init_type_name(&sps->ss_group, "spaces", &spaces_type); config_group_init_type_name(&cms->cs_group, "comms", &comms_type); - cl->group.default_groups = gps; - cl->group.default_groups[0] = &sps->ss_group; - cl->group.default_groups[1] = &cms->cs_group; - cl->group.default_groups[2] = NULL; + configfs_add_default_group(&sps->ss_group, &cl->group); + configfs_add_default_group(&cms->cs_group, &cl->group); cl->cl_tcp_port = dlm_config.ci_tcp_port; cl->cl_buffer_size = dlm_config.ci_buffer_size; @@ -383,7 +380,6 @@ static struct config_group *make_cluster(struct config_group *g, fail: kfree(cl); - kfree(gps); kfree(sps); kfree(cms); return ERR_PTR(-ENOMEM); @@ -392,14 +388,8 @@ static struct config_group *make_cluster(struct config_group *g, static void drop_cluster(struct config_group *g, struct config_item *i) { struct dlm_cluster *cl = config_item_to_cluster(i); - struct config_item *tmp; - int j; - for (j = 0; cl->group.default_groups[j]; j++) { - tmp = &cl->group.default_groups[j]->cg_item; - cl->group.default_groups[j] = NULL; - config_item_put(tmp); - } + configfs_remove_default_groups(&cl->group); space_list = NULL; comm_list = NULL; @@ -410,7 +400,6 @@ static void drop_cluster(struct config_group *g, struct config_item *i) static void release_cluster(struct config_item *i) { struct dlm_cluster *cl = config_item_to_cluster(i); - kfree(cl->group.default_groups); kfree(cl); } @@ -418,21 +407,17 @@ static struct config_group *make_space(struct config_group *g, const char *name) { struct dlm_space *sp = NULL; struct dlm_nodes *nds = NULL; - void *gps = NULL; sp = kzalloc(sizeof(struct dlm_space), GFP_NOFS); - gps = kcalloc(2, sizeof(struct config_group *), GFP_NOFS); nds = kzalloc(sizeof(struct dlm_nodes), GFP_NOFS); - if (!sp || !gps || !nds) + if (!sp || !nds) goto fail; config_group_init_type_name(&sp->group, name, &space_type); - config_group_init_type_name(&nds->ns_group, "nodes", &nodes_type); - sp->group.default_groups = gps; - sp->group.default_groups[0] = &nds->ns_group; - sp->group.default_groups[1] = NULL; + config_group_init_type_name(&nds->ns_group, "nodes", &nodes_type); + configfs_add_default_group(&nds->ns_group, &sp->group); INIT_LIST_HEAD(&sp->members); mutex_init(&sp->members_lock); @@ -441,7 +426,6 @@ static struct config_group *make_space(struct config_group *g, const char *name) fail: kfree(sp); - kfree(gps); kfree(nds); return ERR_PTR(-ENOMEM); } @@ -449,24 +433,16 @@ static struct config_group *make_space(struct config_group *g, const char *name) static void drop_space(struct config_group *g, struct config_item *i) { struct dlm_space *sp = config_item_to_space(i); - struct config_item *tmp; - int j; /* assert list_empty(&sp->members) */ - for (j = 0; sp->group.default_groups[j]; j++) { - tmp = &sp->group.default_groups[j]->cg_item; - sp->group.default_groups[j] = NULL; - config_item_put(tmp); - } - + configfs_remove_default_groups(&sp->group); config_item_put(i); } static void release_space(struct config_item *i) { struct dlm_space *sp = config_item_to_space(i); - kfree(sp->group.default_groups); kfree(sp); } diff --git a/fs/ocfs2/cluster/nodemanager.c b/fs/ocfs2/cluster/nodemanager.c index ebe543894db0..b17d180bdc16 100644 --- a/fs/ocfs2/cluster/nodemanager.c +++ b/fs/ocfs2/cluster/nodemanager.c @@ -630,7 +630,6 @@ static void o2nm_cluster_release(struct config_item *item) { struct o2nm_cluster *cluster = to_o2nm_cluster(item); - kfree(cluster->cl_group.default_groups); kfree(cluster); } @@ -666,7 
+665,6 @@ static struct config_group *o2nm_cluster_group_make_group(struct config_group *g struct o2nm_cluster *cluster = NULL; struct o2nm_node_group *ns = NULL; struct config_group *o2hb_group = NULL, *ret = NULL; - void *defs = NULL; /* this runs under the parent dir's i_mutex; there can be only * one caller in here at a time */ @@ -675,20 +673,18 @@ static struct config_group *o2nm_cluster_group_make_group(struct config_group *g cluster = kzalloc(sizeof(struct o2nm_cluster), GFP_KERNEL); ns = kzalloc(sizeof(struct o2nm_node_group), GFP_KERNEL); - defs = kcalloc(3, sizeof(struct config_group *), GFP_KERNEL); o2hb_group = o2hb_alloc_hb_set(); - if (cluster == NULL || ns == NULL || o2hb_group == NULL || defs == NULL) + if (cluster == NULL || ns == NULL || o2hb_group == NULL) goto out; config_group_init_type_name(&cluster->cl_group, name, &o2nm_cluster_type); + configfs_add_default_group(&ns->ns_group, &cluster->cl_group); + config_group_init_type_name(&ns->ns_group, "node", &o2nm_node_group_type); + configfs_add_default_group(o2hb_group, &cluster->cl_group); - cluster->cl_group.default_groups = defs; - cluster->cl_group.default_groups[0] = &ns->ns_group; - cluster->cl_group.default_groups[1] = o2hb_group; - cluster->cl_group.default_groups[2] = NULL; rwlock_init(&cluster->cl_nodes_lock); cluster->cl_node_ip_tree = RB_ROOT; cluster->cl_reconnect_delay_ms = O2NET_RECONNECT_DELAY_MS_DEFAULT; @@ -704,7 +700,6 @@ out: kfree(cluster); kfree(ns); o2hb_free_hb_set(o2hb_group); - kfree(defs); ret = ERR_PTR(-ENOMEM); } @@ -714,18 +709,11 @@ out: static void o2nm_cluster_group_drop_item(struct config_group *group, struct config_item *item) { struct o2nm_cluster *cluster = to_o2nm_cluster(item); - int i; - struct config_item *killme; BUG_ON(o2nm_single_cluster != cluster); o2nm_single_cluster = NULL; - for (i = 0; cluster->cl_group.default_groups[i]; i++) { - killme = &cluster->cl_group.default_groups[i]->cg_item; - cluster->cl_group.default_groups[i] = NULL; - config_item_put(killme); - } - + configfs_remove_default_groups(&cluster->cl_group); config_item_put(item); } diff --git a/include/linux/configfs.h b/include/linux/configfs.h index f8165c129ccb..485fe5519448 100644 --- a/include/linux/configfs.h +++ b/include/linux/configfs.h @@ -96,7 +96,8 @@ struct config_group { struct config_item cg_item; struct list_head cg_children; struct configfs_subsystem *cg_subsys; - struct config_group **default_groups; + struct list_head default_groups; + struct list_head group_entry; }; extern void config_group_init(struct config_group *group); @@ -123,6 +124,12 @@ extern struct config_item *config_group_find_item(struct config_group *, const char *); +static inline void configfs_add_default_group(struct config_group *new_group, + struct config_group *group) +{ + list_add_tail(&new_group->group_entry, &group->default_groups); +} + struct configfs_attribute { const char *ca_name; struct module *ca_owner; @@ -251,6 +258,8 @@ int configfs_register_group(struct config_group *parent_group, struct config_group *group); void configfs_unregister_group(struct config_group *group); +void configfs_remove_default_groups(struct config_group *group); + struct config_group * configfs_register_default_group(struct config_group *parent_group, const char *name, diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h index e8c8c08bf575..1b09cac06508 100644 --- a/include/target/target_core_base.h +++ b/include/target/target_core_base.h @@ -560,7 +560,6 @@ struct se_node_acl { struct config_group 
acl_auth_group; struct config_group acl_param_group; struct config_group acl_fabric_stat_group; - struct config_group *acl_default_groups[5]; struct list_head acl_list; struct list_head acl_sess_list; struct completion acl_free_comp; @@ -887,7 +886,6 @@ struct se_portal_group { const struct target_core_fabric_ops *se_tpg_tfo; struct se_wwn *se_tpg_wwn; struct config_group tpg_group; - struct config_group *tpg_default_groups[7]; struct config_group tpg_lun_group; struct config_group tpg_np_group; struct config_group tpg_acl_group; @@ -923,7 +921,6 @@ static inline struct se_portal_group *param_to_tpg(struct config_item *item) struct se_wwn { struct target_fabric_configfs *wwn_tf; struct config_group wwn_group; - struct config_group *wwn_default_groups[2]; struct config_group fabric_stat_group; }; -- cgit v1.2.3 From 335e073faacc9d90bee1527fa2550d6df045df63 Mon Sep 17 00:00:00 2001 From: Jiri Kosina Date: Sun, 6 Mar 2016 22:20:25 +0100 Subject: klp: remove CONFIG_LIVEPATCH dependency from klp headers There is no need for livepatch.h (generic and arch-specific) to depend on CONFIG_LIVEPATCH. Remove that superfluous dependency. Reported-by: Josh Poimboeuf Signed-off-by: Jiri Kosina --- arch/s390/include/asm/livepatch.h | 2 -- arch/x86/include/asm/livepatch.h | 2 -- include/linux/livepatch.h | 4 ---- 3 files changed, 8 deletions(-) (limited to 'include/linux') diff --git a/arch/s390/include/asm/livepatch.h b/arch/s390/include/asm/livepatch.h index 105074582d86..d5427c78b1b3 100644 --- a/arch/s390/include/asm/livepatch.h +++ b/arch/s390/include/asm/livepatch.h @@ -19,7 +19,6 @@ #include -#ifdef CONFIG_LIVEPATCH static inline int klp_check_compiler_support(void) { return 0; @@ -36,6 +35,5 @@ static inline void klp_arch_set_pc(struct pt_regs *regs, unsigned long ip) { regs->psw.addr = ip; } -#endif #endif diff --git a/arch/x86/include/asm/livepatch.h b/arch/x86/include/asm/livepatch.h index 8acfe798625b..7e68f9558552 100644 --- a/arch/x86/include/asm/livepatch.h +++ b/arch/x86/include/asm/livepatch.h @@ -25,7 +25,6 @@ #include #include -#ifdef CONFIG_LIVEPATCH static inline int klp_check_compiler_support(void) { #ifndef CC_USING_FENTRY @@ -40,6 +39,5 @@ static inline void klp_arch_set_pc(struct pt_regs *regs, unsigned long ip) { regs->ip = ip; } -#endif #endif /* _ASM_X86_LIVEPATCH_H */ diff --git a/include/linux/livepatch.h b/include/linux/livepatch.h index a8828652f794..c05679701e10 100644 --- a/include/linux/livepatch.h +++ b/include/linux/livepatch.h @@ -24,8 +24,6 @@ #include #include -#if IS_ENABLED(CONFIG_LIVEPATCH) - #include enum klp_state { @@ -134,6 +132,4 @@ int klp_unregister_patch(struct klp_patch *); int klp_enable_patch(struct klp_patch *); int klp_disable_patch(struct klp_patch *); -#endif /* CONFIG_LIVEPATCH */ - #endif /* _LINUX_LIVEPATCH_H_ */ -- cgit v1.2.3 From 4d7928959832ea41f7f91456b76da19cad01bd09 Mon Sep 17 00:00:00 2001 From: Hante Meuleman Date: Wed, 17 Feb 2016 11:27:07 +0100 Subject: brcmfmac: switch to new platform data Platform data is currently only available for SDIO. This patch introduces a new platform data structure that can hold platform data for any device type, configurable per device. It only switches to the new structure and adds support for SDIO devices.
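As a hedged sketch of what the new structure enables (illustrative board code, not part of this patch), per-device settings could be registered roughly as follows. The struct and macro names come from the new include/linux/platform_data/brcmfmac.h added below; the chip id, revision and SDIO values are placeholders.

#include <linux/init.h>
#include <linux/platform_device.h>
#include <linux/platform_data/brcmfmac.h>

/* Hypothetical board file. Statically initializing the trailing
 * devices[] array relies on GCC's zero-length-array extension;
 * 0x4339 and the SDIO settings are placeholders.
 */
static struct brcmfmac_platform_data brcmf_pdata = {
	.device_count = 1,
	.devices = {
		{
			.bus_type = BRCMF_BUSTYPE_SDIO,
			.id = 0x4339,	/* chip id as known to chip.c */
			.rev = -1,	/* -1 matches any chip revision */
			.bus.sdio = {
				.drive_strength = 6,	/* placeholder */
			},
		},
	},
};

static struct platform_device brcmf_device = {
	.name = BRCMFMAC_PDATA_NAME,
	.id = PLATFORM_DEVID_NONE,
	.dev.platform_data = &brcmf_pdata,
};

static int __init board_brcmf_init(void)
{
	/* earlier than device_initcall so a built-in brcmfmac sees it */
	return platform_device_register(&brcmf_device);
}
subsys_initcall(board_brcmf_init);

On probe, brcmf_get_module_param() walks devices[] for an entry matching the device's (bus_type, chip id, revision), with rev == -1 acting as a wildcard, and falls back to Device Tree probing when no entry matches.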
Reviewed-by: Arend Van Spriel Reviewed-by: Franky (Zhenhui) Lin Reviewed-by: Pieter-Paul Giesberts Signed-off-by: Hante Meuleman Signed-off-by: Arend van Spriel Signed-off-by: Kalle Valo --- .../wireless/broadcom/brcm80211/brcmfmac/bcmsdh.c | 15 +- .../broadcom/brcm80211/brcmfmac/cfg80211.c | 4 +- .../wireless/broadcom/brcm80211/brcmfmac/common.c | 43 ++++- .../wireless/broadcom/brcm80211/brcmfmac/common.h | 35 +--- .../net/wireless/broadcom/brcm80211/brcmfmac/of.c | 3 +- .../net/wireless/broadcom/brcm80211/brcmfmac/of.h | 5 +- .../wireless/broadcom/brcm80211/brcmfmac/sdio.c | 77 +++++---- .../wireless/broadcom/brcm80211/brcmfmac/sdio.h | 2 +- include/linux/platform_data/brcmfmac-sdio.h | 135 --------------- include/linux/platform_data/brcmfmac.h | 185 +++++++++++++++++++++ 10 files changed, 282 insertions(+), 222 deletions(-) delete mode 100644 include/linux/platform_data/brcmfmac-sdio.h create mode 100644 include/linux/platform_data/brcmfmac.h (limited to 'include/linux') diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcmsdh.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcmsdh.c index 25cd71229c95..bb4aece9ad2c 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcmsdh.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcmsdh.c @@ -103,7 +103,7 @@ static void brcmf_sdiod_dummy_irqhandler(struct sdio_func *func) int brcmf_sdiod_intr_register(struct brcmf_sdio_dev *sdiodev) { - struct brcmfmac_sdio_platform_data *pdata; + struct brcmfmac_sdio_pd *pdata; int ret = 0; u8 data; u32 addr, gpiocontrol; @@ -173,7 +173,7 @@ int brcmf_sdiod_intr_register(struct brcmf_sdio_dev *sdiodev) int brcmf_sdiod_intr_unregister(struct brcmf_sdio_dev *sdiodev) { - struct brcmfmac_sdio_platform_data *pdata; + struct brcmfmac_sdio_pd *pdata; brcmf_dbg(SDIO, "Entering\n"); @@ -1164,17 +1164,6 @@ static int brcmf_ops_sdio_probe(struct sdio_func *func, dev_set_drvdata(&func->dev, bus_if); dev_set_drvdata(&sdiodev->func[1]->dev, bus_if); sdiodev->dev = &sdiodev->func[1]->dev; - sdiodev->pdata = brcmf_get_module_param(sdiodev->dev); - -#ifdef CONFIG_PM_SLEEP - /* wowl can be supported when KEEP_POWER is true and (WAKE_SDIO_IRQ - * is true or when platform data OOB irq is true). 
- */ - if ((sdio_get_host_pm_caps(sdiodev->func[1]) & MMC_PM_KEEP_POWER) && - ((sdio_get_host_pm_caps(sdiodev->func[1]) & MMC_PM_WAKE_SDIO_IRQ) || - (sdiodev->pdata && sdiodev->pdata->oob_irq_supported))) - bus_if->wowl_supported = true; -#endif brcmf_sdiod_change_state(sdiodev, BRCMF_SDIOD_DOWN); diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c index 5609a79df1c1..5e3acaca7231 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c @@ -6459,8 +6459,8 @@ int brcmf_cfg80211_wait_vif_event(struct brcmf_cfg80211_info *cfg, static s32 brcmf_translate_country_code(struct brcmf_pub *drvr, char alpha2[2], struct brcmf_fil_country_le *ccreq) { - struct cc_translate *country_codes; - struct cc_entry *cc; + struct brcmfmac_pd_cc *country_codes; + struct brcmfmac_pd_cc_entry *cc; s32 found_index; int i; diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/common.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/common.c index 020901c2e0ca..4bd3225cdea6 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/common.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/common.c @@ -80,7 +80,7 @@ module_param_named(ignore_probe_fail, brcmf_ignore_probe_fail, int, 0); MODULE_PARM_DESC(ignore_probe_fail, "always succeed probe for debugging"); #endif -static struct brcmfmac_sdio_platform_data *brcmfmac_pdata; +static struct brcmfmac_platform_data *brcmfmac_pdata; struct brcmf_mp_global_t brcmf_mp_global; int brcmf_c_preinit_dcmds(struct brcmf_if *ifp) @@ -229,15 +229,46 @@ void __brcmf_dbg(u32 level, const char *func, const char *fmt, ...) static void brcmf_mp_attach(void) { + /* If module param firmware path is set then this will always be used, + * if not set then if available use the platform data version. 
To make + * sure it gets initialized at all, always copy the module param version + */ strlcpy(brcmf_mp_global.firmware_path, brcmf_firmware_path, BRCMF_FW_ALTPATH_LEN); + if ((brcmfmac_pdata) && (brcmfmac_pdata->fw_alternative_path) && + (brcmf_mp_global.firmware_path[0] == '\0')) { + strlcpy(brcmf_mp_global.firmware_path, + brcmfmac_pdata->fw_alternative_path, + BRCMF_FW_ALTPATH_LEN); + } } -struct brcmfmac_sdio_platform_data *brcmf_get_module_param(struct device *dev) +struct brcmfmac_sdio_pd *brcmf_get_module_param(struct device *dev, + enum brcmf_bus_type bus_type, + u32 chip, u32 chiprev) { - if (!brcmfmac_pdata) - brcmf_of_probe(dev, &brcmfmac_pdata); - return brcmfmac_pdata; + struct brcmfmac_sdio_pd *pdata; + struct brcmfmac_pd_device *device_pd; + int i; + + if (brcmfmac_pdata) { + for (i = 0; i < brcmfmac_pdata->device_count; i++) { + device_pd = &brcmfmac_pdata->devices[i]; + if ((device_pd->bus_type == bus_type) && + (device_pd->id == chip) && + ((device_pd->rev == chiprev) || + (device_pd->rev == -1))) { + brcmf_dbg(INFO, "Platform data for device found\n"); + if (device_pd->bus_type == BRCMF_BUSTYPE_SDIO) + return &device_pd->bus.sdio; + break; + } + } + } + pdata = NULL; + brcmf_of_probe(dev, &pdata); + + return pdata; } int brcmf_mp_device_attach(struct brcmf_pub *drvr) @@ -287,7 +318,7 @@ static int brcmf_common_pd_remove(struct platform_device *pdev) static struct platform_driver brcmf_pd = { .remove = brcmf_common_pd_remove, .driver = { - .name = BRCMFMAC_SDIO_PDATA_NAME, + .name = BRCMFMAC_PDATA_NAME, } }; diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/common.h b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/common.h index 54a26ede808d..a64e40e8bfda 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/common.h +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/common.h @@ -16,7 +16,7 @@ #define BRCMFMAC_COMMON_H #include -#include +#include #include "fwil_types.h" extern const u8 ALLFFMAC[ETH_ALEN]; @@ -42,33 +42,6 @@ struct brcmf_mp_global_t { extern struct brcmf_mp_global_t brcmf_mp_global; -/** - * struct cc_entry - Struct for translating user space country code (iso3166) to - * firmware country code and revision. - * - * @iso3166: iso3166 alpha 2 country code string. - * @cc: firmware country code string. - * @rev: firmware country code revision. - */ -struct cc_entry { - char iso3166[BRCMF_COUNTRY_BUF_SZ]; - char cc[BRCMF_COUNTRY_BUF_SZ]; - s32 rev; -}; - -/** - * struct cc_translate - Struct for translating country codes as set by user - * space to a country code and rev which can be used by - * firmware. - * - * @table_size: number of entries in table (> 0) - * @table: dynamic array of 1 or more elements with translation information. - */ -struct cc_translate { - int table_size; - struct cc_entry table[0]; -}; - /** * struct brcmf_mp_device - Device module paramaters. 
* @@ -88,10 +61,12 @@ struct brcmf_mp_device { int fcmode; bool roamoff; bool ignore_probe_fail; - struct cc_translate *country_codes; + struct brcmfmac_pd_cc *country_codes; }; -struct brcmfmac_sdio_platform_data *brcmf_get_module_param(struct device *dev); +struct brcmfmac_sdio_pd *brcmf_get_module_param(struct device *dev, + enum brcmf_bus_type bus_type, + u32 chip, u32 chiprev); int brcmf_mp_device_attach(struct brcmf_pub *drvr); void brcmf_mp_device_detach(struct brcmf_pub *drvr); #ifdef DEBUG diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/of.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/of.c index 8201d937b826..ece0b65dd039 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/of.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/of.c @@ -23,8 +23,7 @@ #include "common.h" #include "of.h" -void -brcmf_of_probe(struct device *dev, struct brcmfmac_sdio_platform_data **sdio) +void brcmf_of_probe(struct device *dev, struct brcmfmac_sdio_pd **sdio) { struct device_node *np = dev->of_node; int irq; diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/of.h b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/of.h index 84b474484cea..1ba951f9b542 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/of.h +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/of.h @@ -15,10 +15,9 @@ */ #ifdef CONFIG_OF void -brcmf_of_probe(struct device *dev, struct brcmfmac_sdio_platform_data **sdio); +brcmf_of_probe(struct device *dev, struct brcmfmac_sdio_pd **sdio); #else -static void brcmf_of_probe(struct device *dev, - struct brcmfmac_sdio_platform_data **sdio) +static void brcmf_of_probe(struct device *dev, struct brcmfmac_sdio_pd **sdio) { } #endif /* CONFIG_OF */ diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c index c790fa89db05..6e367041f691 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c @@ -33,8 +33,6 @@ #include #include #include -#include -#include #include #include #include @@ -44,6 +42,8 @@ #include "sdio.h" #include "chip.h" #include "firmware.h" +#include "core.h" +#include "common.h" #define DCMD_RESP_TIMEOUT msecs_to_jiffies(2500) #define CTL_DONE_TIMEOUT msecs_to_jiffies(2500) @@ -3775,26 +3775,28 @@ static const struct brcmf_buscore_ops brcmf_sdio_buscore_ops = { static bool brcmf_sdio_probe_attach(struct brcmf_sdio *bus) { + struct brcmf_sdio_dev *sdiodev; u8 clkctl = 0; int err = 0; int reg_addr; u32 reg_val; u32 drivestrength; - sdio_claim_host(bus->sdiodev->func[1]); + sdiodev = bus->sdiodev; + sdio_claim_host(sdiodev->func[1]); pr_debug("F1 signature read @0x18000000=0x%4x\n", - brcmf_sdiod_regrl(bus->sdiodev, SI_ENUM_BASE, NULL)); + brcmf_sdiod_regrl(sdiodev, SI_ENUM_BASE, NULL)); /* * Force PLL off until brcmf_chip_attach() * programs PLL control regs */ - brcmf_sdiod_regwb(bus->sdiodev, SBSDIO_FUNC1_CHIPCLKCSR, + brcmf_sdiod_regwb(sdiodev, SBSDIO_FUNC1_CHIPCLKCSR, BRCMF_INIT_CLKCTL1, &err); if (!err) - clkctl = brcmf_sdiod_regrb(bus->sdiodev, + clkctl = brcmf_sdiod_regrb(sdiodev, SBSDIO_FUNC1_CHIPCLKCSR, &err); if (err || ((clkctl & ~SBSDIO_AVBITS) != BRCMF_INIT_CLKCTL1)) { @@ -3803,50 +3805,77 @@ brcmf_sdio_probe_attach(struct brcmf_sdio *bus) goto fail; } - bus->ci = brcmf_chip_attach(bus->sdiodev, &brcmf_sdio_buscore_ops); + bus->ci = brcmf_chip_attach(sdiodev, &brcmf_sdio_buscore_ops); if (IS_ERR(bus->ci)) { brcmf_err("brcmf_chip_attach failed!\n"); bus->ci = NULL; 
goto fail; } + sdiodev->pdata = brcmf_get_module_param(sdiodev->dev, + BRCMF_BUSTYPE_SDIO, + bus->ci->chip, + bus->ci->chiprev); + /* platform specific configuration: + * alignments must be at least 4 bytes for ADMA + */ + bus->head_align = ALIGNMENT; + bus->sgentry_align = ALIGNMENT; + if (sdiodev->pdata) { + if (sdiodev->pdata->sd_head_align > ALIGNMENT) + bus->head_align = sdiodev->pdata->sd_head_align; + if (sdiodev->pdata->sd_sgentry_align > ALIGNMENT) + bus->sgentry_align = sdiodev->pdata->sd_sgentry_align; + } + /* allocate scatter-gather table. sg support + * will be disabled upon allocation failure. + */ + brcmf_sdiod_sgtable_alloc(sdiodev); + +#ifdef CONFIG_PM_SLEEP + /* wowl can be supported when KEEP_POWER is true and (WAKE_SDIO_IRQ + * is true or when platform data OOB irq is true). + */ + if ((sdio_get_host_pm_caps(sdiodev->func[1]) & MMC_PM_KEEP_POWER) && + ((sdio_get_host_pm_caps(sdiodev->func[1]) & MMC_PM_WAKE_SDIO_IRQ) || + (sdiodev->pdata && sdiodev->pdata->oob_irq_supported))) + sdiodev->bus_if->wowl_supported = true; +#endif if (brcmf_sdio_kso_init(bus)) { brcmf_err("error enabling KSO\n"); goto fail; } - if ((bus->sdiodev->pdata) && (bus->sdiodev->pdata->drive_strength)) - drivestrength = bus->sdiodev->pdata->drive_strength; + if ((sdiodev->pdata) && (sdiodev->pdata->drive_strength)) + drivestrength = sdiodev->pdata->drive_strength; else drivestrength = DEFAULT_SDIO_DRIVE_STRENGTH; - brcmf_sdio_drivestrengthinit(bus->sdiodev, bus->ci, drivestrength); + brcmf_sdio_drivestrengthinit(sdiodev, bus->ci, drivestrength); /* Set card control so an SDIO card reset does a WLAN backplane reset */ - reg_val = brcmf_sdiod_regrb(bus->sdiodev, - SDIO_CCCR_BRCM_CARDCTRL, &err); + reg_val = brcmf_sdiod_regrb(sdiodev, SDIO_CCCR_BRCM_CARDCTRL, &err); if (err) goto fail; reg_val |= SDIO_CCCR_BRCM_CARDCTRL_WLANRESET; - brcmf_sdiod_regwb(bus->sdiodev, - SDIO_CCCR_BRCM_CARDCTRL, reg_val, &err); + brcmf_sdiod_regwb(sdiodev, SDIO_CCCR_BRCM_CARDCTRL, reg_val, &err); if (err) goto fail; /* set PMUControl so a backplane reset does PMU state reload */ reg_addr = CORE_CC_REG(brcmf_chip_get_pmu(bus->ci)->base, pmucontrol); - reg_val = brcmf_sdiod_regrl(bus->sdiodev, reg_addr, &err); + reg_val = brcmf_sdiod_regrl(sdiodev, reg_addr, &err); if (err) goto fail; reg_val |= (BCMA_CC_PMU_CTL_RES_RELOAD << BCMA_CC_PMU_CTL_RES_SHIFT); - brcmf_sdiod_regwl(bus->sdiodev, reg_addr, reg_val, &err); + brcmf_sdiod_regwl(sdiodev, reg_addr, reg_val, &err); if (err) goto fail; - sdio_release_host(bus->sdiodev->func[1]); + sdio_release_host(sdiodev->func[1]); brcmu_pktq_init(&bus->txq, (PRIOMASK + 1), TXQLEN); @@ -3867,7 +3896,7 @@ brcmf_sdio_probe_attach(struct brcmf_sdio *bus) return true; fail: - sdio_release_host(bus->sdiodev->func[1]); + sdio_release_host(sdiodev->func[1]); return false; } @@ -4045,18 +4074,6 @@ struct brcmf_sdio *brcmf_sdio_probe(struct brcmf_sdio_dev *sdiodev) bus->txminmax = BRCMF_TXMINMAX; bus->tx_seq = SDPCM_SEQ_WRAP - 1; - /* platform specific configuration: - * alignments must be at least 4 bytes for ADMA - */ - bus->head_align = ALIGNMENT; - bus->sgentry_align = ALIGNMENT; - if (sdiodev->pdata) { - if (sdiodev->pdata->sd_head_align > ALIGNMENT) - bus->head_align = sdiodev->pdata->sd_head_align; - if (sdiodev->pdata->sd_sgentry_align > ALIGNMENT) - bus->sgentry_align = sdiodev->pdata->sd_sgentry_align; - } - /* single-threaded workqueue */ wq = alloc_ordered_workqueue("brcmf_wq/%s", WQ_MEM_RECLAIM, dev_name(&sdiodev->func[1]->dev)); diff --git 
a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.h b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.h index 23f223150cef..50df9cb21af2 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.h +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.h @@ -184,7 +184,7 @@ struct brcmf_sdio_dev { struct brcmf_sdio *bus; struct device *dev; struct brcmf_bus *bus_if; - struct brcmfmac_sdio_platform_data *pdata; + struct brcmfmac_sdio_pd *pdata; bool oob_irq_requested; bool irq_en; /* irq enable flags */ spinlock_t irq_en_lock; diff --git a/include/linux/platform_data/brcmfmac-sdio.h b/include/linux/platform_data/brcmfmac-sdio.h deleted file mode 100644 index e75dcbf2b230..000000000000 --- a/include/linux/platform_data/brcmfmac-sdio.h +++ /dev/null @@ -1,135 +0,0 @@ -/* - * Copyright (c) 2013 Broadcom Corporation - * - * Permission to use, copy, modify, and/or distribute this software for any - * purpose with or without fee is hereby granted, provided that the above - * copyright notice and this permission notice appear in all copies. - * - * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES - * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY - * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION - * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN - * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - */ - -#ifndef _LINUX_BRCMFMAC_PLATFORM_H -#define _LINUX_BRCMFMAC_PLATFORM_H - -/* - * Platform specific driver functions and data. Through the platform specific - * device data functions can be provided to help the brcmfmac driver to - * operate with the device in combination with the used platform. - * - * Use the platform data in the following (similar) way: - * - * -#include - - -static void brcmfmac_power_on(void) -{ -} - -static void brcmfmac_power_off(void) -{ -} - -static void brcmfmac_reset(void) -{ -} - -static struct brcmfmac_sdio_platform_data brcmfmac_sdio_pdata = { - .power_on = brcmfmac_power_on, - .power_off = brcmfmac_power_off, - .reset = brcmfmac_reset -}; - -static struct platform_device brcmfmac_device = { - .name = BRCMFMAC_SDIO_PDATA_NAME, - .id = PLATFORM_DEVID_NONE, - .dev.platform_data = &brcmfmac_sdio_pdata -}; - -void __init brcmfmac_init_pdata(void) -{ - brcmfmac_sdio_pdata.oob_irq_supported = true; - brcmfmac_sdio_pdata.oob_irq_nr = gpio_to_irq(GPIO_BRCMF_SDIO_OOB); - brcmfmac_sdio_pdata.oob_irq_flags = IORESOURCE_IRQ | - IORESOURCE_IRQ_HIGHLEVEL; - platform_device_register(&brcmfmac_device); -} - * - * - * Note: the brcmfmac can be loaded as module or be statically built-in into - * the kernel. If built-in then do note that it uses module_init (and - * module_exit) routines which equal device_initcall. So if you intend to - * create a module with the platform specific data for the brcmfmac and have - * it built-in to the kernel then use a higher initcall then device_initcall - * (see init.h). If this is not done then brcmfmac will load without problems - * but will not pickup the platform data. - * - * When the driver does not "detect" platform driver data then it will continue - * without reporting anything and just assume there is no data needed. Which is - * probably true for most platforms. 
- * - * Explanation of the platform_data fields: - * - * drive_strength: is the preferred drive_strength to be used for the SDIO - * pins. If 0 then a default value will be used. This is the target drive - * strength, the exact drive strength which will be used depends on the - * capabilities of the device. - * - * oob_irq_supported: does the board have support for OOB interrupts. SDIO - * in-band interrupts are relatively slow and for having less overhead on - * interrupt processing an out of band interrupt can be used. If the HW - * supports this then enable this by setting this field to true and configure - * the oob related fields. - * - * oob_irq_nr, oob_irq_flags: the OOB interrupt information. The values are - * used for registering the irq using request_irq function. - * - * broken_sg_support: flag for broken sg list support of SDIO host controller. - * Set this to true if the SDIO host controller has higher align requirement - * than 32 bytes for each scatterlist item. - * - * sd_head_align: alignment requirement for start of data buffer - * - * sd_sgentry_align: length alignment requirement for each sg entry - * - * power_on: This function is called by the brcmfmac when the module gets - * loaded. This can be particularly useful for low power devices. The platform - * spcific routine may for example decide to power up the complete device. - * If there is no use-case for this function then provide NULL. - * - * power_off: This function is called by the brcmfmac when the module gets - * unloaded. At this point the device can be powered down or otherwise be reset. - * So if an actual power_off is not supported but reset is then reset the device - * when this function gets called. This can be particularly useful for low power - * devices. If there is no use-case for this function (either power-down or - * reset) then provide NULL. - * - * reset: This function can get called if the device communication broke down. - * This functionality is particularly useful in case of SDIO type devices. It is - * possible to reset a dongle via sdio data interface, but it requires that - * this is fully functional. This function is chip/module specific and this - * function should return only after the complete reset has completed. - */ - -#define BRCMFMAC_SDIO_PDATA_NAME "brcmfmac_sdio" - -struct brcmfmac_sdio_platform_data { - unsigned int drive_strength; - bool oob_irq_supported; - unsigned int oob_irq_nr; - unsigned long oob_irq_flags; - bool broken_sg_support; - unsigned short sd_head_align; - unsigned short sd_sgentry_align; - void (*power_on)(void); - void (*power_off)(void); - void (*reset)(void); -}; - -#endif /* _LINUX_BRCMFMAC_PLATFORM_H */ diff --git a/include/linux/platform_data/brcmfmac.h b/include/linux/platform_data/brcmfmac.h new file mode 100644 index 000000000000..1d30bf278231 --- /dev/null +++ b/include/linux/platform_data/brcmfmac.h @@ -0,0 +1,185 @@ +/* + * Copyright (c) 201 Broadcom Corporation + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY + * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION + * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN + * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#ifndef _LINUX_BRCMFMAC_PLATFORM_H +#define _LINUX_BRCMFMAC_PLATFORM_H + + +#define BRCMFMAC_PDATA_NAME "brcmfmac" + +#define BRCMFMAC_COUNTRY_BUF_SZ 4 + + +/* + * Platform specific driver functions and data. Through the platform specific + * device data functions and data can be provided to help the brcmfmac driver to + * operate with the device in combination with the used platform. + */ + + +/** + * Note: the brcmfmac can be loaded as module or be statically built-in into + * the kernel. If built-in then do note that it uses module_init (and + * module_exit) routines which equal device_initcall. So if you intend to + * create a module with the platform specific data for the brcmfmac and have + * it built-in to the kernel then use a higher initcall then device_initcall + * (see init.h). If this is not done then brcmfmac will load without problems + * but will not pickup the platform data. + * + * When the driver does not "detect" platform driver data then it will continue + * without reporting anything and just assume there is no data needed. Which is + * probably true for most platforms. + */ + +/** + * enum brcmf_bus_type - Bus type identifier. Currently SDIO, USB and PCIE are + * supported. + */ +enum brcmf_bus_type { + BRCMF_BUSTYPE_SDIO, + BRCMF_BUSTYPE_USB, + BRCMF_BUSTYPE_PCIE +}; + + +/** + * struct brcmfmac_sdio_pd - SDIO Device specific platform data. + * + * @txglomsz: SDIO txglom size. Use 0 if default of driver is to be + * used. + * @drive_strength: is the preferred drive_strength to be used for the SDIO + * pins. If 0 then a default value will be used. This is + * the target drive strength, the exact drive strength + * which will be used depends on the capabilities of the + * device. + * @oob_irq_supported: does the board have support for OOB interrupts. SDIO + * in-band interrupts are relatively slow and for having + * less overhead on interrupt processing an out of band + * interrupt can be used. If the HW supports this then + * enable this by setting this field to true and configure + * the oob related fields. + * @oob_irq_nr, + * @oob_irq_flags: the OOB interrupt information. The values are used for + * registering the irq using request_irq function. + * @broken_sg_support: flag for broken sg list support of SDIO host controller. + * Set this to true if the SDIO host controller has higher + * align requirement than 32 bytes for each scatterlist + * item. + * @sd_head_align: alignment requirement for start of data buffer. + * @sd_sgentry_align: length alignment requirement for each sg entry. + * @reset: This function can get called if the device communication + * broke down. This functionality is particularly useful in + * case of SDIO type devices. It is possible to reset a + * dongle via sdio data interface, but it requires that + * this is fully functional. This function is chip/module + * specific and this function should return only after the + * complete reset has completed. 
+ */ +struct brcmfmac_sdio_pd { + int txglomsz; + unsigned int drive_strength; + bool oob_irq_supported; + unsigned int oob_irq_nr; + unsigned long oob_irq_flags; + bool broken_sg_support; + unsigned short sd_head_align; + unsigned short sd_sgentry_align; + void (*reset)(void); +}; + +/** + * struct brcmfmac_pd_cc_entry - Struct for translating user space country code + * (iso3166) to firmware country code and + * revision. + * + * @iso3166: iso3166 alpha 2 country code string. + * @cc: firmware country code string. + * @rev: firmware country code revision. + */ +struct brcmfmac_pd_cc_entry { + char iso3166[BRCMFMAC_COUNTRY_BUF_SZ]; + char cc[BRCMFMAC_COUNTRY_BUF_SZ]; + s32 rev; +}; + +/** + * struct brcmfmac_pd_cc - Struct for translating country codes as set by user + * space to a country code and rev which can be used by + * firmware. + * + * @table_size: number of entries in table (> 0) + * @table: array of 1 or more elements with translation information. + */ +struct brcmfmac_pd_cc { + int table_size; + struct brcmfmac_pd_cc_entry table[0]; +}; + +/** + * struct brcmfmac_pd_device - Device specific platform data. (id/rev/bus_type) + * is the unique identifier of the device. + * + * @id: ID of the device for which this data is. In case of SDIO + * or PCIE this is the chipid as identified by chip.c In + * case of USB this is the chipid as identified by the + * device query. + * @rev: chip revision, see id. + * @bus_type: The type of bus. Some chipid/rev exist for different bus + * types. Each bus type has its own set of settings. + * @feature_disable: Bitmask of features to disable (override), See feature.c + * in brcmfmac for details. + * @country_codes: If available, pointer to struct for translating country + * codes. + * @bus: Bus specific (union) device settings. Currently only + * SDIO. + */ +struct brcmfmac_pd_device { + unsigned int id; + unsigned int rev; + enum brcmf_bus_type bus_type; + unsigned int feature_disable; + struct brcmfmac_pd_cc *country_codes; + union { + struct brcmfmac_sdio_pd sdio; + } bus; +}; + +/** + * struct brcmfmac_platform_data - BRCMFMAC specific platform data. + * + * @power_on: This function is called by the brcmfmac driver when the module + * gets loaded. This can be particularly useful for low power + * devices. The platform spcific routine may for example decide to + * power up the complete device. If there is no use-case for this + * function then provide NULL. + * @power_off: This function is called by the brcmfmac when the module gets + * unloaded. At this point the devices can be powered down or + * otherwise be reset. So if an actual power_off is not supported + * but reset is supported by the devices then reset the devices + * when this function gets called. This can be particularly useful + * for low power devices. If there is no use-case for this + * function then provide NULL. + */ +struct brcmfmac_platform_data { + void (*power_on)(void); + void (*power_off)(void); + char *fw_alternative_path; + int device_count; + struct brcmfmac_pd_device devices[0]; +}; + + +#endif /* _LINUX_BRCMFMAC_PLATFORM_H */ -- cgit v1.2.3 From 2e62f9b2a41e4ade1a0bb3c1bbda4defe4c67243 Mon Sep 17 00:00:00 2001 From: Rafał Miłecki Date: Fri, 12 Feb 2016 10:15:43 +0100 Subject: bcma: drop unneeded fields from bcma_pflash struct MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Most of the info stored in this struct wasn't really used anywhere, as we put all that data into the platform data & resource as well.
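As a hedged illustration of why those fields were dead weight (hypothetical consumer code, not part of this patch), everything they carried is already reachable through the registered physmap platform device:

#include <linux/errno.h>
#include <linux/ioport.h>
#include <linux/platform_device.h>
#include <linux/mtd/physmap.h>

/* Hypothetical probe hook: the bus width comes from the platform data
 * and the flash window from the resource, both filled in by bcma.
 */
static int demo_pflash_probe(struct platform_device *pdev)
{
	struct physmap_flash_data *pdata = dev_get_platdata(&pdev->dev);
	struct resource *res = platform_get_resource(pdev, IORESOURCE_MEM, 0);

	if (!pdata || !res)
		return -ENODEV;

	dev_info(&pdev->dev, "pflash: width=%d window=%pR\n",
		 pdata->width, res);
	return 0;
}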
Signed-off-by: Rafał Miłecki Signed-off-by: Kalle Valo --- drivers/bcma/driver_mips.c | 11 ++++------- include/linux/bcma/bcma_driver_chipcommon.h | 3 --- 2 files changed, 4 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/drivers/bcma/driver_mips.c b/drivers/bcma/driver_mips.c index a40a203314db..20c134c016dc 100644 --- a/drivers/bcma/driver_mips.c +++ b/drivers/bcma/driver_mips.c @@ -288,18 +288,15 @@ static void bcma_core_mips_flash_detect(struct bcma_drv_mips *mcore) case BCMA_CC_FLASHT_PARA: bcma_debug(bus, "Found parallel flash\n"); pflash->present = true; - pflash->window = BCMA_SOC_FLASH2; - pflash->window_size = BCMA_SOC_FLASH2_SZ; if ((bcma_read32(cc->core, BCMA_CC_FLASH_CFG) & BCMA_CC_FLASH_CFG_DS) == 0) - pflash->buswidth = 1; + bcma_pflash_data.width = 1; else - pflash->buswidth = 2; + bcma_pflash_data.width = 2; - bcma_pflash_data.width = pflash->buswidth; - bcma_pflash_resource.start = pflash->window; - bcma_pflash_resource.end = pflash->window + pflash->window_size; + bcma_pflash_resource.start = BCMA_SOC_FLASH2; + bcma_pflash_resource.end = BCMA_SOC_FLASH2 + BCMA_SOC_FLASH2_SZ; break; default: diff --git a/include/linux/bcma/bcma_driver_chipcommon.h b/include/linux/bcma/bcma_driver_chipcommon.h index 700d0c6f7480..16eaaad9dda5 100644 --- a/include/linux/bcma/bcma_driver_chipcommon.h +++ b/include/linux/bcma/bcma_driver_chipcommon.h @@ -579,9 +579,6 @@ struct bcma_chipcommon_pmu { #ifdef CONFIG_BCMA_DRIVER_MIPS struct bcma_pflash { bool present; - u8 buswidth; - u32 window; - u32 window_size; }; #ifdef CONFIG_BCMA_SFLASH -- cgit v1.2.3 From d6a3b51ada68c2bd3e184f4729ce626a1721cf74 Mon Sep 17 00:00:00 2001 From: Rafał Miłecki Date: Fri, 12 Feb 2016 10:15:44 +0100 Subject: bcma: move parallel flash support to separate file MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This follows the way of handling other flashes and cleans code a bit. As a next task we will want to move flash code to the ChipCommon driver as: 1) Flash controllers are accessible using ChipCommon registers 2) This code isn't MIPS specific. This change prepares bcma for that.
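After the move, bcma_core_mips_flash_detect() dispatches parallel flash the same way as the other flash types. A hedged sketch of the resulting shape, abbreviated from the hunks below (the boot-device logic and the NAND case are omitted):

static void bcma_core_mips_flash_detect(struct bcma_drv_mips *mcore)
{
	struct bcma_bus *bus = mcore->core->bus;
	struct bcma_drv_cc *cc = &bus->drv_cc;

	switch (cc->capabilities & BCMA_CC_CAP_FLASHT) {
	case BCMA_CC_FLASHT_STSER:
	case BCMA_CC_FLASHT_ATSER:
		bcma_debug(bus, "Found serial flash\n");
		bcma_sflash_init(cc);	/* driver_chipcommon_sflash.c */
		break;
	case BCMA_CC_FLASHT_PARA:
		bcma_debug(bus, "Found parallel flash\n");
		bcma_pflash_init(cc);	/* now in driver_chipcommon_pflash.c */
		break;
	default:
		bcma_err(bus, "Flash type not supported\n");
	}
}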
Signed-off-by: Rafał Miłecki Signed-off-by: Kalle Valo --- drivers/bcma/Kconfig | 5 +++ drivers/bcma/Makefile | 1 + drivers/bcma/bcma_private.h | 18 +++++++++-- drivers/bcma/driver_chipcommon_pflash.c | 49 +++++++++++++++++++++++++++++ drivers/bcma/driver_mips.c | 35 +--------------------- drivers/bcma/main.c | 2 +- include/linux/bcma/bcma_driver_chipcommon.h | 8 +++-- 7 files changed, 78 insertions(+), 40 deletions(-) create mode 100644 drivers/bcma/driver_chipcommon_pflash.c (limited to 'include/linux') diff --git a/drivers/bcma/Kconfig b/drivers/bcma/Kconfig index 023d448ed3fa..efdc2ae8441a 100644 --- a/drivers/bcma/Kconfig +++ b/drivers/bcma/Kconfig @@ -70,6 +70,11 @@ config BCMA_DRIVER_MIPS If unsure, say N +config BCMA_PFLASH + bool + depends on BCMA_DRIVER_MIPS + default y + config BCMA_SFLASH bool depends on BCMA_DRIVER_MIPS diff --git a/drivers/bcma/Makefile b/drivers/bcma/Makefile index f32af9b76bcd..087948a1d20d 100644 --- a/drivers/bcma/Makefile +++ b/drivers/bcma/Makefile @@ -1,6 +1,7 @@ bcma-y += main.o scan.o core.o sprom.o bcma-y += driver_chipcommon.o driver_chipcommon_pmu.o bcma-y += driver_chipcommon_b.o +bcma-$(CONFIG_BCMA_PFLASH) += driver_chipcommon_pflash.o bcma-$(CONFIG_BCMA_SFLASH) += driver_chipcommon_sflash.o bcma-$(CONFIG_BCMA_NFLASH) += driver_chipcommon_nflash.o bcma-$(CONFIG_BCMA_DRIVER_PCI) += driver_pci.o diff --git a/drivers/bcma/bcma_private.h b/drivers/bcma/bcma_private.h index 7e4ddfb076d3..eda09090cb52 100644 --- a/drivers/bcma/bcma_private.h +++ b/drivers/bcma/bcma_private.h @@ -47,9 +47,6 @@ int bcma_sprom_get(struct bcma_bus *bus); void bcma_core_chipcommon_early_init(struct bcma_drv_cc *cc); void bcma_core_chipcommon_init(struct bcma_drv_cc *cc); void bcma_chipco_bcm4331_ext_pa_lines_ctl(struct bcma_drv_cc *cc, bool enable); -#ifdef CONFIG_BCMA_DRIVER_MIPS -extern struct platform_device bcma_pflash_dev; -#endif /* CONFIG_BCMA_DRIVER_MIPS */ /* driver_chipcommon_b.c */ int bcma_core_chipcommon_b_init(struct bcma_drv_cc_b *ccb); @@ -61,6 +58,21 @@ void bcma_pmu_init(struct bcma_drv_cc *cc); u32 bcma_pmu_get_alp_clock(struct bcma_drv_cc *cc); u32 bcma_pmu_get_cpu_clock(struct bcma_drv_cc *cc); +/************************************************** + * driver_chipcommon_sflash.c + **************************************************/ + +#ifdef CONFIG_BCMA_PFLASH +extern struct platform_device bcma_pflash_dev; +int bcma_pflash_init(struct bcma_drv_cc *cc); +#else +static inline int bcma_pflash_init(struct bcma_drv_cc *cc) +{ + bcma_err(cc->core->bus, "Parallel flash not supported\n"); + return 0; +} +#endif /* CONFIG_BCMA_PFLASH */ + #ifdef CONFIG_BCMA_SFLASH /* driver_chipcommon_sflash.c */ int bcma_sflash_init(struct bcma_drv_cc *cc); diff --git a/drivers/bcma/driver_chipcommon_pflash.c b/drivers/bcma/driver_chipcommon_pflash.c new file mode 100644 index 000000000000..3b497c9ee0d4 --- /dev/null +++ b/drivers/bcma/driver_chipcommon_pflash.c @@ -0,0 +1,49 @@ +/* + * Broadcom specific AMBA + * ChipCommon parallel flash + * + * Licensed under the GNU/GPL. See COPYING for details. 
+ */ + +#include "bcma_private.h" + +#include +#include +#include + +static const char * const part_probes[] = { "bcm47xxpart", NULL }; + +static struct physmap_flash_data bcma_pflash_data = { + .part_probe_types = part_probes, +}; + +static struct resource bcma_pflash_resource = { + .name = "bcma_pflash", + .flags = IORESOURCE_MEM, +}; + +struct platform_device bcma_pflash_dev = { + .name = "physmap-flash", + .dev = { + .platform_data = &bcma_pflash_data, + }, + .resource = &bcma_pflash_resource, + .num_resources = 1, +}; + +int bcma_pflash_init(struct bcma_drv_cc *cc) +{ + struct bcma_pflash *pflash = &cc->pflash; + + pflash->present = true; + + if (!(bcma_read32(cc->core, BCMA_CC_FLASH_CFG) & BCMA_CC_FLASH_CFG_DS)) + bcma_pflash_data.width = 1; + else + bcma_pflash_data.width = 2; + + bcma_pflash_resource.start = BCMA_SOC_FLASH2; + bcma_pflash_resource.end = BCMA_SOC_FLASH2 + BCMA_SOC_FLASH2_SZ; + + return 0; +} diff --git a/drivers/bcma/driver_mips.c b/drivers/bcma/driver_mips.c index 20c134c016dc..967b0e85e2cc 100644 --- a/drivers/bcma/driver_mips.c +++ b/drivers/bcma/driver_mips.c @@ -14,8 +14,6 @@ #include -#include -#include #include #include #include @@ -32,26 +30,6 @@ enum bcma_boot_dev { BCMA_BOOT_DEV_NAND, }; -static const char * const part_probes[] = { "bcm47xxpart", NULL }; - -static struct physmap_flash_data bcma_pflash_data = { - .part_probe_types = part_probes, -}; - -static struct resource bcma_pflash_resource = { - .name = "bcma_pflash", - .flags = IORESOURCE_MEM, -}; - -struct platform_device bcma_pflash_dev = { - .name = "physmap-flash", - .dev = { - .platform_data = &bcma_pflash_data, - }, - .resource = &bcma_pflash_resource, - .num_resources = 1, -}; - /* The 47162a0 hangs when reading MIPS DMP registers registers */ static inline bool bcma_core_mips_bcm47162a0_quirk(struct bcma_device *dev) { @@ -276,7 +254,6 @@ static void bcma_core_mips_flash_detect(struct bcma_drv_mips *mcore) { struct bcma_bus *bus = mcore->core->bus; struct bcma_drv_cc *cc = &bus->drv_cc; - struct bcma_pflash *pflash = &cc->pflash; enum bcma_boot_dev boot_dev; switch (cc->capabilities & BCMA_CC_CAP_FLASHT) { @@ -287,17 +264,7 @@ static void bcma_core_mips_flash_detect(struct bcma_drv_mips *mcore) break; case BCMA_CC_FLASHT_PARA: bcma_debug(bus, "Found parallel flash\n"); - pflash->present = true; - - if ((bcma_read32(cc->core, BCMA_CC_FLASH_CFG) & - BCMA_CC_FLASH_CFG_DS) == 0) - bcma_pflash_data.width = 1; - else - bcma_pflash_data.width = 2; - - bcma_pflash_resource.start = BCMA_SOC_FLASH2; - bcma_pflash_resource.end = BCMA_SOC_FLASH2 + BCMA_SOC_FLASH2_SZ; - + bcma_pflash_init(cc); break; default: bcma_err(bus, "Flash type not supported\n"); diff --git a/drivers/bcma/main.c b/drivers/bcma/main.c index c466f752b067..786be8fed39e 100644 --- a/drivers/bcma/main.c +++ b/drivers/bcma/main.c @@ -350,7 +350,7 @@ static int bcma_register_devices(struct bcma_bus *bus) bcma_register_core(bus, core); } -#ifdef CONFIG_BCMA_DRIVER_MIPS +#ifdef CONFIG_BCMA_PFLASH if (bus->drv_cc.pflash.present) { err = platform_device_register(&bcma_pflash_dev); if (err) diff --git a/include/linux/bcma/bcma_driver_chipcommon.h b/include/linux/bcma/bcma_driver_chipcommon.h index 16eaaad9dda5..846513c73606 100644 --- a/include/linux/bcma/bcma_driver_chipcommon.h +++ b/include/linux/bcma/bcma_driver_chipcommon.h @@ -576,10 +576,11 @@ struct bcma_chipcommon_pmu { u32 crystalfreq; /* The active crystal frequency (in kHz) */ }; -#ifdef CONFIG_BCMA_DRIVER_MIPS +#ifdef CONFIG_BCMA_PFLASH struct bcma_pflash { bool present; }; 
+#endif #ifdef CONFIG_BCMA_SFLASH struct mtd_info; @@ -603,6 +604,7 @@ struct bcma_nflash { }; #endif +#ifdef CONFIG_BCMA_DRIVER_MIPS struct bcma_serial_port { void *regs; unsigned long clockspeed; @@ -622,8 +624,9 @@ struct bcma_drv_cc { /* Fast Powerup Delay constant */ u16 fast_pwrup_delay; struct bcma_chipcommon_pmu pmu; -#ifdef CONFIG_BCMA_DRIVER_MIPS +#ifdef CONFIG_BCMA_PFLASH struct bcma_pflash pflash; +#endif #ifdef CONFIG_BCMA_SFLASH struct bcma_sflash sflash; #endif @@ -631,6 +634,7 @@ struct bcma_drv_cc { struct bcma_nflash nflash; #endif +#ifdef CONFIG_BCMA_DRIVER_MIPS int nr_serial_ports; struct bcma_serial_port serial_ports[4]; #endif /* CONFIG_BCMA_DRIVER_MIPS */ -- cgit v1.2.3 From bc4b024a8b8bd7dceb2697299aad2bda57d065e0 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 7 Mar 2016 10:40:02 -0600 Subject: PCI: Move pci_dma_* helpers to common code For a long time all architectures implement the pci_dma_* functions using the generic DMA API, and they all use the same header to do so. Move this header, pci-dma-compat.h, to include/linux and include it from the generic pci.h instead of having each arch duplicate this include. Signed-off-by: Christoph Hellwig Signed-off-by: Bjorn Helgaas --- arch/alpha/include/asm/pci.h | 7 --- arch/arm/include/asm/pci.h | 1 - arch/arm64/include/asm/pci.h | 1 - arch/avr32/include/asm/pci.h | 2 - arch/blackfin/include/asm/pci.h | 1 - arch/cris/include/asm/pci.h | 3 - arch/frv/include/asm/pci.h | 1 - arch/ia64/include/asm/pci.h | 2 - arch/m68k/include/asm/pci.h | 1 - arch/microblaze/include/asm/pci.h | 2 - arch/mips/include/asm/pci.h | 3 - arch/mn10300/include/asm/pci.h | 3 - arch/parisc/include/asm/pci.h | 3 - arch/powerpc/include/asm/pci.h | 2 - arch/s390/include/asm/pci.h | 1 - arch/sh/include/asm/pci.h | 3 - arch/sparc/include/asm/pci.h | 3 - arch/tile/include/asm/pci.h | 3 - arch/unicore32/include/asm/pci.h | 1 - arch/x86/include/asm/pci.h | 3 - arch/xtensa/include/asm/pci.h | 3 - include/asm-generic/pci-dma-compat.h | 118 ----------------------------------- include/linux/pci-dma-compat.h | 118 +++++++++++++++++++++++++++++++++++ include/linux/pci.h | 4 ++ 24 files changed, 122 insertions(+), 167 deletions(-) delete mode 100644 include/asm-generic/pci-dma-compat.h create mode 100644 include/linux/pci-dma-compat.h (limited to 'include/linux') diff --git a/arch/alpha/include/asm/pci.h b/arch/alpha/include/asm/pci.h index 75d8865d763d..a06c24b3a2e1 100644 --- a/arch/alpha/include/asm/pci.h +++ b/arch/alpha/include/asm/pci.h @@ -65,13 +65,6 @@ extern void pcibios_set_master(struct pci_dev *dev); decisions. */ #define PCI_DMA_BUS_IS_PHYS 0 -#ifdef CONFIG_PCI - -/* implement the pci_ DMA API in terms of the generic device dma_ one */ -#include - -#endif - /* TODO: integrate with include/asm-generic/pci.h ? 
*/ static inline int pci_get_legacy_ide_irq(struct pci_dev *dev, int channel) { diff --git a/arch/arm/include/asm/pci.h b/arch/arm/include/asm/pci.h index d7de19a77d51..057d381f4e57 100644 --- a/arch/arm/include/asm/pci.h +++ b/arch/arm/include/asm/pci.h @@ -2,7 +2,6 @@ #define ASMARM_PCI_H #ifdef __KERNEL__ -#include #include /* for pci_sys_data */ extern unsigned long pcibios_min_io; diff --git a/arch/arm64/include/asm/pci.h b/arch/arm64/include/asm/pci.h index f75b04e8d732..b9a7ba9ca44c 100644 --- a/arch/arm64/include/asm/pci.h +++ b/arch/arm64/include/asm/pci.h @@ -7,7 +7,6 @@ #include #include -#include #define PCIBIOS_MIN_IO 0x1000 #define PCIBIOS_MIN_MEM 0 diff --git a/arch/avr32/include/asm/pci.h b/arch/avr32/include/asm/pci.h index a32a02372017..0f5f134b896a 100644 --- a/arch/avr32/include/asm/pci.h +++ b/arch/avr32/include/asm/pci.h @@ -5,6 +5,4 @@ #define PCI_DMA_BUS_IS_PHYS (1) -#include - #endif /* __ASM_AVR32_PCI_H__ */ diff --git a/arch/blackfin/include/asm/pci.h b/arch/blackfin/include/asm/pci.h index 14efc0db1ade..11ea1cb35036 100644 --- a/arch/blackfin/include/asm/pci.h +++ b/arch/blackfin/include/asm/pci.h @@ -4,7 +4,6 @@ #define _ASM_BFIN_PCI_H #include -#include #include #define PCIBIOS_MIN_IO 0x00001000 diff --git a/arch/cris/include/asm/pci.h b/arch/cris/include/asm/pci.h index c15b4b4baafa..b1b289df04c7 100644 --- a/arch/cris/include/asm/pci.h +++ b/arch/cris/include/asm/pci.h @@ -48,9 +48,6 @@ extern int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma, #endif /* __KERNEL__ */ -/* implement the pci_ DMA API in terms of the generic device dma_ one */ -#include - /* generic pci stuff */ #include diff --git a/arch/frv/include/asm/pci.h b/arch/frv/include/asm/pci.h index 7e96b572f728..809cfc6707ab 100644 --- a/arch/frv/include/asm/pci.h +++ b/arch/frv/include/asm/pci.h @@ -15,7 +15,6 @@ #include #include -#include #include struct pci_dev; diff --git a/arch/ia64/include/asm/pci.h b/arch/ia64/include/asm/pci.h index 07039d168f37..c0835b0dc722 100644 --- a/arch/ia64/include/asm/pci.h +++ b/arch/ia64/include/asm/pci.h @@ -50,8 +50,6 @@ struct pci_dev; extern unsigned long ia64_max_iommu_merge_mask; #define PCI_DMA_BUS_IS_PHYS (ia64_max_iommu_merge_mask == ~0UL) -#include - #define HAVE_PCI_MMAP extern int pci_mmap_page_range (struct pci_dev *dev, struct vm_area_struct *vma, enum pci_mmap_state mmap_state, int write_combine); diff --git a/arch/m68k/include/asm/pci.h b/arch/m68k/include/asm/pci.h index 848c3dfaad50..3a3dbcf4051d 100644 --- a/arch/m68k/include/asm/pci.h +++ b/arch/m68k/include/asm/pci.h @@ -1,7 +1,6 @@ #ifndef _ASM_M68K_PCI_H #define _ASM_M68K_PCI_H -#include #include /* The PCI address space does equal the physical memory diff --git a/arch/microblaze/include/asm/pci.h b/arch/microblaze/include/asm/pci.h index dc9eb6657e3a..fc3ecb55f1b2 100644 --- a/arch/microblaze/include/asm/pci.h +++ b/arch/microblaze/include/asm/pci.h @@ -22,8 +22,6 @@ #include #include -#include - #define PCIBIOS_MIN_IO 0x1000 #define PCIBIOS_MIN_MEM 0x10000000 diff --git a/arch/mips/include/asm/pci.h b/arch/mips/include/asm/pci.h index 108d19376bb1..8c16fb7b8fdb 100644 --- a/arch/mips/include/asm/pci.h +++ b/arch/mips/include/asm/pci.h @@ -124,9 +124,6 @@ static inline int pci_proc_domain(struct pci_bus *bus) #endif /* __KERNEL__ */ -/* implement the pci_ DMA API in terms of the generic device dma_ one */ -#include - /* Do platform specific device initialization at pci_enable_device() time */ extern int pcibios_plat_dev_init(struct pci_dev *dev); diff --git 
a/arch/mn10300/include/asm/pci.h b/arch/mn10300/include/asm/pci.h index be3debb8fc02..51159fff025a 100644 --- a/arch/mn10300/include/asm/pci.h +++ b/arch/mn10300/include/asm/pci.h @@ -80,9 +80,6 @@ extern int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma, #endif /* __KERNEL__ */ -/* implement the pci_ DMA API in terms of the generic device dma_ one */ -#include - static inline int pci_get_legacy_ide_irq(struct pci_dev *dev, int channel) { return channel ? 15 : 14; diff --git a/arch/parisc/include/asm/pci.h b/arch/parisc/include/asm/pci.h index 89c53bfff055..defebd956585 100644 --- a/arch/parisc/include/asm/pci.h +++ b/arch/parisc/include/asm/pci.h @@ -194,9 +194,6 @@ extern void pcibios_init_bridge(struct pci_dev *); #define PCIBIOS_MIN_IO 0x10 #define PCIBIOS_MIN_MEM 0x1000 /* NBPG - but pci/setup-res.c dies */ -/* export the pci_ DMA API in terms of the dma_ one */ -#include - static inline int pci_get_legacy_ide_irq(struct pci_dev *dev, int channel) { return channel ? 15 : 14; diff --git a/arch/powerpc/include/asm/pci.h b/arch/powerpc/include/asm/pci.h index 6f8065a7d487..a6f3ac0d4602 100644 --- a/arch/powerpc/include/asm/pci.h +++ b/arch/powerpc/include/asm/pci.h @@ -20,8 +20,6 @@ #include #include -#include - /* Return values for pci_controller_ops.probe_mode function */ #define PCI_PROBE_NONE -1 /* Don't look at this bus at all */ #define PCI_PROBE_NORMAL 0 /* Do normal PCI probing */ diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h index c873e682b67f..fdba308649d7 100644 --- a/arch/s390/include/asm/pci.h +++ b/arch/s390/include/asm/pci.h @@ -9,7 +9,6 @@ #include #include #include -#include #include #include diff --git a/arch/sh/include/asm/pci.h b/arch/sh/include/asm/pci.h index e343dbd02e41..644314f2b1ef 100644 --- a/arch/sh/include/asm/pci.h +++ b/arch/sh/include/asm/pci.h @@ -105,9 +105,6 @@ static inline int pci_get_legacy_ide_irq(struct pci_dev *dev, int channel) return channel ? 15 : 14; } -/* generic DMA-mapping stuff */ -#include - #endif /* __KERNEL__ */ #endif /* __ASM_SH_PCI_H */ diff --git a/arch/sparc/include/asm/pci.h b/arch/sparc/include/asm/pci.h index d9c031f9910f..6e14fd179335 100644 --- a/arch/sparc/include/asm/pci.h +++ b/arch/sparc/include/asm/pci.h @@ -5,7 +5,4 @@ #else #include #endif - -#include - #endif diff --git a/arch/tile/include/asm/pci.h b/arch/tile/include/asm/pci.h index dfedd7ac7298..fe3de505b024 100644 --- a/arch/tile/include/asm/pci.h +++ b/arch/tile/include/asm/pci.h @@ -226,7 +226,4 @@ static inline int pcibios_assign_all_busses(void) /* Use any cpu for PCI. 
*/ #define cpumask_of_pcibus(bus) cpu_online_mask -/* implement the pci_ DMA API in terms of the generic device dma_ one */ -#include - #endif /* _ASM_TILE_PCI_H */ diff --git a/arch/unicore32/include/asm/pci.h b/arch/unicore32/include/asm/pci.h index eb9dccecb338..37e55d018de5 100644 --- a/arch/unicore32/include/asm/pci.h +++ b/arch/unicore32/include/asm/pci.h @@ -13,7 +13,6 @@ #define __UNICORE_PCI_H__ #ifdef __KERNEL__ -#include #include #include /* for PCIBIOS_MIN_* */ diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h index 462594320d39..7fd8eaa8714d 100644 --- a/arch/x86/include/asm/pci.h +++ b/arch/x86/include/asm/pci.h @@ -105,9 +105,6 @@ void native_restore_msi_irqs(struct pci_dev *dev); #include #endif -/* implement the pci_ DMA API in terms of the generic device dma_ one */ -#include - /* generic pci stuff */ #include diff --git a/arch/xtensa/include/asm/pci.h b/arch/xtensa/include/asm/pci.h index e438a00fbd63..5d6bd932ba4e 100644 --- a/arch/xtensa/include/asm/pci.h +++ b/arch/xtensa/include/asm/pci.h @@ -55,9 +55,6 @@ int pci_mmap_page_range(struct pci_dev *pdev, struct vm_area_struct *vma, #endif /* __KERNEL__ */ -/* Implement the pci_ DMA API in terms of the generic device dma_ one */ -#include - /* Generic PCI */ #include diff --git a/include/asm-generic/pci-dma-compat.h b/include/asm-generic/pci-dma-compat.h deleted file mode 100644 index eafce7b6f052..000000000000 --- a/include/asm-generic/pci-dma-compat.h +++ /dev/null @@ -1,118 +0,0 @@ -/* include this file if the platform implements the dma_ DMA Mapping API - * and wants to provide the pci_ DMA Mapping API in terms of it */ - -#ifndef _ASM_GENERIC_PCI_DMA_COMPAT_H -#define _ASM_GENERIC_PCI_DMA_COMPAT_H - -#include - -static inline void * -pci_alloc_consistent(struct pci_dev *hwdev, size_t size, - dma_addr_t *dma_handle) -{ - return dma_alloc_coherent(hwdev == NULL ? NULL : &hwdev->dev, size, dma_handle, GFP_ATOMIC); -} - -static inline void * -pci_zalloc_consistent(struct pci_dev *hwdev, size_t size, - dma_addr_t *dma_handle) -{ - return dma_zalloc_coherent(hwdev == NULL ? NULL : &hwdev->dev, - size, dma_handle, GFP_ATOMIC); -} - -static inline void -pci_free_consistent(struct pci_dev *hwdev, size_t size, - void *vaddr, dma_addr_t dma_handle) -{ - dma_free_coherent(hwdev == NULL ? NULL : &hwdev->dev, size, vaddr, dma_handle); -} - -static inline dma_addr_t -pci_map_single(struct pci_dev *hwdev, void *ptr, size_t size, int direction) -{ - return dma_map_single(hwdev == NULL ? NULL : &hwdev->dev, ptr, size, (enum dma_data_direction)direction); -} - -static inline void -pci_unmap_single(struct pci_dev *hwdev, dma_addr_t dma_addr, - size_t size, int direction) -{ - dma_unmap_single(hwdev == NULL ? NULL : &hwdev->dev, dma_addr, size, (enum dma_data_direction)direction); -} - -static inline dma_addr_t -pci_map_page(struct pci_dev *hwdev, struct page *page, - unsigned long offset, size_t size, int direction) -{ - return dma_map_page(hwdev == NULL ? NULL : &hwdev->dev, page, offset, size, (enum dma_data_direction)direction); -} - -static inline void -pci_unmap_page(struct pci_dev *hwdev, dma_addr_t dma_address, - size_t size, int direction) -{ - dma_unmap_page(hwdev == NULL ? NULL : &hwdev->dev, dma_address, size, (enum dma_data_direction)direction); -} - -static inline int -pci_map_sg(struct pci_dev *hwdev, struct scatterlist *sg, - int nents, int direction) -{ - return dma_map_sg(hwdev == NULL ? 
NULL : &hwdev->dev, sg, nents, (enum dma_data_direction)direction); -} - -static inline void -pci_unmap_sg(struct pci_dev *hwdev, struct scatterlist *sg, - int nents, int direction) -{ - dma_unmap_sg(hwdev == NULL ? NULL : &hwdev->dev, sg, nents, (enum dma_data_direction)direction); -} - -static inline void -pci_dma_sync_single_for_cpu(struct pci_dev *hwdev, dma_addr_t dma_handle, - size_t size, int direction) -{ - dma_sync_single_for_cpu(hwdev == NULL ? NULL : &hwdev->dev, dma_handle, size, (enum dma_data_direction)direction); -} - -static inline void -pci_dma_sync_single_for_device(struct pci_dev *hwdev, dma_addr_t dma_handle, - size_t size, int direction) -{ - dma_sync_single_for_device(hwdev == NULL ? NULL : &hwdev->dev, dma_handle, size, (enum dma_data_direction)direction); -} - -static inline void -pci_dma_sync_sg_for_cpu(struct pci_dev *hwdev, struct scatterlist *sg, - int nelems, int direction) -{ - dma_sync_sg_for_cpu(hwdev == NULL ? NULL : &hwdev->dev, sg, nelems, (enum dma_data_direction)direction); -} - -static inline void -pci_dma_sync_sg_for_device(struct pci_dev *hwdev, struct scatterlist *sg, - int nelems, int direction) -{ - dma_sync_sg_for_device(hwdev == NULL ? NULL : &hwdev->dev, sg, nelems, (enum dma_data_direction)direction); -} - -static inline int -pci_dma_mapping_error(struct pci_dev *pdev, dma_addr_t dma_addr) -{ - return dma_mapping_error(&pdev->dev, dma_addr); -} - -#ifdef CONFIG_PCI -static inline int pci_set_dma_mask(struct pci_dev *dev, u64 mask) -{ - return dma_set_mask(&dev->dev, mask); -} - -static inline int pci_set_consistent_dma_mask(struct pci_dev *dev, u64 mask) -{ - return dma_set_coherent_mask(&dev->dev, mask); -} -#endif - -#endif diff --git a/include/linux/pci-dma-compat.h b/include/linux/pci-dma-compat.h new file mode 100644 index 000000000000..eafce7b6f052 --- /dev/null +++ b/include/linux/pci-dma-compat.h @@ -0,0 +1,118 @@ +/* include this file if the platform implements the dma_ DMA Mapping API + * and wants to provide the pci_ DMA Mapping API in terms of it */ + +#ifndef _ASM_GENERIC_PCI_DMA_COMPAT_H +#define _ASM_GENERIC_PCI_DMA_COMPAT_H + +#include + +static inline void * +pci_alloc_consistent(struct pci_dev *hwdev, size_t size, + dma_addr_t *dma_handle) +{ + return dma_alloc_coherent(hwdev == NULL ? NULL : &hwdev->dev, size, dma_handle, GFP_ATOMIC); +} + +static inline void * +pci_zalloc_consistent(struct pci_dev *hwdev, size_t size, + dma_addr_t *dma_handle) +{ + return dma_zalloc_coherent(hwdev == NULL ? NULL : &hwdev->dev, + size, dma_handle, GFP_ATOMIC); +} + +static inline void +pci_free_consistent(struct pci_dev *hwdev, size_t size, + void *vaddr, dma_addr_t dma_handle) +{ + dma_free_coherent(hwdev == NULL ? NULL : &hwdev->dev, size, vaddr, dma_handle); +} + +static inline dma_addr_t +pci_map_single(struct pci_dev *hwdev, void *ptr, size_t size, int direction) +{ + return dma_map_single(hwdev == NULL ? NULL : &hwdev->dev, ptr, size, (enum dma_data_direction)direction); +} + +static inline void +pci_unmap_single(struct pci_dev *hwdev, dma_addr_t dma_addr, + size_t size, int direction) +{ + dma_unmap_single(hwdev == NULL ? NULL : &hwdev->dev, dma_addr, size, (enum dma_data_direction)direction); +} + +static inline dma_addr_t +pci_map_page(struct pci_dev *hwdev, struct page *page, + unsigned long offset, size_t size, int direction) +{ + return dma_map_page(hwdev == NULL ? 
NULL : &hwdev->dev, page, offset, size, (enum dma_data_direction)direction); +} + +static inline void +pci_unmap_page(struct pci_dev *hwdev, dma_addr_t dma_address, + size_t size, int direction) +{ + dma_unmap_page(hwdev == NULL ? NULL : &hwdev->dev, dma_address, size, (enum dma_data_direction)direction); +} + +static inline int +pci_map_sg(struct pci_dev *hwdev, struct scatterlist *sg, + int nents, int direction) +{ + return dma_map_sg(hwdev == NULL ? NULL : &hwdev->dev, sg, nents, (enum dma_data_direction)direction); +} + +static inline void +pci_unmap_sg(struct pci_dev *hwdev, struct scatterlist *sg, + int nents, int direction) +{ + dma_unmap_sg(hwdev == NULL ? NULL : &hwdev->dev, sg, nents, (enum dma_data_direction)direction); +} + +static inline void +pci_dma_sync_single_for_cpu(struct pci_dev *hwdev, dma_addr_t dma_handle, + size_t size, int direction) +{ + dma_sync_single_for_cpu(hwdev == NULL ? NULL : &hwdev->dev, dma_handle, size, (enum dma_data_direction)direction); +} + +static inline void +pci_dma_sync_single_for_device(struct pci_dev *hwdev, dma_addr_t dma_handle, + size_t size, int direction) +{ + dma_sync_single_for_device(hwdev == NULL ? NULL : &hwdev->dev, dma_handle, size, (enum dma_data_direction)direction); +} + +static inline void +pci_dma_sync_sg_for_cpu(struct pci_dev *hwdev, struct scatterlist *sg, + int nelems, int direction) +{ + dma_sync_sg_for_cpu(hwdev == NULL ? NULL : &hwdev->dev, sg, nelems, (enum dma_data_direction)direction); +} + +static inline void +pci_dma_sync_sg_for_device(struct pci_dev *hwdev, struct scatterlist *sg, + int nelems, int direction) +{ + dma_sync_sg_for_device(hwdev == NULL ? NULL : &hwdev->dev, sg, nelems, (enum dma_data_direction)direction); +} + +static inline int +pci_dma_mapping_error(struct pci_dev *pdev, dma_addr_t dma_addr) +{ + return dma_mapping_error(&pdev->dev, dma_addr); +} + +#ifdef CONFIG_PCI +static inline int pci_set_dma_mask(struct pci_dev *dev, u64 mask) +{ + return dma_set_mask(&dev->dev, mask); +} + +static inline int pci_set_consistent_dma_mask(struct pci_dev *dev, u64 mask) +{ + return dma_set_coherent_mask(&dev->dev, mask); +} +#endif + +#endif diff --git a/include/linux/pci.h b/include/linux/pci.h index 3d371c150753..5db6e0ce9504 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -2011,4 +2011,8 @@ static inline bool pci_ari_enabled(struct pci_bus *bus) { return bus->self && bus->self->ari_enabled; } + +/* provide the legacy pci_dma_* API */ +#include + #endif /* LINUX_PCI_H */ -- cgit v1.2.3 From fe537670eab767157eecc50538bd28e8d9b0ce9f Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Mon, 7 Mar 2016 11:39:16 -0600 Subject: PCI: Consolidate PCI DMA constants and interfaces in linux/pci-dma-compat.h Christoph added a generic include/linux/pci-dma-compat.h, so now there's one place with most of the PCI DMA interfaces. 
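Every wrapper in that header follows the same shape: check for a NULL pci_dev and forward to the matching dma_* call on &hwdev->dev. Two representative wrappers, quoted from the header above:

static inline dma_addr_t
pci_map_single(struct pci_dev *hwdev, void *ptr, size_t size, int direction)
{
	return dma_map_single(hwdev == NULL ? NULL : &hwdev->dev,
			      ptr, size, (enum dma_data_direction)direction);
}

static inline int pci_set_dma_mask(struct pci_dev *dev, u64 mask)
{
	return dma_set_mask(&dev->dev, mask);
}
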
Move more PCI DMA-related things there: - The PCI_DMA_* direction constants from linux/pci.h - The pci_set_dma_max_seg_size() and pci_set_dma_seg_boundary() CONFIG_PCI implementations from drivers/pci/pci.c - The pci_set_dma_max_seg_size() and pci_set_dma_seg_boundary() !CONFIG_PCI stubs from linux/pci.h - The pci_set_dma_mask() and pci_set_consistent_dma_mask() !CONFIG_PCI stubs from linux/pci.h Signed-off-by: Bjorn Helgaas --- drivers/pci/pci.c | 12 ------------ include/linux/pci-dma-compat.h | 29 +++++++++++++++++++++++++++++ include/linux/pci.h | 19 +------------------ 3 files changed, 30 insertions(+), 30 deletions(-) (limited to 'include/linux') diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 64c0a1215f84..0a9c8db51c08 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -3385,18 +3385,6 @@ bool pci_check_and_unmask_intx(struct pci_dev *dev) } EXPORT_SYMBOL_GPL(pci_check_and_unmask_intx); -int pci_set_dma_max_seg_size(struct pci_dev *dev, unsigned int size) -{ - return dma_set_max_seg_size(&dev->dev, size); -} -EXPORT_SYMBOL(pci_set_dma_max_seg_size); - -int pci_set_dma_seg_boundary(struct pci_dev *dev, unsigned long mask) -{ - return dma_set_seg_boundary(&dev->dev, mask); -} -EXPORT_SYMBOL(pci_set_dma_seg_boundary); - /** * pci_wait_for_pending_transaction - waits for pending transaction * @dev: the PCI device to operate on diff --git a/include/linux/pci-dma-compat.h b/include/linux/pci-dma-compat.h index eafce7b6f052..39726caef5b1 100644 --- a/include/linux/pci-dma-compat.h +++ b/include/linux/pci-dma-compat.h @@ -6,6 +6,12 @@ #include +/* This defines the direction arg to the DMA mapping routines. */ +#define PCI_DMA_BIDIRECTIONAL 0 +#define PCI_DMA_TODEVICE 1 +#define PCI_DMA_FROMDEVICE 2 +#define PCI_DMA_NONE 3 + static inline void * pci_alloc_consistent(struct pci_dev *hwdev, size_t size, dma_addr_t *dma_handle) @@ -113,6 +119,29 @@ static inline int pci_set_consistent_dma_mask(struct pci_dev *dev, u64 mask) { return dma_set_coherent_mask(&dev->dev, mask); } + +static inline int pci_set_dma_max_seg_size(struct pci_dev *dev, + unsigned int size) +{ + return dma_set_max_seg_size(&dev->dev, size); +} + +static inline int pci_set_dma_seg_boundary(struct pci_dev *dev, + unsigned long mask) +{ + return dma_set_seg_boundary(&dev->dev, mask); +} +#else +static inline int pci_set_dma_mask(struct pci_dev *dev, u64 mask) +{ return -EIO; } +static inline int pci_set_consistent_dma_mask(struct pci_dev *dev, u64 mask) +{ return -EIO; } +static inline int pci_set_dma_max_seg_size(struct pci_dev *dev, + unsigned int size) +{ return -EIO; } +static inline int pci_set_dma_seg_boundary(struct pci_dev *dev, + unsigned long mask) +{ return -EIO; } #endif #endif diff --git a/include/linux/pci.h b/include/linux/pci.h index 5db6e0ce9504..5049bd60ba90 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -70,12 +70,6 @@ enum pci_mmap_state { pci_mmap_mem }; -/* This defines the direction arg to the DMA mapping routines. 
*/ -#define PCI_DMA_BIDIRECTIONAL 0 -#define PCI_DMA_TODEVICE 1 -#define PCI_DMA_FROMDEVICE 2 -#define PCI_DMA_NONE 3 - /* * For PCI devices, the region numbers are assigned this way: */ @@ -1038,8 +1032,6 @@ void pci_intx(struct pci_dev *dev, int enable); bool pci_intx_mask_supported(struct pci_dev *dev); bool pci_check_and_mask_intx(struct pci_dev *dev); bool pci_check_and_unmask_intx(struct pci_dev *dev); -int pci_set_dma_max_seg_size(struct pci_dev *dev, unsigned int size); -int pci_set_dma_seg_boundary(struct pci_dev *dev, unsigned long mask); int pci_wait_for_pending(struct pci_dev *dev, int pos, u16 mask); int pci_wait_for_pending_transaction(struct pci_dev *dev); int pcix_get_max_mmrbc(struct pci_dev *dev); @@ -1255,6 +1247,7 @@ resource_size_t pcibios_iov_resource_alignment(struct pci_dev *dev, int resno); int pci_set_vga_state(struct pci_dev *pdev, bool decode, unsigned int command_bits, u32 flags); + /* kmem_cache style wrapper around pci_alloc_consistent() */ #include @@ -1466,16 +1459,6 @@ static inline struct pci_dev *pci_get_class(unsigned int class, static inline void pci_set_master(struct pci_dev *dev) { } static inline int pci_enable_device(struct pci_dev *dev) { return -EIO; } static inline void pci_disable_device(struct pci_dev *dev) { } -static inline int pci_set_dma_mask(struct pci_dev *dev, u64 mask) -{ return -EIO; } -static inline int pci_set_consistent_dma_mask(struct pci_dev *dev, u64 mask) -{ return -EIO; } -static inline int pci_set_dma_max_seg_size(struct pci_dev *dev, - unsigned int size) -{ return -EIO; } -static inline int pci_set_dma_seg_boundary(struct pci_dev *dev, - unsigned long mask) -{ return -EIO; } static inline int pci_assign_resource(struct pci_dev *dev, int i) { return -EBUSY; } static inline int __pci_register_driver(struct pci_driver *drv, -- cgit v1.2.3 From 088c86183012495b53ecc1c734909e5712a40b66 Mon Sep 17 00:00:00 2001 From: Manish Chopra Date: Fri, 4 Mar 2016 12:35:05 -0500 Subject: qed/qede: Add infrastructure support for hardware GRO This patch adds mainly structures and APIs prototype changes in order to give support for qede slowpath/fastpath support for the same. Signed-off-by: Yuval Mintz Signed-off-by: Manish Chopra Signed-off-by: David S. 
Miller --- drivers/net/ethernet/qlogic/qed/qed_hsi.h | 14 ++++- drivers/net/ethernet/qlogic/qed/qed_l2.c | 81 +++++++++++++++++++--------- drivers/net/ethernet/qlogic/qede/qede_main.c | 17 +++--- include/linux/qed/qed_eth_if.h | 12 +++-- 4 files changed, 88 insertions(+), 36 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/qlogic/qed/qed_hsi.h b/drivers/net/ethernet/qlogic/qed/qed_hsi.h index 592e0e6d9b42..236db8a99ec3 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_hsi.h +++ b/drivers/net/ethernet/qlogic/qed/qed_hsi.h @@ -2919,7 +2919,19 @@ struct eth_vport_rx_mode { }; struct eth_vport_tpa_param { - u64 reserved[2]; + u8 tpa_ipv4_en_flg; + u8 tpa_ipv6_en_flg; + u8 tpa_ipv4_tunn_en_flg; + u8 tpa_ipv6_tunn_en_flg; + u8 tpa_pkt_split_flg; + u8 tpa_hdr_data_split_flg; + u8 tpa_gro_consistent_flg; + u8 tpa_max_aggs_num; + u16 tpa_max_size; + u16 tpa_min_size_to_start; + u16 tpa_min_size_to_cont; + u8 max_buff_num; + u8 reserved; }; struct eth_vport_tx_mode { diff --git a/drivers/net/ethernet/qlogic/qed/qed_l2.c b/drivers/net/ethernet/qlogic/qed/qed_l2.c index 102ddc73b841..3f35c6ca9252 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_l2.c +++ b/drivers/net/ethernet/qlogic/qed/qed_l2.c @@ -132,16 +132,29 @@ struct qed_sp_vport_update_params { struct qed_filter_accept_flags accept_flags; }; +enum qed_tpa_mode { + QED_TPA_MODE_NONE, + QED_TPA_MODE_UNUSED, + QED_TPA_MODE_GRO, + QED_TPA_MODE_MAX +}; + +struct qed_sp_vport_start_params { + enum qed_tpa_mode tpa_mode; + bool remove_inner_vlan; + bool drop_ttl0; + u8 max_buffers_per_cqe; + u32 concrete_fid; + u16 opaque_fid; + u8 vport_id; + u16 mtu; +}; + #define QED_MAX_SGES_NUM 16 #define CRC32_POLY 0x1edc6f41 static int qed_sp_vport_start(struct qed_hwfn *p_hwfn, - u32 concrete_fid, - u16 opaque_fid, - u8 vport_id, - u16 mtu, - u8 drop_ttl0_flg, - u8 inner_vlan_removal_en_flg) + struct qed_sp_vport_start_params *p_params) { struct vport_start_ramrod_data *p_ramrod = NULL; struct qed_spq_entry *p_ent = NULL; @@ -150,13 +163,13 @@ static int qed_sp_vport_start(struct qed_hwfn *p_hwfn, u16 rx_mode = 0; u8 abs_vport_id = 0; - rc = qed_fw_vport(p_hwfn, vport_id, &abs_vport_id); + rc = qed_fw_vport(p_hwfn, p_params->vport_id, &abs_vport_id); if (rc != 0) return rc; memset(&init_data, 0, sizeof(init_data)); init_data.cid = qed_spq_get_cid(p_hwfn); - init_data.opaque_fid = opaque_fid; + init_data.opaque_fid = p_params->opaque_fid; init_data.comp_mode = QED_SPQ_MODE_EBLOCK; rc = qed_sp_init_request(p_hwfn, &p_ent, @@ -168,9 +181,9 @@ static int qed_sp_vport_start(struct qed_hwfn *p_hwfn, p_ramrod = &p_ent->ramrod.vport_start; p_ramrod->vport_id = abs_vport_id; - p_ramrod->mtu = cpu_to_le16(mtu); - p_ramrod->inner_vlan_removal_en = inner_vlan_removal_en_flg; - p_ramrod->drop_ttl0_en = drop_ttl0_flg; + p_ramrod->mtu = cpu_to_le16(p_params->mtu); + p_ramrod->inner_vlan_removal_en = p_params->remove_inner_vlan; + p_ramrod->drop_ttl0_en = p_params->drop_ttl0; SET_FIELD(rx_mode, ETH_VPORT_RX_MODE_UCAST_DROP_ALL, 1); SET_FIELD(rx_mode, ETH_VPORT_RX_MODE_MCAST_DROP_ALL, 1); @@ -181,9 +194,26 @@ static int qed_sp_vport_start(struct qed_hwfn *p_hwfn, memset(&p_ramrod->tpa_param, 0, sizeof(struct eth_vport_tpa_param)); + p_ramrod->tpa_param.max_buff_num = p_params->max_buffers_per_cqe; + + switch (p_params->tpa_mode) { + case QED_TPA_MODE_GRO: + p_ramrod->tpa_param.tpa_max_aggs_num = ETH_TPA_MAX_AGGS_NUM; + p_ramrod->tpa_param.tpa_max_size = (u16)-1; + p_ramrod->tpa_param.tpa_min_size_to_cont = p_params->mtu / 2; + 
p_ramrod->tpa_param.tpa_min_size_to_start = p_params->mtu / 2; + p_ramrod->tpa_param.tpa_ipv4_en_flg = 1; + p_ramrod->tpa_param.tpa_ipv6_en_flg = 1; + p_ramrod->tpa_param.tpa_pkt_split_flg = 1; + p_ramrod->tpa_param.tpa_gro_consistent_flg = 1; + break; + default: + break; + } + /* Software Function ID in hwfn (PFs are 0 - 15, VFs are 16 - 135) */ p_ramrod->sw_fid = qed_concrete_to_sw_fid(p_hwfn->cdev, - concrete_fid); + p_params->concrete_fid); return qed_spq_post(p_hwfn, p_ent, NULL); } @@ -1592,24 +1622,25 @@ static void qed_register_eth_ops(struct qed_dev *cdev, } static int qed_start_vport(struct qed_dev *cdev, - u8 vport_id, - u16 mtu, - u8 drop_ttl0_flg, - u8 inner_vlan_removal_en_flg) + struct qed_start_vport_params *params) { int rc, i; for_each_hwfn(cdev, i) { + struct qed_sp_vport_start_params start = { 0 }; struct qed_hwfn *p_hwfn = &cdev->hwfns[i]; - rc = qed_sp_vport_start(p_hwfn, - p_hwfn->hw_info.concrete_fid, - p_hwfn->hw_info.opaque_fid, - vport_id, - mtu, - drop_ttl0_flg, - inner_vlan_removal_en_flg); - + start.tpa_mode = params->gro_enable ? QED_TPA_MODE_GRO : + QED_TPA_MODE_NONE; + start.remove_inner_vlan = params->remove_inner_vlan; + start.drop_ttl0 = params->drop_ttl0; + start.opaque_fid = p_hwfn->hw_info.opaque_fid; + start.concrete_fid = p_hwfn->hw_info.concrete_fid; + start.vport_id = params->vport_id; + start.max_buffers_per_cqe = 16; + start.mtu = params->mtu; + + rc = qed_sp_vport_start(p_hwfn, &start); if (rc) { DP_ERR(cdev, "Failed to start VPORT\n"); return rc; @@ -1619,7 +1650,7 @@ static int qed_start_vport(struct qed_dev *cdev, DP_VERBOSE(cdev, (QED_MSG_SPQ | NETIF_MSG_IFUP), "Started V-PORT %d with MTU %d\n", - vport_id, mtu); + start.vport_id, start.mtu); } qed_reset_vport_stats(cdev); diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c index ddd9e4aaa500..f75f334af7bd 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_main.c +++ b/drivers/net/ethernet/qlogic/qede/qede_main.c @@ -2466,11 +2466,12 @@ static int qede_stop_queues(struct qede_dev *edev) static int qede_start_queues(struct qede_dev *edev) { int rc, tc, i; - int vport_id = 0, drop_ttl0_flg = 1, vlan_removal_en = 1; + int vlan_removal_en = 1; struct qed_dev *cdev = edev->cdev; struct qed_update_vport_rss_params *rss_params = &edev->rss_params; struct qed_update_vport_params vport_update_params; struct qed_queue_start_common_params q_params; + struct qed_start_vport_params start = {0}; if (!edev->num_rss) { DP_ERR(edev, @@ -2478,10 +2479,12 @@ static int qede_start_queues(struct qede_dev *edev) return -EINVAL; } - rc = edev->ops->vport_start(cdev, vport_id, - edev->ndev->mtu, - drop_ttl0_flg, - vlan_removal_en); + start.mtu = edev->ndev->mtu; + start.vport_id = 0; + start.drop_ttl0 = true; + start.remove_inner_vlan = vlan_removal_en; + + rc = edev->ops->vport_start(cdev, &start); if (rc) { DP_ERR(edev, "Start V-PORT failed %d\n", rc); @@ -2490,7 +2493,7 @@ static int qede_start_queues(struct qede_dev *edev) DP_VERBOSE(edev, NETIF_MSG_IFUP, "Start vport ramrod passed, vport_id = %d, MTU = %d, vlan_removal_en = %d\n", - vport_id, edev->ndev->mtu + 0xe, vlan_removal_en); + start.vport_id, edev->ndev->mtu + 0xe, vlan_removal_en); for_each_rss(i) { struct qede_fastpath *fp = &edev->fp_array[i]; @@ -2555,7 +2558,7 @@ static int qede_start_queues(struct qede_dev *edev) /* Prepare and send the vport enable */ memset(&vport_update_params, 0, sizeof(vport_update_params)); - vport_update_params.vport_id = vport_id; + vport_update_params.vport_id = 
start.vport_id; vport_update_params.update_vport_active_flg = 1; vport_update_params.vport_active_flg = 1; diff --git a/include/linux/qed/qed_eth_if.h b/include/linux/qed/qed_eth_if.h index e53b0ca49e41..e1d69834a11f 100644 --- a/include/linux/qed/qed_eth_if.h +++ b/include/linux/qed/qed_eth_if.h @@ -39,6 +39,14 @@ struct qed_update_vport_params { struct qed_update_vport_rss_params rss_params; }; +struct qed_start_vport_params { + bool remove_inner_vlan; + bool gro_enable; + bool drop_ttl0; + u8 vport_id; + u16 mtu; +}; + struct qed_stop_rxq_params { u8 rss_id; u8 rx_queue_id; @@ -118,9 +126,7 @@ struct qed_eth_ops { void *cookie); int (*vport_start)(struct qed_dev *cdev, - u8 vport_id, u16 mtu, - u8 drop_ttl0_flg, - u8 inner_vlan_removal_en_flg); + struct qed_start_vport_params *params); int (*vport_stop)(struct qed_dev *cdev, u8 vport_id); -- cgit v1.2.3 From 2958ccee3690717f711431b546d7f194d8fa4f8b Mon Sep 17 00:00:00 2001 From: Ludovic Desroches Date: Mon, 22 Feb 2016 15:18:55 +0100 Subject: tty/serial: at91: fix bad offset for UART timeout register With SAMA5D2, the UART has hw timeout but the offset of the register to define this value is not the same as the one for USART. When using the new UART, the value of this register was 0 so we never get timeout irqs. It involves that when using DMA, we were stuck until the execution of the dma callback which happens when a buffer is full (so after receiving 2048 bytes). Signed-off-by: Ludovic Desroches Acked-by: Nicolas Ferre Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/atmel_serial.c | 19 +++++++++++++------ include/linux/atmel_serial.h | 3 ++- 2 files changed, 15 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/drivers/tty/serial/atmel_serial.c b/drivers/tty/serial/atmel_serial.c index c0e724633fe3..d9439e6ab719 100644 --- a/drivers/tty/serial/atmel_serial.c +++ b/drivers/tty/serial/atmel_serial.c @@ -159,6 +159,7 @@ struct atmel_uart_port { u32 rts_high; u32 rts_low; bool ms_irq_enabled; + u32 rtor; /* address of receiver timeout register if it exists */ bool has_hw_timer; struct timer_list uart_timer; @@ -1718,12 +1719,16 @@ static void atmel_get_ip_name(struct uart_port *port) atmel_port->has_hw_timer = false; - if (name == usart || name == new_uart) { - dev_dbg(port->dev, "Usart or uart with hw timer\n"); + if (name == new_uart) { + dev_dbg(port->dev, "Uart with hw timer"); atmel_port->has_hw_timer = true; + atmel_port->rtor = ATMEL_UA_RTOR; + } else if (name == usart) { + dev_dbg(port->dev, "Usart\n"); + atmel_port->has_hw_timer = true; + atmel_port->rtor = ATMEL_US_RTOR; } else if (name == dbgu_uart) { dev_dbg(port->dev, "Dbgu or uart without hw timer\n"); - atmel_port->has_hw_timer = false; } else { /* fallback for older SoCs: use version field */ version = atmel_uart_readl(port, ATMEL_US_VERSION); @@ -1732,11 +1737,11 @@ static void atmel_get_ip_name(struct uart_port *port) case 0x10213: dev_dbg(port->dev, "This version is usart\n"); atmel_port->has_hw_timer = true; + atmel_port->rtor = ATMEL_US_RTOR; break; case 0x203: case 0x10202: dev_dbg(port->dev, "This version is uart\n"); - atmel_port->has_hw_timer = false; break; default: dev_err(port->dev, "Not supported ip name nor version, set to uart\n"); @@ -1841,7 +1846,8 @@ static int atmel_startup(struct uart_port *port) jiffies + uart_poll_timeout(port)); /* set USART timeout */ } else { - atmel_uart_writel(port, ATMEL_US_RTOR, PDC_RX_TIMEOUT); + atmel_uart_writel(port, atmel_port->rtor, + PDC_RX_TIMEOUT); atmel_uart_writel(port, ATMEL_US_CR, 
ATMEL_US_STTTO); atmel_uart_writel(port, ATMEL_US_IER, @@ -1856,7 +1862,8 @@ static int atmel_startup(struct uart_port *port) jiffies + uart_poll_timeout(port)); /* set USART timeout */ } else { - atmel_uart_writel(port, ATMEL_US_RTOR, PDC_RX_TIMEOUT); + atmel_uart_writel(port, atmel_port->rtor, + PDC_RX_TIMEOUT); atmel_uart_writel(port, ATMEL_US_CR, ATMEL_US_STTTO); atmel_uart_writel(port, ATMEL_US_IER, diff --git a/include/linux/atmel_serial.h b/include/linux/atmel_serial.h index ee696d7e8a43..5a4d664af87a 100644 --- a/include/linux/atmel_serial.h +++ b/include/linux/atmel_serial.h @@ -119,7 +119,8 @@ #define ATMEL_US_BRGR 0x20 /* Baud Rate Generator Register */ #define ATMEL_US_CD GENMASK(15, 0) /* Clock Divider */ -#define ATMEL_US_RTOR 0x24 /* Receiver Time-out Register */ +#define ATMEL_US_RTOR 0x24 /* Receiver Time-out Register for USART */ +#define ATMEL_UA_RTOR 0x28 /* Receiver Time-out Register for UART */ #define ATMEL_US_TO GENMASK(15, 0) /* Time-out Value */ #define ATMEL_US_TTGR 0x28 /* Transmitter Timeguard Register */ -- cgit v1.2.3 From 0607616d9311096bfd1c325c36ffbfa71ccd40ab Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 25 Feb 2016 15:33:46 +0100 Subject: serial: sa1100: make sa1100_register_uart_fns a function Calling sa1100_register_uart_fns() leaves the port structure unused when CONFIG_SERIAL_SA1100 is disabled, and we get a compiler warning about that: arch/arm/mach-sa1100/badge4.c:317:31: warning: 'badge4_port_fns' defined but not used [-Wunused-variable] static struct sa1100_port_fns badge4_port_fns __initdata = { This turns the two empty macros into empty inline functions, which has the same effect, but lets the compiler know that the variables are intentionally unused. Signed-off-by: Arnd Bergmann Acked-by: Linus Walleij Signed-off-by: Greg Kroah-Hartman --- include/linux/platform_data/sa11x0-serial.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/platform_data/sa11x0-serial.h b/include/linux/platform_data/sa11x0-serial.h index 4504d5d592f0..009e1d83fe39 100644 --- a/include/linux/platform_data/sa11x0-serial.h +++ b/include/linux/platform_data/sa11x0-serial.h @@ -26,8 +26,12 @@ struct sa1100_port_fns { void sa1100_register_uart_fns(struct sa1100_port_fns *fns); void sa1100_register_uart(int idx, int port); #else -#define sa1100_register_uart_fns(fns) do { } while (0) -#define sa1100_register_uart(idx,port) do { } while (0) +static inline void sa1100_register_uart_fns(struct sa1100_port_fns *fns) +{ +} +static inline void sa1100_register_uart(int idx, int port) +{ +} #endif #endif -- cgit v1.2.3 From f5b8aa78ef086248145363bf1ffe5ca4348b6a98 Mon Sep 17 00:00:00 2001 From: Boris BREZILLON Date: Mon, 7 Mar 2016 10:46:51 +0100 Subject: mtd: kill the ecclayout->oobavail field ecclayout->oobavail is just redundant with the mtd->oobavail field. Moreover, it prevents static const definition of ecc layouts since the NAND framework is calculating this value based on the ecclayout->oobfree field. 
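With the per-layout field gone, the core computes the value once at scan time by walking the free-OOB entries; the loop now in nand_scan_tail() reduces to:

	mtd->oobavail = 0;
	if (ecc->layout) {
		for (i = 0; ecc->layout->oobfree[i].length; i++)
			mtd->oobavail += ecc->layout->oobfree[i].length;
	}

Users such as mtdswap and JFFS2 then read mtd->oobavail directly instead of dereferencing mtd->ecclayout.
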
Signed-off-by: Boris Brezillon Signed-off-by: Brian Norris --- drivers/mtd/devices/docg3.c | 5 ++- drivers/mtd/mtdswap.c | 16 ++++----- drivers/mtd/nand/brcmnand/brcmnand.c | 8 ++--- drivers/mtd/nand/docg4.c | 1 - drivers/mtd/nand/hisi504_nand.c | 1 - drivers/mtd/nand/nand_base.c | 14 ++++---- drivers/mtd/onenand/onenand_base.c | 16 ++++----- drivers/mtd/tests/oobtest.c | 49 +++++++++++++-------------- drivers/staging/mt29f_spinand/mt29f_spinand.c | 1 - fs/jffs2/wbuf.c | 6 ++-- include/linux/mtd/mtd.h | 1 - 11 files changed, 53 insertions(+), 65 deletions(-) (limited to 'include/linux') diff --git a/drivers/mtd/devices/docg3.c b/drivers/mtd/devices/docg3.c index c3a2695a4420..e7b2e439696c 100644 --- a/drivers/mtd/devices/docg3.c +++ b/drivers/mtd/devices/docg3.c @@ -72,13 +72,11 @@ MODULE_PARM_DESC(reliable_mode, "Set the docg3 mode (0=normal MLC, 1=fast, " * @eccbytes: 8 bytes are used (1 for Hamming ECC, 7 for BCH ECC) * @eccpos: ecc positions (byte 7 is Hamming ECC, byte 8-14 are BCH ECC) * @oobfree: free pageinfo bytes (byte 0 until byte 6, byte 15 - * @oobavail: 8 available bytes remaining after ECC toll */ static struct nand_ecclayout docg3_oobinfo = { .eccbytes = 8, .eccpos = {7, 8, 9, 10, 11, 12, 13, 14}, .oobfree = {{0, 7}, {15, 1} }, - .oobavail = 8, }; static inline u8 doc_readb(struct docg3 *docg3, u16 reg) @@ -1438,7 +1436,7 @@ static int doc_write_oob(struct mtd_info *mtd, loff_t ofs, oobdelta = mtd->oobsize; break; case MTD_OPS_AUTO_OOB: - oobdelta = mtd->ecclayout->oobavail; + oobdelta = mtd->oobavail; break; default: return -EINVAL; @@ -1860,6 +1858,7 @@ static int __init doc_set_driver_info(int chip_id, struct mtd_info *mtd) mtd->_write_oob = doc_write_oob; mtd->_block_isbad = doc_block_isbad; mtd->ecclayout = &docg3_oobinfo; + mtd->oobavail = 8; mtd->ecc_strength = DOC_ECC_BCH_T; return 0; diff --git a/drivers/mtd/mtdswap.c b/drivers/mtd/mtdswap.c index fc8b3d16cce7..d330eb1d3eba 100644 --- a/drivers/mtd/mtdswap.c +++ b/drivers/mtd/mtdswap.c @@ -346,7 +346,7 @@ static int mtdswap_read_markers(struct mtdswap_dev *d, struct swap_eb *eb) if (mtd_can_have_bb(d->mtd) && mtd_block_isbad(d->mtd, offset)) return MTDSWAP_SCANNED_BAD; - ops.ooblen = 2 * d->mtd->ecclayout->oobavail; + ops.ooblen = 2 * d->mtd->oobavail; ops.oobbuf = d->oob_buf; ops.ooboffs = 0; ops.datbuf = NULL; @@ -359,7 +359,7 @@ static int mtdswap_read_markers(struct mtdswap_dev *d, struct swap_eb *eb) data = (struct mtdswap_oobdata *)d->oob_buf; data2 = (struct mtdswap_oobdata *) - (d->oob_buf + d->mtd->ecclayout->oobavail); + (d->oob_buf + d->mtd->oobavail); if (le16_to_cpu(data->magic) == MTDSWAP_MAGIC_CLEAN) { eb->erase_count = le32_to_cpu(data->count); @@ -933,7 +933,7 @@ static unsigned int mtdswap_eblk_passes(struct mtdswap_dev *d, ops.mode = MTD_OPS_AUTO_OOB; ops.len = mtd->writesize; - ops.ooblen = mtd->ecclayout->oobavail; + ops.ooblen = mtd->oobavail; ops.ooboffs = 0; ops.datbuf = d->page_buf; ops.oobbuf = d->oob_buf; @@ -945,7 +945,7 @@ static unsigned int mtdswap_eblk_passes(struct mtdswap_dev *d, for (i = 0; i < mtd_pages; i++) { patt = mtdswap_test_patt(test + i); memset(d->page_buf, patt, mtd->writesize); - memset(d->oob_buf, patt, mtd->ecclayout->oobavail); + memset(d->oob_buf, patt, mtd->oobavail); ret = mtd_write_oob(mtd, pos, &ops); if (ret) goto error; @@ -964,7 +964,7 @@ static unsigned int mtdswap_eblk_passes(struct mtdswap_dev *d, if (p1[j] != patt) goto error; - for (j = 0; j < mtd->ecclayout->oobavail; j++) + for (j = 0; j < mtd->oobavail; j++) if (p2[j] != (unsigned char)patt) goto 
error; @@ -1387,7 +1387,7 @@ static int mtdswap_init(struct mtdswap_dev *d, unsigned int eblocks, if (!d->page_buf) goto page_buf_fail; - d->oob_buf = kmalloc(2 * mtd->ecclayout->oobavail, GFP_KERNEL); + d->oob_buf = kmalloc(2 * mtd->oobavail, GFP_KERNEL); if (!d->oob_buf) goto oob_buf_fail; @@ -1454,10 +1454,10 @@ static void mtdswap_add_mtd(struct mtd_blktrans_ops *tr, struct mtd_info *mtd) return; } - if (!mtd->oobsize || oinfo->oobavail < MTDSWAP_OOBSIZE) { + if (!mtd->oobsize || mtd->oobavail < MTDSWAP_OOBSIZE) { printk(KERN_ERR "%s: Not enough free bytes in OOB, " "%d available, %zu needed.\n", - MTDSWAP_PREFIX, oinfo->oobavail, MTDSWAP_OOBSIZE); + MTDSWAP_PREFIX, mtd->oobavail, MTDSWAP_OOBSIZE); return; } diff --git a/drivers/mtd/nand/brcmnand/brcmnand.c b/drivers/mtd/nand/brcmnand/brcmnand.c index f7009c1cb90c..e0528397306a 100644 --- a/drivers/mtd/nand/brcmnand/brcmnand.c +++ b/drivers/mtd/nand/brcmnand/brcmnand.c @@ -828,7 +828,8 @@ static struct nand_ecclayout *brcmnand_create_layout(int ecc_level, idx2 >= MTD_MAX_OOBFREE_ENTRIES_LARGE - 1) break; } - goto out; + + return layout; } /* @@ -879,10 +880,7 @@ static struct nand_ecclayout *brcmnand_create_layout(int ecc_level, idx2 >= MTD_MAX_OOBFREE_ENTRIES_LARGE - 1) break; } -out: - /* Sum available OOB */ - for (i = 0; i < MTD_MAX_OOBFREE_ENTRIES_LARGE; i++) - layout->oobavail += layout->oobfree[i].length; + return layout; } diff --git a/drivers/mtd/nand/docg4.c b/drivers/mtd/nand/docg4.c index df4165b02c62..fb46fd7e056b 100644 --- a/drivers/mtd/nand/docg4.c +++ b/drivers/mtd/nand/docg4.c @@ -225,7 +225,6 @@ struct docg4_priv { static struct nand_ecclayout docg4_oobinfo = { .eccbytes = 9, .eccpos = {7, 8, 9, 10, 11, 12, 13, 14, 15}, - .oobavail = 5, .oobfree = { {.offset = 2, .length = 5} } }; diff --git a/drivers/mtd/nand/hisi504_nand.c b/drivers/mtd/nand/hisi504_nand.c index f8d37f36a81c..96502b624cfb 100644 --- a/drivers/mtd/nand/hisi504_nand.c +++ b/drivers/mtd/nand/hisi504_nand.c @@ -632,7 +632,6 @@ static void hisi_nfc_host_init(struct hinfc_host *host) } static struct nand_ecclayout nand_ecc_2K_16bits = { - .oobavail = 6, .oobfree = { {2, 6} }, }; diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c index 596a9b0503da..53993df0c25a 100644 --- a/drivers/mtd/nand/nand_base.c +++ b/drivers/mtd/nand/nand_base.c @@ -2076,7 +2076,7 @@ static int nand_do_read_oob(struct mtd_info *mtd, loff_t from, stats = mtd->ecc_stats; if (ops->mode == MTD_OPS_AUTO_OOB) - len = chip->ecc.layout->oobavail; + len = mtd->oobavail; else len = mtd->oobsize; @@ -2767,7 +2767,7 @@ static int nand_do_write_oob(struct mtd_info *mtd, loff_t to, __func__, (unsigned int)to, (int)ops->ooblen); if (ops->mode == MTD_OPS_AUTO_OOB) - len = chip->ecc.layout->oobavail; + len = mtd->oobavail; else len = mtd->oobsize; @@ -4325,11 +4325,11 @@ int nand_scan_tail(struct mtd_info *mtd) * The number of bytes available for a client to place data into * the out of band area. 
*/ - ecc->layout->oobavail = 0; - for (i = 0; ecc->layout->oobfree[i].length - && i < ARRAY_SIZE(ecc->layout->oobfree); i++) - ecc->layout->oobavail += ecc->layout->oobfree[i].length; - mtd->oobavail = ecc->layout->oobavail; + mtd->oobavail = 0; + if (ecc->layout) { + for (i = 0; ecc->layout->oobfree[i].length; i++) + mtd->oobavail += ecc->layout->oobfree[i].length; + } /* ECC sanity check: warn if it's too weak */ if (!nand_ecc_strength_good(mtd)) diff --git a/drivers/mtd/onenand/onenand_base.c b/drivers/mtd/onenand/onenand_base.c index 652d01832873..df47537d1feb 100644 --- a/drivers/mtd/onenand/onenand_base.c +++ b/drivers/mtd/onenand/onenand_base.c @@ -1125,7 +1125,7 @@ static int onenand_mlc_read_ops_nolock(struct mtd_info *mtd, loff_t from, (int)len); if (ops->mode == MTD_OPS_AUTO_OOB) - oobsize = this->ecclayout->oobavail; + oobsize = mtd->oobavail; else oobsize = mtd->oobsize; @@ -1230,7 +1230,7 @@ static int onenand_read_ops_nolock(struct mtd_info *mtd, loff_t from, (int)len); if (ops->mode == MTD_OPS_AUTO_OOB) - oobsize = this->ecclayout->oobavail; + oobsize = mtd->oobavail; else oobsize = mtd->oobsize; @@ -1365,7 +1365,7 @@ static int onenand_read_oob_nolock(struct mtd_info *mtd, loff_t from, ops->oobretlen = 0; if (mode == MTD_OPS_AUTO_OOB) - oobsize = this->ecclayout->oobavail; + oobsize = mtd->oobavail; else oobsize = mtd->oobsize; @@ -1887,7 +1887,7 @@ static int onenand_write_ops_nolock(struct mtd_info *mtd, loff_t to, return 0; if (ops->mode == MTD_OPS_AUTO_OOB) - oobsize = this->ecclayout->oobavail; + oobsize = mtd->oobavail; else oobsize = mtd->oobsize; @@ -2063,7 +2063,7 @@ static int onenand_write_oob_nolock(struct mtd_info *mtd, loff_t to, ops->oobretlen = 0; if (mode == MTD_OPS_AUTO_OOB) - oobsize = this->ecclayout->oobavail; + oobsize = mtd->oobavail; else oobsize = mtd->oobsize; @@ -4050,12 +4050,10 @@ int onenand_scan(struct mtd_info *mtd, int maxchips) * The number of bytes available for a client to place data into * the out of band area */ - this->ecclayout->oobavail = 0; + mtd->oobavail = 0; for (i = 0; i < MTD_MAX_OOBFREE_ENTRIES && this->ecclayout->oobfree[i].length; i++) - this->ecclayout->oobavail += - this->ecclayout->oobfree[i].length; - mtd->oobavail = this->ecclayout->oobavail; + mtd->oobavail += this->ecclayout->oobfree[i].length; mtd->ecclayout = this->ecclayout; mtd->ecc_strength = 1; diff --git a/drivers/mtd/tests/oobtest.c b/drivers/mtd/tests/oobtest.c index 31762120eb56..1cb3f7758fb6 100644 --- a/drivers/mtd/tests/oobtest.c +++ b/drivers/mtd/tests/oobtest.c @@ -215,19 +215,19 @@ static int verify_eraseblock(int ebnum) pr_info("ignoring error as within bitflip_limit\n"); } - if (use_offset != 0 || use_len < mtd->ecclayout->oobavail) { + if (use_offset != 0 || use_len < mtd->oobavail) { int k; ops.mode = MTD_OPS_AUTO_OOB; ops.len = 0; ops.retlen = 0; - ops.ooblen = mtd->ecclayout->oobavail; + ops.ooblen = mtd->oobavail; ops.oobretlen = 0; ops.ooboffs = 0; ops.datbuf = NULL; ops.oobbuf = readbuf; err = mtd_read_oob(mtd, addr, &ops); - if (err || ops.oobretlen != mtd->ecclayout->oobavail) { + if (err || ops.oobretlen != mtd->oobavail) { pr_err("error: readoob failed at %#llx\n", (long long)addr); errcnt += 1; @@ -244,7 +244,7 @@ static int verify_eraseblock(int ebnum) /* verify post-(use_offset + use_len) area for 0xff */ k = use_offset + use_len; bitflips += memffshow(addr, k, readbuf + k, - mtd->ecclayout->oobavail - k); + mtd->oobavail - k); if (bitflips > bitflip_limit) { pr_err("error: verify failed at %#llx\n", @@ -269,8 +269,8 @@ static int 
verify_eraseblock_in_one_go(int ebnum) struct mtd_oob_ops ops; int err = 0; loff_t addr = (loff_t)ebnum * mtd->erasesize; - size_t len = mtd->ecclayout->oobavail * pgcnt; - size_t oobavail = mtd->ecclayout->oobavail; + size_t len = mtd->oobavail * pgcnt; + size_t oobavail = mtd->oobavail; size_t bitflips; int i; @@ -394,8 +394,8 @@ static int __init mtd_oobtest_init(void) goto out; use_offset = 0; - use_len = mtd->ecclayout->oobavail; - use_len_max = mtd->ecclayout->oobavail; + use_len = mtd->oobavail; + use_len_max = mtd->oobavail; vary_offset = 0; /* First test: write all OOB, read it back and verify */ @@ -460,8 +460,8 @@ static int __init mtd_oobtest_init(void) /* Write all eraseblocks */ use_offset = 0; - use_len = mtd->ecclayout->oobavail; - use_len_max = mtd->ecclayout->oobavail; + use_len = mtd->oobavail; + use_len_max = mtd->oobavail; vary_offset = 1; prandom_seed_state(&rnd_state, 5); @@ -471,8 +471,8 @@ static int __init mtd_oobtest_init(void) /* Check all eraseblocks */ use_offset = 0; - use_len = mtd->ecclayout->oobavail; - use_len_max = mtd->ecclayout->oobavail; + use_len = mtd->oobavail; + use_len_max = mtd->oobavail; vary_offset = 1; prandom_seed_state(&rnd_state, 5); err = verify_all_eraseblocks(); @@ -480,8 +480,8 @@ static int __init mtd_oobtest_init(void) goto out; use_offset = 0; - use_len = mtd->ecclayout->oobavail; - use_len_max = mtd->ecclayout->oobavail; + use_len = mtd->oobavail; + use_len_max = mtd->oobavail; vary_offset = 0; /* Fourth test: try to write off end of device */ @@ -501,7 +501,7 @@ static int __init mtd_oobtest_init(void) ops.retlen = 0; ops.ooblen = 1; ops.oobretlen = 0; - ops.ooboffs = mtd->ecclayout->oobavail; + ops.ooboffs = mtd->oobavail; ops.datbuf = NULL; ops.oobbuf = writebuf; pr_info("attempting to start write past end of OOB\n"); @@ -521,7 +521,7 @@ static int __init mtd_oobtest_init(void) ops.retlen = 0; ops.ooblen = 1; ops.oobretlen = 0; - ops.ooboffs = mtd->ecclayout->oobavail; + ops.ooboffs = mtd->oobavail; ops.datbuf = NULL; ops.oobbuf = readbuf; pr_info("attempting to start read past end of OOB\n"); @@ -543,7 +543,7 @@ static int __init mtd_oobtest_init(void) ops.mode = MTD_OPS_AUTO_OOB; ops.len = 0; ops.retlen = 0; - ops.ooblen = mtd->ecclayout->oobavail + 1; + ops.ooblen = mtd->oobavail + 1; ops.oobretlen = 0; ops.ooboffs = 0; ops.datbuf = NULL; @@ -563,7 +563,7 @@ static int __init mtd_oobtest_init(void) ops.mode = MTD_OPS_AUTO_OOB; ops.len = 0; ops.retlen = 0; - ops.ooblen = mtd->ecclayout->oobavail + 1; + ops.ooblen = mtd->oobavail + 1; ops.oobretlen = 0; ops.ooboffs = 0; ops.datbuf = NULL; @@ -587,7 +587,7 @@ static int __init mtd_oobtest_init(void) ops.mode = MTD_OPS_AUTO_OOB; ops.len = 0; ops.retlen = 0; - ops.ooblen = mtd->ecclayout->oobavail; + ops.ooblen = mtd->oobavail; ops.oobretlen = 0; ops.ooboffs = 1; ops.datbuf = NULL; @@ -607,7 +607,7 @@ static int __init mtd_oobtest_init(void) ops.mode = MTD_OPS_AUTO_OOB; ops.len = 0; ops.retlen = 0; - ops.ooblen = mtd->ecclayout->oobavail; + ops.ooblen = mtd->oobavail; ops.oobretlen = 0; ops.ooboffs = 1; ops.datbuf = NULL; @@ -638,7 +638,7 @@ static int __init mtd_oobtest_init(void) for (i = 0; i < ebcnt - 1; ++i) { int cnt = 2; int pg; - size_t sz = mtd->ecclayout->oobavail; + size_t sz = mtd->oobavail; if (bbt[i] || bbt[i + 1]) continue; addr = (loff_t)(i + 1) * mtd->erasesize - mtd->writesize; @@ -673,13 +673,12 @@ static int __init mtd_oobtest_init(void) for (i = 0; i < ebcnt - 1; ++i) { if (bbt[i] || bbt[i + 1]) continue; - prandom_bytes_state(&rnd_state, writebuf, - 
mtd->ecclayout->oobavail * 2); + prandom_bytes_state(&rnd_state, writebuf, mtd->oobavail * 2); addr = (loff_t)(i + 1) * mtd->erasesize - mtd->writesize; ops.mode = MTD_OPS_AUTO_OOB; ops.len = 0; ops.retlen = 0; - ops.ooblen = mtd->ecclayout->oobavail * 2; + ops.ooblen = mtd->oobavail * 2; ops.oobretlen = 0; ops.ooboffs = 0; ops.datbuf = NULL; @@ -688,7 +687,7 @@ static int __init mtd_oobtest_init(void) if (err) goto out; if (memcmpshow(addr, readbuf, writebuf, - mtd->ecclayout->oobavail * 2)) { + mtd->oobavail * 2)) { pr_err("error: verify failed at %#llx\n", (long long)addr); errcnt += 1; diff --git a/drivers/staging/mt29f_spinand/mt29f_spinand.c b/drivers/staging/mt29f_spinand/mt29f_spinand.c index 197d1124733d..67283766710f 100644 --- a/drivers/staging/mt29f_spinand/mt29f_spinand.c +++ b/drivers/staging/mt29f_spinand/mt29f_spinand.c @@ -49,7 +49,6 @@ static struct nand_ecclayout spinand_oob_64 = { 17, 18, 19, 20, 21, 22, 33, 34, 35, 36, 37, 38, 49, 50, 51, 52, 53, 54, }, - .oobavail = 32, .oobfree = { {.offset = 8, .length = 8}, diff --git a/fs/jffs2/wbuf.c b/fs/jffs2/wbuf.c index 5a3da3f52908..b25d28a21212 100644 --- a/fs/jffs2/wbuf.c +++ b/fs/jffs2/wbuf.c @@ -1183,22 +1183,20 @@ void jffs2_dirty_trigger(struct jffs2_sb_info *c) int jffs2_nand_flash_setup(struct jffs2_sb_info *c) { - struct nand_ecclayout *oinfo = c->mtd->ecclayout; - if (!c->mtd->oobsize) return 0; /* Cleanmarker is out-of-band, so inline size zero */ c->cleanmarker_size = 0; - if (!oinfo || oinfo->oobavail == 0) { + if (c->mtd->oobavail == 0) { pr_err("inconsistent device description\n"); return -EINVAL; } jffs2_dbg(1, "using OOB on NAND\n"); - c->oobavail = oinfo->oobavail; + c->oobavail = c->mtd->oobavail; /* Initialise write buffer */ init_rwsem(&c->wbuf_sem); diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h index cc84923011c0..9cf13c4bccc8 100644 --- a/include/linux/mtd/mtd.h +++ b/include/linux/mtd/mtd.h @@ -105,7 +105,6 @@ struct mtd_oob_ops { struct nand_ecclayout { __u32 eccbytes; __u32 eccpos[MTD_MAX_ECCPOS_ENTRIES_LARGE]; - __u32 oobavail; struct nand_oobfree oobfree[MTD_MAX_OOBFREE_ENTRIES_LARGE]; }; -- cgit v1.2.3 From 29f1058a90b319b01c4cf469720e0350212d5c9c Mon Sep 17 00:00:00 2001 From: Boris BREZILLON Date: Mon, 7 Mar 2016 10:46:52 +0100 Subject: mtd: create an mtd_oobavail() helper and make use of it Currently, all MTD drivers/sublayers exposing an OOB area are doing the same kind of test to extract the available OOB size based on the mtd_info and mtd_oob_ops structures. Move this common logic into an inline function and make use of it. 
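The helper is a one-line mode test, and every converted call site collapses to a single call:

static inline int mtd_oobavail(struct mtd_info *mtd, struct mtd_oob_ops *ops)
{
	return ops->mode == MTD_OPS_AUTO_OOB ? mtd->oobavail : mtd->oobsize;
}

	/* typical call site, e.g. in nand_do_read_oob() */
	len = mtd_oobavail(mtd, ops);
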
Signed-off-by: Boris Brezillon Suggested-by: Priit Laes Signed-off-by: Brian Norris --- drivers/mtd/mtdpart.c | 5 +---- drivers/mtd/nand/nand_base.c | 16 ++++------------ drivers/mtd/onenand/onenand_base.c | 19 +++---------------- include/linux/mtd/mtd.h | 5 +++++ 4 files changed, 13 insertions(+), 32 deletions(-) (limited to 'include/linux') diff --git a/drivers/mtd/mtdpart.c b/drivers/mtd/mtdpart.c index 10bf304027dd..08de4b2cf0f5 100644 --- a/drivers/mtd/mtdpart.c +++ b/drivers/mtd/mtdpart.c @@ -126,10 +126,7 @@ static int part_read_oob(struct mtd_info *mtd, loff_t from, if (ops->oobbuf) { size_t len, pages; - if (ops->mode == MTD_OPS_AUTO_OOB) - len = mtd->oobavail; - else - len = mtd->oobsize; + len = mtd_oobavail(mtd, ops); pages = mtd_div_by_ws(mtd->size, mtd); pages -= mtd_div_by_ws(from, mtd); if (ops->ooboffs + ops->ooblen > pages * len) diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c index 53993df0c25a..a36f15b314ba 100644 --- a/drivers/mtd/nand/nand_base.c +++ b/drivers/mtd/nand/nand_base.c @@ -1723,8 +1723,7 @@ static int nand_do_read_ops(struct mtd_info *mtd, loff_t from, int ret = 0; uint32_t readlen = ops->len; uint32_t oobreadlen = ops->ooblen; - uint32_t max_oobsize = ops->mode == MTD_OPS_AUTO_OOB ? - mtd->oobavail : mtd->oobsize; + uint32_t max_oobsize = mtd_oobavail(mtd, ops); uint8_t *bufpoi, *oob, *buf; int use_bufpoi; @@ -2075,10 +2074,7 @@ static int nand_do_read_oob(struct mtd_info *mtd, loff_t from, stats = mtd->ecc_stats; - if (ops->mode == MTD_OPS_AUTO_OOB) - len = mtd->oobavail; - else - len = mtd->oobsize; + len = mtd_oobavail(mtd, ops); if (unlikely(ops->ooboffs >= len)) { pr_debug("%s: attempt to start read outside oob\n", @@ -2575,8 +2571,7 @@ static int nand_do_write_ops(struct mtd_info *mtd, loff_t to, uint32_t writelen = ops->len; uint32_t oobwritelen = ops->ooblen; - uint32_t oobmaxlen = ops->mode == MTD_OPS_AUTO_OOB ? 
- mtd->oobavail : mtd->oobsize; + uint32_t oobmaxlen = mtd_oobavail(mtd, ops); uint8_t *oob = ops->oobbuf; uint8_t *buf = ops->datbuf; @@ -2766,10 +2761,7 @@ static int nand_do_write_oob(struct mtd_info *mtd, loff_t to, pr_debug("%s: to = 0x%08x, len = %i\n", __func__, (unsigned int)to, (int)ops->ooblen); - if (ops->mode == MTD_OPS_AUTO_OOB) - len = mtd->oobavail; - else - len = mtd->oobsize; + len = mtd_oobavail(mtd, ops); /* Do not allow write past end of page */ if ((ops->ooboffs + ops->ooblen) > len) { diff --git a/drivers/mtd/onenand/onenand_base.c b/drivers/mtd/onenand/onenand_base.c index df47537d1feb..af28bb3ae7cf 100644 --- a/drivers/mtd/onenand/onenand_base.c +++ b/drivers/mtd/onenand/onenand_base.c @@ -1124,11 +1124,7 @@ static int onenand_mlc_read_ops_nolock(struct mtd_info *mtd, loff_t from, pr_debug("%s: from = 0x%08x, len = %i\n", __func__, (unsigned int)from, (int)len); - if (ops->mode == MTD_OPS_AUTO_OOB) - oobsize = mtd->oobavail; - else - oobsize = mtd->oobsize; - + oobsize = mtd_oobavail(mtd, ops); oobcolumn = from & (mtd->oobsize - 1); /* Do not allow reads past end of device */ @@ -1229,11 +1225,7 @@ static int onenand_read_ops_nolock(struct mtd_info *mtd, loff_t from, pr_debug("%s: from = 0x%08x, len = %i\n", __func__, (unsigned int)from, (int)len); - if (ops->mode == MTD_OPS_AUTO_OOB) - oobsize = mtd->oobavail; - else - oobsize = mtd->oobsize; - + oobsize = mtd_oobavail(mtd, ops); oobcolumn = from & (mtd->oobsize - 1); /* Do not allow reads past end of device */ @@ -1885,12 +1877,7 @@ static int onenand_write_ops_nolock(struct mtd_info *mtd, loff_t to, /* Check zero length */ if (!len) return 0; - - if (ops->mode == MTD_OPS_AUTO_OOB) - oobsize = mtd->oobavail; - else - oobsize = mtd->oobsize; - + oobsize = mtd_oobavail(mtd, ops); oobcolumn = to & (mtd->oobsize - 1); column = to & (mtd->writesize - 1); diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h index 9cf13c4bccc8..771272187316 100644 --- a/include/linux/mtd/mtd.h +++ b/include/linux/mtd/mtd.h @@ -264,6 +264,11 @@ static inline struct device_node *mtd_get_of_node(struct mtd_info *mtd) return mtd->dev.of_node; } +static inline int mtd_oobavail(struct mtd_info *mtd, struct mtd_oob_ops *ops) +{ + return ops->mode == MTD_OPS_AUTO_OOB ? mtd->oobavail : mtd->oobsize; +} + int mtd_erase(struct mtd_info *mtd, struct erase_info *instr); int mtd_point(struct mtd_info *mtd, loff_t from, size_t len, size_t *retlen, void **virt, resource_size_t *phys); -- cgit v1.2.3 From a8c65d504e0b2256f7672506ae6ea68d88ef020a Mon Sep 17 00:00:00 2001 From: Boris BREZILLON Date: Mon, 7 Mar 2016 10:46:54 +0100 Subject: mtd: nand: simplify nand_bch_init() usage nand_bch_init() requires several arguments which could directly be deduced from the mtd device. Get rid of those useless parameters. nand_bch_init() is also requiring the caller to provide a proper eccbytes value, while this value could be deduced from the ecc.size and ecc.strength value. Fallback to eccbytes calculation when it is set to 0. 
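The fallback mirrors the formula that nand_scan_tail() used to open-code:

	if (!eccbytes && eccstrength) {
		eccbytes = DIV_ROUND_UP(eccstrength * fls(8 * eccsize), 8);
		nand->ecc.bytes = eccbytes;
	}

For the usual eccsize of 512 bytes, fls(8 * 512) = 13, so a strength-4 BCH code needs DIV_ROUND_UP(4 * 13, 8) = 7 ECC bytes per step, matching the example given in the nand_bch_init() kernel-doc.
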
Signed-off-by: Boris Brezillon Signed-off-by: Brian Norris --- drivers/mtd/nand/nand_base.c | 6 ++---- drivers/mtd/nand/nand_bch.c | 27 +++++++++++++++++---------- drivers/mtd/nand/omap2.c | 28 ++++++++++++---------------- include/linux/mtd/nand_bch.h | 8 ++------ 4 files changed, 33 insertions(+), 36 deletions(-) (limited to 'include/linux') diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c index a36f15b314ba..191e4f75a513 100644 --- a/drivers/mtd/nand/nand_base.c +++ b/drivers/mtd/nand/nand_base.c @@ -4279,10 +4279,8 @@ int nand_scan_tail(struct mtd_info *mtd) } /* See nand_bch_init() for details. */ - ecc->bytes = DIV_ROUND_UP( - ecc->strength * fls(8 * ecc->size), 8); - ecc->priv = nand_bch_init(mtd, ecc->size, ecc->bytes, - &ecc->layout); + ecc->bytes = 0; + ecc->priv = nand_bch_init(mtd); if (!ecc->priv) { pr_warn("BCH ECC initialization failed!\n"); BUG(); diff --git a/drivers/mtd/nand/nand_bch.c b/drivers/mtd/nand/nand_bch.c index a87c1b628dfc..b585bae37929 100644 --- a/drivers/mtd/nand/nand_bch.c +++ b/drivers/mtd/nand/nand_bch.c @@ -107,9 +107,6 @@ EXPORT_SYMBOL(nand_bch_correct_data); /** * nand_bch_init - [NAND Interface] Initialize NAND BCH error correction * @mtd: MTD block structure - * @eccsize: ecc block size in bytes - * @eccbytes: ecc length in bytes - * @ecclayout: output default layout * * Returns: * a pointer to a new NAND BCH control structure, or NULL upon failure @@ -123,14 +120,21 @@ EXPORT_SYMBOL(nand_bch_correct_data); * @eccsize = 512 (thus, m=13 is the smallest integer such that 2^m-1 > 512*8) * @eccbytes = 7 (7 bytes are required to store m*t = 13*4 = 52 bits) */ -struct nand_bch_control * -nand_bch_init(struct mtd_info *mtd, unsigned int eccsize, unsigned int eccbytes, - struct nand_ecclayout **ecclayout) +struct nand_bch_control *nand_bch_init(struct mtd_info *mtd) { + struct nand_chip *nand = mtd_to_nand(mtd); unsigned int m, t, eccsteps, i; - struct nand_ecclayout *layout; + struct nand_ecclayout *layout = nand->ecc.layout; struct nand_bch_control *nbc = NULL; unsigned char *erased_page; + unsigned int eccsize = nand->ecc.size; + unsigned int eccbytes = nand->ecc.bytes; + unsigned int eccstrength = nand->ecc.strength; + + if (!eccbytes && eccstrength) { + eccbytes = DIV_ROUND_UP(eccstrength * fls(8 * eccsize), 8); + nand->ecc.bytes = eccbytes; + } if (!eccsize || !eccbytes) { printk(KERN_WARNING "ecc parameters not supplied\n"); @@ -158,7 +162,7 @@ nand_bch_init(struct mtd_info *mtd, unsigned int eccsize, unsigned int eccbytes, eccsteps = mtd->writesize/eccsize; /* if no ecc placement scheme was provided, build one */ - if (!*ecclayout) { + if (!layout) { /* handle large page devices only */ if (mtd->oobsize < 64) { @@ -184,7 +188,7 @@ nand_bch_init(struct mtd_info *mtd, unsigned int eccsize, unsigned int eccbytes, layout->oobfree[0].offset = 2; layout->oobfree[0].length = mtd->oobsize-2-layout->eccbytes; - *ecclayout = layout; + nand->ecc.layout = layout; } /* sanity checks */ @@ -192,7 +196,7 @@ nand_bch_init(struct mtd_info *mtd, unsigned int eccsize, unsigned int eccbytes, printk(KERN_WARNING "eccsize %u is too large\n", eccsize); goto fail; } - if ((*ecclayout)->eccbytes != (eccsteps*eccbytes)) { + if (layout->eccbytes != (eccsteps*eccbytes)) { printk(KERN_WARNING "invalid ecc layout\n"); goto fail; } @@ -216,6 +220,9 @@ nand_bch_init(struct mtd_info *mtd, unsigned int eccsize, unsigned int eccbytes, for (i = 0; i < eccbytes; i++) nbc->eccmask[i] ^= 0xff; + if (!eccstrength) + nand->ecc.strength = (eccbytes * 8) / fls(8 * 
eccsize); + return nbc; fail: nand_bch_free(nbc); diff --git a/drivers/mtd/nand/omap2.c b/drivers/mtd/nand/omap2.c index c553f78ab83f..0749ca1a1456 100644 --- a/drivers/mtd/nand/omap2.c +++ b/drivers/mtd/nand/omap2.c @@ -1807,13 +1807,19 @@ static int omap_nand_probe(struct platform_device *pdev) goto return_error; } + /* + * Bail out earlier to let NAND_ECC_SOFT code create its own + * ecclayout instead of using ours. + */ + if (info->ecc_opt == OMAP_ECC_HAM1_CODE_SW) { + nand_chip->ecc.mode = NAND_ECC_SOFT; + goto scan_tail; + } + /* populate MTD interface based on ECC scheme */ ecclayout = &info->oobinfo; + nand_chip->ecc.layout = ecclayout; switch (info->ecc_opt) { - case OMAP_ECC_HAM1_CODE_SW: - nand_chip->ecc.mode = NAND_ECC_SOFT; - break; - case OMAP_ECC_HAM1_CODE_HW: pr_info("nand: using OMAP_ECC_HAM1_CODE_HW\n"); nand_chip->ecc.mode = NAND_ECC_HW; @@ -1861,10 +1867,7 @@ static int omap_nand_probe(struct platform_device *pdev) ecclayout->oobfree->offset = 1 + ecclayout->eccpos[ecclayout->eccbytes - 1] + 1; /* software bch library is used for locating errors */ - nand_chip->ecc.priv = nand_bch_init(mtd, - nand_chip->ecc.size, - nand_chip->ecc.bytes, - &ecclayout); + nand_chip->ecc.priv = nand_bch_init(mtd); if (!nand_chip->ecc.priv) { dev_err(&info->pdev->dev, "unable to use BCH library\n"); err = -EINVAL; @@ -1925,10 +1928,7 @@ static int omap_nand_probe(struct platform_device *pdev) ecclayout->oobfree->offset = 1 + ecclayout->eccpos[ecclayout->eccbytes - 1] + 1; /* software bch library is used for locating errors */ - nand_chip->ecc.priv = nand_bch_init(mtd, - nand_chip->ecc.size, - nand_chip->ecc.bytes, - &ecclayout); + nand_chip->ecc.priv = nand_bch_init(mtd); if (!nand_chip->ecc.priv) { dev_err(&info->pdev->dev, "unable to use BCH library\n"); err = -EINVAL; @@ -2002,9 +2002,6 @@ static int omap_nand_probe(struct platform_device *pdev) goto return_error; } - if (info->ecc_opt == OMAP_ECC_HAM1_CODE_SW) - goto scan_tail; - /* all OOB bytes from oobfree->offset till end off OOB are free */ ecclayout->oobfree->length = mtd->oobsize - ecclayout->oobfree->offset; /* check if NAND device's OOB is enough to store ECC signatures */ @@ -2015,7 +2012,6 @@ static int omap_nand_probe(struct platform_device *pdev) err = -EINVAL; goto return_error; } - nand_chip->ecc.layout = ecclayout; scan_tail: /* second phase scan */ diff --git a/include/linux/mtd/nand_bch.h b/include/linux/mtd/nand_bch.h index fb0bc3420a10..98f20ef05d60 100644 --- a/include/linux/mtd/nand_bch.h +++ b/include/linux/mtd/nand_bch.h @@ -32,9 +32,7 @@ int nand_bch_correct_data(struct mtd_info *mtd, u_char *dat, u_char *read_ecc, /* * Initialize BCH encoder/decoder */ -struct nand_bch_control * -nand_bch_init(struct mtd_info *mtd, unsigned int eccsize, - unsigned int eccbytes, struct nand_ecclayout **ecclayout); +struct nand_bch_control *nand_bch_init(struct mtd_info *mtd); /* * Release BCH encoder/decoder resources */ @@ -58,9 +56,7 @@ nand_bch_correct_data(struct mtd_info *mtd, unsigned char *buf, return -ENOTSUPP; } -static inline struct nand_bch_control * -nand_bch_init(struct mtd_info *mtd, unsigned int eccsize, - unsigned int eccbytes, struct nand_ecclayout **ecclayout) +static inline struct nand_bch_control *nand_bch_init(struct mtd_info *mtd) { return NULL; } -- cgit v1.2.3 From 3dd8012a8eeb3702fa17450ec1a16a3f38af138d Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Fri, 29 Jan 2016 11:25:36 -0800 Subject: mtd: spi-nor: add TB (Top/Bottom) protect support Some flash support a bit in the status register that inverts 
protection so that it applies to the bottom of the flash, not the top. This yields additions to the protection range table, as noted in the comments. Because this feature is not universal to all flash that support lock/unlock, control it via a new flag. Signed-off-by: Brian Norris Tested-by: Ezequiel Garcia --- drivers/mtd/spi-nor/spi-nor.c | 70 ++++++++++++++++++++++++++++++++++++++----- include/linux/mtd/spi-nor.h | 2 ++ 2 files changed, 65 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/drivers/mtd/spi-nor/spi-nor.c b/drivers/mtd/spi-nor/spi-nor.c index d1edafc51cbb..2aebf18f7c36 100644 --- a/drivers/mtd/spi-nor/spi-nor.c +++ b/drivers/mtd/spi-nor/spi-nor.c @@ -70,6 +70,11 @@ struct flash_info { #define SPI_NOR_QUAD_READ BIT(6) /* Flash supports Quad Read */ #define USE_FSR BIT(7) /* use flag status register */ #define SPI_NOR_HAS_LOCK BIT(8) /* Flash supports lock/unlock via SR */ +#define SPI_NOR_HAS_TB BIT(9) /* + * Flash SR has Top/Bottom (TB) protect + * bit. Must be used with + * SPI_NOR_HAS_LOCK. + */ }; #define JEDEC_MFR(info) ((info)->id[0]) @@ -435,7 +440,10 @@ static void stm_get_locked_range(struct spi_nor *nor, u8 sr, loff_t *ofs, } else { pow = ((sr & mask) ^ mask) >> shift; *len = mtd->size >> pow; - *ofs = mtd->size - *len; + if (nor->flags & SNOR_F_HAS_SR_TB && sr & SR_TB) + *ofs = 0; + else + *ofs = mtd->size - *len; } } @@ -476,12 +484,14 @@ static int stm_is_unlocked_sr(struct spi_nor *nor, loff_t ofs, uint64_t len, /* * Lock a region of the flash. Compatible with ST Micro and similar flash. - * Supports only the block protection bits BP{0,1,2} in the status register + * Supports the block protection bits BP{0,1,2} in the status register * (SR). Does not support these features found in newer SR bitfields: - * - TB: top/bottom protect - only handle TB=0 (top protect) * - SEC: sector/block protect - only handle SEC=0 (block protect) * - CMP: complement protect - only support CMP=0 (range is not complemented) * + * Support for the following is provided conditionally for some flash: + * - TB: top/bottom protect + * * Sample table portion for 8MB flash (Winbond w25q64fw): * * SEC | TB | BP2 | BP1 | BP0 | Prot Length | Protected Portion @@ -494,6 +504,13 @@ static int stm_is_unlocked_sr(struct spi_nor *nor, loff_t ofs, uint64_t len, * 0 | 0 | 1 | 0 | 1 | 2 MB | Upper 1/4 * 0 | 0 | 1 | 1 | 0 | 4 MB | Upper 1/2 * X | X | 1 | 1 | 1 | 8 MB | ALL + * ------|-------|-------|-------|-------|---------------|------------------- + * 0 | 1 | 0 | 0 | 1 | 128 KB | Lower 1/64 + * 0 | 1 | 0 | 1 | 0 | 256 KB | Lower 1/32 + * 0 | 1 | 0 | 1 | 1 | 512 KB | Lower 1/16 + * 0 | 1 | 1 | 0 | 0 | 1 MB | Lower 1/8 + * 0 | 1 | 1 | 0 | 1 | 2 MB | Lower 1/4 + * 0 | 1 | 1 | 1 | 0 | 4 MB | Lower 1/2 * * Returns negative on errors, 0 on success. 
*/ @@ -504,6 +521,8 @@ static int stm_lock(struct spi_nor *nor, loff_t ofs, uint64_t len) u8 mask = SR_BP2 | SR_BP1 | SR_BP0; u8 shift = ffs(mask) - 1, pow, val; loff_t lock_len; + bool can_be_top = true, can_be_bottom = nor->flags & SNOR_F_HAS_SR_TB; + bool use_top; int ret; status_old = read_sr(nor); @@ -514,13 +533,26 @@ static int stm_lock(struct spi_nor *nor, loff_t ofs, uint64_t len) if (stm_is_locked_sr(nor, ofs, len, status_old)) return 0; + /* If anything below us is unlocked, we can't use 'bottom' protection */ + if (!stm_is_locked_sr(nor, 0, ofs, status_old)) + can_be_bottom = false; + /* If anything above us is unlocked, we can't use 'top' protection */ if (!stm_is_locked_sr(nor, ofs + len, mtd->size - (ofs + len), status_old)) + can_be_top = false; + + if (!can_be_bottom && !can_be_top) return -EINVAL; + /* Prefer top, if both are valid */ + use_top = can_be_top; + /* lock_len: length of region that should end up locked */ - lock_len = mtd->size - ofs; + if (use_top) + lock_len = mtd->size - ofs; + else + lock_len = ofs + len; /* * Need smallest pow such that: @@ -539,11 +571,14 @@ static int stm_lock(struct spi_nor *nor, loff_t ofs, uint64_t len) if (!(val & mask)) return -EINVAL; - status_new = (status_old & ~mask) | val; + status_new = (status_old & ~mask & ~SR_TB) | val; /* Disallow further writes if WP pin is asserted */ status_new |= SR_SRWD; + if (!use_top) + status_new |= SR_TB; + /* Don't bother if they're the same */ if (status_new == status_old) return 0; @@ -571,6 +606,8 @@ static int stm_unlock(struct spi_nor *nor, loff_t ofs, uint64_t len) u8 mask = SR_BP2 | SR_BP1 | SR_BP0; u8 shift = ffs(mask) - 1, pow, val; loff_t lock_len; + bool can_be_top = true, can_be_bottom = nor->flags & SNOR_F_HAS_SR_TB; + bool use_top; int ret; status_old = read_sr(nor); @@ -583,10 +620,24 @@ static int stm_unlock(struct spi_nor *nor, loff_t ofs, uint64_t len) /* If anything below us is locked, we can't use 'top' protection */ if (!stm_is_unlocked_sr(nor, 0, ofs, status_old)) + can_be_top = false; + + /* If anything above us is locked, we can't use 'bottom' protection */ + if (!stm_is_unlocked_sr(nor, ofs + len, mtd->size - (ofs + len), + status_old)) + can_be_bottom = false; + + if (!can_be_bottom && !can_be_top) return -EINVAL; + /* Prefer top, if both are valid */ + use_top = can_be_top; + /* lock_len: length of region that should remain locked */ - lock_len = mtd->size - (ofs + len); + if (use_top) + lock_len = mtd->size - (ofs + len); + else + lock_len = ofs; /* * Need largest pow such that: @@ -607,12 +658,15 @@ static int stm_unlock(struct spi_nor *nor, loff_t ofs, uint64_t len) return -EINVAL; } - status_new = (status_old & ~mask) | val; + status_new = (status_old & ~mask & ~SR_TB) | val; /* Don't protect status register if we're fully unlocked */ if (lock_len == mtd->size) status_new &= ~SR_SRWD; + if (!use_top) + status_new |= SR_TB; + /* Don't bother if they're the same */ if (status_new == status_old) return 0; @@ -1277,6 +1331,8 @@ int spi_nor_scan(struct spi_nor *nor, const char *name, enum read_mode mode) if (info->flags & USE_FSR) nor->flags |= SNOR_F_USE_FSR; + if (info->flags & SPI_NOR_HAS_TB) + nor->flags |= SNOR_F_HAS_SR_TB; #ifdef CONFIG_MTD_SPI_NOR_USE_4K_SECTORS /* prefer "small sector" erase if possible */ diff --git a/include/linux/mtd/spi-nor.h b/include/linux/mtd/spi-nor.h index 62356d50815b..3c36113a88e1 100644 --- a/include/linux/mtd/spi-nor.h +++ b/include/linux/mtd/spi-nor.h @@ -85,6 +85,7 @@ #define SR_BP0 BIT(2) /* Block protect 0 */ #define SR_BP1 
BIT(3) /* Block protect 1 */ #define SR_BP2 BIT(4) /* Block protect 2 */ +#define SR_TB BIT(5) /* Top/Bottom protect */ #define SR_SRWD BIT(7) /* SR write protect */ #define SR_QUAD_EN_MX BIT(6) /* Macronix Quad I/O */ @@ -116,6 +117,7 @@ enum spi_nor_ops { enum spi_nor_option_flags { SNOR_F_USE_FSR = BIT(0), + SNOR_F_HAS_SR_TB = BIT(1), }; /** -- cgit v1.2.3 From c4a5923055c9e0c87dfc0387f7cda5ee2bbac3c1 Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Thu, 10 Dec 2015 12:50:45 -0600 Subject: tracing: Add event record param to trigger_ops.func() Some triggers may need access to the trace event, so pass it in. Also fix up the existing trigger funcs and their callers. Link: http://lkml.kernel.org/r/543e31e9fc445ef61077421ab219033401c39846.1449767187.git.tom.zanussi@linux.intel.com Signed-off-by: Tom Zanussi Tested-by: Masami Hiramatsu Reviewed-by: Namhyung Kim Signed-off-by: Steven Rostedt --- include/linux/trace_events.h | 7 ++++--- kernel/trace/trace.h | 6 ++++-- kernel/trace/trace_events_trigger.c | 36 +++++++++++++++++++----------------- 3 files changed, 27 insertions(+), 22 deletions(-) (limited to 'include/linux') diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index 925730bc9fc1..0d6930e941e8 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -430,7 +430,8 @@ extern int call_filter_check_discard(struct trace_event_call *call, void *rec, extern enum event_trigger_type event_triggers_call(struct trace_event_file *file, void *rec); extern void event_triggers_post_call(struct trace_event_file *file, - enum event_trigger_type tt); + enum event_trigger_type tt, + void *rec); bool trace_event_ignore_this_pid(struct trace_event_file *trace_file); @@ -517,7 +518,7 @@ event_trigger_unlock_commit(struct trace_event_file *file, trace_buffer_unlock_commit(file->tr, buffer, event, irq_flags, pc); if (tt) - event_triggers_post_call(file, tt); + event_triggers_post_call(file, tt, entry); } /** @@ -550,7 +551,7 @@ event_trigger_unlock_commit_regs(struct trace_event_file *file, irq_flags, pc, regs); if (tt) - event_triggers_post_call(file, tt); + event_triggers_post_call(file, tt, entry); } #ifdef CONFIG_BPF_EVENTS diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index b2bc956e2b0d..c10456e72106 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -1201,7 +1201,8 @@ extern int register_event_command(struct event_command *cmd); * @func: The trigger 'probe' function called when the triggering * event occurs. The data passed into this callback is the data * that was supplied to the event_command @reg() function that - * registered the trigger (see struct event_command). + * registered the trigger (see struct event_command) along with + * the trace record, rec. * * @init: An optional initialization function called for the trigger * when the trigger is registered (via the event_command reg() @@ -1226,7 +1227,8 @@ extern int register_event_command(struct event_command *cmd); * (see trace_event_triggers.c). 
*/ struct event_trigger_ops { - void (*func)(struct event_trigger_data *data); + void (*func)(struct event_trigger_data *data, + void *rec); int (*init)(struct event_trigger_ops *ops, struct event_trigger_data *data); void (*free)(struct event_trigger_ops *ops, diff --git a/kernel/trace/trace_events_trigger.c b/kernel/trace/trace_events_trigger.c index f40424f35dcb..0a62887c63c0 100644 --- a/kernel/trace/trace_events_trigger.c +++ b/kernel/trace/trace_events_trigger.c @@ -73,7 +73,7 @@ event_triggers_call(struct trace_event_file *file, void *rec) list_for_each_entry_rcu(data, &file->triggers, list) { if (!rec) { - data->ops->func(data); + data->ops->func(data, rec); continue; } filter = rcu_dereference_sched(data->filter); @@ -83,7 +83,7 @@ event_triggers_call(struct trace_event_file *file, void *rec) tt |= data->cmd_ops->trigger_type; continue; } - data->ops->func(data); + data->ops->func(data, rec); } return tt; } @@ -93,6 +93,7 @@ EXPORT_SYMBOL_GPL(event_triggers_call); * event_triggers_post_call - Call 'post_triggers' for a trace event * @file: The trace_event_file associated with the event * @tt: enum event_trigger_type containing a set bit for each trigger to invoke + * @rec: The trace entry for the event * * For each trigger associated with an event, invoke the trigger * function registered with the associated trigger command, if the @@ -103,13 +104,14 @@ EXPORT_SYMBOL_GPL(event_triggers_call); */ void event_triggers_post_call(struct trace_event_file *file, - enum event_trigger_type tt) + enum event_trigger_type tt, + void *rec) { struct event_trigger_data *data; list_for_each_entry_rcu(data, &file->triggers, list) { if (data->cmd_ops->trigger_type & tt) - data->ops->func(data); + data->ops->func(data, rec); } } EXPORT_SYMBOL_GPL(event_triggers_post_call); @@ -745,7 +747,7 @@ int set_trigger_filter(char *filter_str, } static void -traceon_trigger(struct event_trigger_data *data) +traceon_trigger(struct event_trigger_data *data, void *rec) { if (tracing_is_on()) return; @@ -754,7 +756,7 @@ traceon_trigger(struct event_trigger_data *data) } static void -traceon_count_trigger(struct event_trigger_data *data) +traceon_count_trigger(struct event_trigger_data *data, void *rec) { if (tracing_is_on()) return; @@ -769,7 +771,7 @@ traceon_count_trigger(struct event_trigger_data *data) } static void -traceoff_trigger(struct event_trigger_data *data) +traceoff_trigger(struct event_trigger_data *data, void *rec) { if (!tracing_is_on()) return; @@ -778,7 +780,7 @@ traceoff_trigger(struct event_trigger_data *data) } static void -traceoff_count_trigger(struct event_trigger_data *data) +traceoff_count_trigger(struct event_trigger_data *data, void *rec) { if (!tracing_is_on()) return; @@ -874,13 +876,13 @@ static struct event_command trigger_traceoff_cmd = { #ifdef CONFIG_TRACER_SNAPSHOT static void -snapshot_trigger(struct event_trigger_data *data) +snapshot_trigger(struct event_trigger_data *data, void *rec) { tracing_snapshot(); } static void -snapshot_count_trigger(struct event_trigger_data *data) +snapshot_count_trigger(struct event_trigger_data *data, void *rec) { if (!data->count) return; @@ -888,7 +890,7 @@ snapshot_count_trigger(struct event_trigger_data *data) if (data->count != -1) (data->count)--; - snapshot_trigger(data); + snapshot_trigger(data, rec); } static int @@ -967,13 +969,13 @@ static __init int register_trigger_snapshot_cmd(void) { return 0; } #define STACK_SKIP 3 static void -stacktrace_trigger(struct event_trigger_data *data) +stacktrace_trigger(struct event_trigger_data 
*data, void *rec) { trace_dump_stack(STACK_SKIP); } static void -stacktrace_count_trigger(struct event_trigger_data *data) +stacktrace_count_trigger(struct event_trigger_data *data, void *rec) { if (!data->count) return; @@ -981,7 +983,7 @@ static void stacktrace_count_trigger(struct event_trigger_data *data) if (data->count != -1) (data->count)--; - stacktrace_trigger(data); + stacktrace_trigger(data, rec); } static int @@ -1052,7 +1054,7 @@ struct enable_trigger_data { }; static void -event_enable_trigger(struct event_trigger_data *data) +event_enable_trigger(struct event_trigger_data *data, void *rec) { struct enable_trigger_data *enable_data = data->private_data; @@ -1063,7 +1065,7 @@ event_enable_trigger(struct event_trigger_data *data) } static void -event_enable_count_trigger(struct event_trigger_data *data) +event_enable_count_trigger(struct event_trigger_data *data, void *rec) { struct enable_trigger_data *enable_data = data->private_data; @@ -1077,7 +1079,7 @@ event_enable_count_trigger(struct event_trigger_data *data) if (data->count != -1) (data->count)--; - event_enable_trigger(data); + event_enable_trigger(data, rec); } static int -- cgit v1.2.3 From e4857982f49d21c05a84351b56724bf353022355 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 8 Mar 2016 11:51:26 -0500 Subject: cgroup: use css_set->mg_dst_cgrp for the migration target cgroup Migration can be multi-target on the default hierarchy when a controller is enabled - processes belonging to each child cgroup have to be moved to the child cgroup itself to refresh css association. This isn't a problem for cgroup_migrate_add_src() as each source css_set still maps to a single source and target cgroup; however, cgroup_migrate_prepare_dst() is called once after all source css_sets are added and thus might not have a single destination cgroup. This is currently worked around by specifying NULL for @dst_cgrp and using the source's default cgroup as the destination, as the only multi-target migration in use is self-targeting. While this works, it's subtle and clunky. As all target cgroups are already specified while preparing the source css_sets, this clunkiness can easily be removed by recording the target cgroup in each source css_set. This patch adds css_set->mg_dst_cgrp, which is recorded in cgroup_migrate_add_src() and used by cgroup_migrate_prepare_dst(). This also makes the migration code ready for arbitrary multi-target migration. Signed-off-by: Tejun Heo --- include/linux/cgroup-defs.h | 9 +++++---- kernel/cgroup.c | 26 +++++++++++++------------- 2 files changed, 18 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index aae8c94de4b3..590291d56049 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -191,12 +191,13 @@ struct css_set { /* * If this cset is acting as the source of migration the following - * two fields are set. mg_src_cgrp is the source cgroup of the - * on-going migration and mg_dst_cset is the destination cset the - * target tasks on this cset should be migrated to. Protected by - * cgroup_mutex. + * two fields are set. mg_src_cgrp and mg_dst_cgrp are + * respectively the source and destination cgroups of the on-going + * migration. mg_dst_cset is the destination cset the target tasks + * on this cset should be migrated to. Protected by cgroup_mutex.
*/ struct cgroup *mg_src_cgrp; + struct cgroup *mg_dst_cgrp; struct css_set *mg_dst_cset; /* diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 5dd761355033..fbd3e99a4e98 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -2473,6 +2473,7 @@ static void cgroup_migrate_finish(struct list_head *preloaded_csets) spin_lock_bh(&css_set_lock); list_for_each_entry_safe(cset, tmp_cset, preloaded_csets, mg_preload_node) { cset->mg_src_cgrp = NULL; + cset->mg_dst_cgrp = NULL; cset->mg_dst_cset = NULL; list_del_init(&cset->mg_preload_node); put_css_set_locked(cset); @@ -2511,32 +2512,31 @@ static void cgroup_migrate_add_src(struct css_set *src_cset, return; WARN_ON(src_cset->mg_src_cgrp); + WARN_ON(src_cset->mg_dst_cgrp); WARN_ON(!list_empty(&src_cset->mg_tasks)); WARN_ON(!list_empty(&src_cset->mg_node)); src_cset->mg_src_cgrp = src_cgrp; + src_cset->mg_dst_cgrp = dst_cgrp; get_css_set(src_cset); list_add(&src_cset->mg_preload_node, preloaded_csets); } /** * cgroup_migrate_prepare_dst - prepare destination css_sets for migration - * @dst_cgrp: the destination cgroup (may be %NULL) * @preloaded_csets: list of preloaded source css_sets * - * Tasks are about to be moved to @dst_cgrp and all the source css_sets - * have been preloaded to @preloaded_csets. This function looks up and - * pins all destination css_sets, links each to its source, and append them - * to @preloaded_csets. If @dst_cgrp is %NULL, the destination of each - * source css_set is assumed to be its cgroup on the default hierarchy. + * Tasks are about to be moved and all the source css_sets have been + * preloaded to @preloaded_csets. This function looks up and pins all + * destination css_sets, links each to its source, and append them to + * @preloaded_csets. * * This function must be called after cgroup_migrate_add_src() has been * called on each migration source css_set. After migration is performed * using cgroup_migrate(), cgroup_migrate_finish() must be called on * @preloaded_csets. 
*/ -static int cgroup_migrate_prepare_dst(struct cgroup *dst_cgrp, - struct list_head *preloaded_csets) +static int cgroup_migrate_prepare_dst(struct list_head *preloaded_csets) { LIST_HEAD(csets); struct css_set *src_cset, *tmp_cset; @@ -2547,8 +2547,7 @@ static int cgroup_migrate_prepare_dst(struct cgroup *dst_cgrp, list_for_each_entry_safe(src_cset, tmp_cset, preloaded_csets, mg_preload_node) { struct css_set *dst_cset; - dst_cset = find_css_set(src_cset, - dst_cgrp ?: src_cset->dfl_cgrp); + dst_cset = find_css_set(src_cset, src_cset->mg_dst_cgrp); if (!dst_cset) goto err; @@ -2561,6 +2560,7 @@ static int cgroup_migrate_prepare_dst(struct cgroup *dst_cgrp, */ if (src_cset == dst_cset) { src_cset->mg_src_cgrp = NULL; + src_cset->mg_dst_cgrp = NULL; list_del_init(&src_cset->mg_preload_node); put_css_set(src_cset); put_css_set(dst_cset); @@ -2657,7 +2657,7 @@ static int cgroup_attach_task(struct cgroup *dst_cgrp, spin_unlock_bh(&css_set_lock); /* prepare dst csets and commit */ - ret = cgroup_migrate_prepare_dst(dst_cgrp, &preloaded_csets); + ret = cgroup_migrate_prepare_dst(&preloaded_csets); if (!ret) ret = cgroup_migrate(leader, threadgroup, dst_cgrp->root); @@ -2916,7 +2916,7 @@ static int cgroup_update_dfl_csses(struct cgroup *cgrp) spin_unlock_bh(&css_set_lock); /* NULL dst indicates self on default hierarchy */ - ret = cgroup_migrate_prepare_dst(NULL, &preloaded_csets); + ret = cgroup_migrate_prepare_dst(&preloaded_csets); if (ret) goto out_finish; @@ -4156,7 +4156,7 @@ int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from) cgroup_migrate_add_src(link->cset, to, &preloaded_csets); spin_unlock_bh(&css_set_lock); - ret = cgroup_migrate_prepare_dst(to, &preloaded_csets); + ret = cgroup_migrate_prepare_dst(&preloaded_csets); if (ret) goto out_err; -- cgit v1.2.3 From f6d635ad341d5cc0b9c7ab46adfbf3bf5886cee4 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 8 Mar 2016 11:51:26 -0500 Subject: cgroup: implement cgroup_subsys->implicit_on_dfl Some controllers, perf_event for now and possibly freezer in the future, don't really make sense to control explicitly through "cgroup.subtree_control". For example, the primary role of perf_event is identifying the cgroups of tasks; however, because the controller also keeps a small amount of state per cgroup, it can't be replaced with simple cgroup membership tests. This patch implements cgroup_subsys->implicit_on_dfl flag. When set, the controller is implicitly enabled on all cgroups on the v2 hierarchy so that utility type controllers such as perf_event can be enabled and function transparently. An implicit controller doesn't show up in "cgroup.controllers" or "cgroup.subtree_control", is exempt from no internal process rule and can be stolen from the default hierarchy even if there are non-root csses. v2: Reimplemented on top of the recent updates to css handling and subsystem rebinding. Rebinding implicit subsystems is now a simple matter of exempting it from the busy subsystem check. 
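For illustration only, a utility controller would opt in by setting the new flag in its cgroup_subsys definition. This is a minimal hedged sketch: ->implicit_on_dfl is the field added by this patch, while the controller name and its callbacks are hypothetical and assumed to exist elsewhere:

	/* hypothetical utility controller, in the spirit of perf_event;
	 * example_css_alloc()/example_css_free() are assumed helpers */
	struct cgroup_subsys example_cgrp_subsys = {
		.css_alloc	 = example_css_alloc,
		.css_free	 = example_css_free,
		.implicit_on_dfl = true,	/* enabled on every v2 cgroup */
	};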
Signed-off-by: Tejun Heo --- include/linux/cgroup-defs.h | 13 +++++++++++++ kernel/cgroup.c | 38 +++++++++++++++++++++++++++++++------- 2 files changed, 44 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 590291d56049..34b42f03fcd8 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -450,6 +450,19 @@ struct cgroup_subsys { bool early_init:1; + /* + * If %true, the controller, on the default hierarchy, doesn't show + * up in "cgroup.controllers" or "cgroup.subtree_control", is + * implicitly enabled on all cgroups on the default hierarchy, and + * bypasses the "no internal process" constraint. This is for + * utility type controllers which is transparent to userland. + * + * An implicit controller can be stolen from the default hierarchy + * anytime and thus must be okay with offline csses from previous + * hierarchies coexisting with csses for the current one. + */ + bool implicit_on_dfl:1; + /* * If %false, this subsystem is properly hierarchical - * configuration, resource accounting and restriction on a parent diff --git a/kernel/cgroup.c b/kernel/cgroup.c index fbd3e99a4e98..e22df5d81e59 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -186,6 +186,9 @@ static u16 cgroup_no_v1_mask; /* some controllers are not supported in the default hierarchy */ static u16 cgrp_dfl_inhibit_ss_mask; +/* some controllers are implicitly enabled on the default hierarchy */ +static unsigned long cgrp_dfl_implicit_ss_mask; + /* The list of hierarchy roots */ static LIST_HEAD(cgroup_roots); @@ -359,8 +362,8 @@ static u16 cgroup_control(struct cgroup *cgrp) return parent->subtree_control; if (cgroup_on_dfl(cgrp)) - root_ss_mask &= ~cgrp_dfl_inhibit_ss_mask; - + root_ss_mask &= ~(cgrp_dfl_inhibit_ss_mask | + cgrp_dfl_implicit_ss_mask); return root_ss_mask; } @@ -1327,6 +1330,8 @@ static u16 cgroup_calc_subtree_ss_mask(u16 subtree_control, u16 this_ss_mask) lockdep_assert_held(&cgroup_mutex); + cur_ss_mask |= cgrp_dfl_implicit_ss_mask; + while (true) { u16 new_ss_mask = cur_ss_mask; @@ -1512,8 +1517,13 @@ static int rebind_subsystems(struct cgroup_root *dst_root, u16 ss_mask) lockdep_assert_held(&cgroup_mutex); do_each_subsys_mask(ss, ssid, ss_mask) { - /* if @ss has non-root csses attached to it, can't move */ - if (css_next_child(NULL, cgroup_css(&ss->root->cgrp, ss))) + /* + * If @ss has non-root csses attached to it, can't move. + * If @ss is an implicit controller, it is exempt from this + * rule and can be stolen. 
+ */ + if (css_next_child(NULL, cgroup_css(&ss->root->cgrp, ss)) && + !ss->implicit_on_dfl) return -EBUSY; /* can't move between two non-dummy roots either */ @@ -3039,6 +3049,18 @@ static void cgroup_restore_control(struct cgroup *cgrp) } } +static bool css_visible(struct cgroup_subsys_state *css) +{ + struct cgroup_subsys *ss = css->ss; + struct cgroup *cgrp = css->cgroup; + + if (cgroup_control(cgrp) & (1 << ss->id)) + return true; + if (!(cgroup_ss_mask(cgrp) & (1 << ss->id))) + return false; + return cgroup_on_dfl(cgrp) && ss->implicit_on_dfl; +} + /** * cgroup_apply_control_enable - enable or show csses according to control * @cgrp: root of the target subtree @@ -3074,7 +3096,7 @@ static int cgroup_apply_control_enable(struct cgroup *cgrp) return PTR_ERR(css); } - if (cgroup_control(dsct) & (1 << ss->id)) { + if (css_visible(css)) { ret = css_populate_dir(css); if (ret) return ret; @@ -3117,7 +3139,7 @@ static void cgroup_apply_control_disable(struct cgroup *cgrp) if (css->parent && !(cgroup_ss_mask(dsct) & (1 << ss->id))) { kill_css(css); - } else if (!(cgroup_control(dsct) & (1 << ss->id))) { + } else if (!css_visible(css)) { css_clear_dir(css); if (ss->css_reset) ss->css_reset(css); @@ -5455,7 +5477,9 @@ int __init cgroup_init(void) cgrp_dfl_root.subsys_mask |= 1 << ss->id; - if (!ss->dfl_cftypes) + if (ss->implicit_on_dfl) + cgrp_dfl_implicit_ss_mask |= 1 << ss->id; + else if (!ss->dfl_cftypes) cgrp_dfl_inhibit_ss_mask |= 1 << ss->id; if (ss->dfl_cftypes == ss->legacy_cftypes) { -- cgit v1.2.3 From 0c0e0736acad4e76e718456c75d78ad95eea0011 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Tue, 1 Mar 2016 11:38:46 -0600 Subject: PCI: Set ROM shadow location in arch code, not in PCI core IORESOURCE_ROM_SHADOW means there is a copy of a device's option ROM in RAM. The existence of such a copy and its location are arch-specific. Previously the IORESOURCE_ROM_SHADOW flag was set in arch code, but the 0xC0000-0xDFFFF location was hard-coded into the PCI core. If we're using a shadow copy in RAM, disable the ROM BAR and release the address space it was consuming. Move the location information from the PCI core to the arch code that sets IORESOURCE_ROM_SHADOW. Save the location of the RAM copy in the struct resource for PCI_ROM_RESOURCE. After this change, pci_map_rom() will call pci_assign_resource() and pci_enable_rom() for these IORESOURCE_ROM_SHADOW resources, which we did not do before. This is safe because: - pci_assign_resource() will do nothing because the resource is marked IORESOURCE_PCI_FIXED, which means we can't move it, and - pci_enable_rom() will not turn on the ROM BAR's enable bit because the resource is marked IORESOURCE_ROM_SHADOW, which means it is in RAM rather than in PCI memory space. Storing the location in the struct resource means "lspci" will show the shadow location, not the value from the ROM BAR. Signed-off-by: Bjorn Helgaas --- arch/ia64/pci/fixup.c | 22 ++++++++++++++++------ arch/x86/pci/fixup.c | 22 ++++++++++++++++------ drivers/pci/rom.c | 11 ----------- include/linux/ioport.h | 2 +- 4 files changed, 33 insertions(+), 24 deletions(-) (limited to 'include/linux') diff --git a/arch/ia64/pci/fixup.c b/arch/ia64/pci/fixup.c index fd9ceff2b5a5..41caa99add51 100644 --- a/arch/ia64/pci/fixup.c +++ b/arch/ia64/pci/fixup.c @@ -17,14 +17,14 @@ * * The standard boot ROM sequence for an x86 machine uses the BIOS * to select an initial video card for boot display. This boot video - * card will have it's BIOS copied to C0000 in system RAM. 
+ * card will have its BIOS copied to 0xC0000 in system RAM. * IORESOURCE_ROM_SHADOW is used to associate the boot video * card with this copy. On laptops this copy has to be used since * the main ROM may be compressed or combined with another image. * See pci_map_rom() for use of this flag. Before marking the device * with IORESOURCE_ROM_SHADOW check if a vga_default_device is already set - * by either arch cde or vga-arbitration, if so only apply the fixup to this - * already determined primary video card. + * by either arch code or vga-arbitration; if so only apply the fixup to this + * already-determined primary video card. */ static void pci_fixup_video(struct pci_dev *pdev) @@ -32,6 +32,7 @@ static void pci_fixup_video(struct pci_dev *pdev) struct pci_dev *bridge; struct pci_bus *bus; u16 config; + struct resource *res; if ((strcmp(ia64_platform_name, "dig") != 0) && (strcmp(ia64_platform_name, "hpzx1") != 0)) @@ -61,9 +62,18 @@ static void pci_fixup_video(struct pci_dev *pdev) if (!vga_default_device() || pdev == vga_default_device()) { pci_read_config_word(pdev, PCI_COMMAND, &config); if (config & (PCI_COMMAND_IO | PCI_COMMAND_MEMORY)) { - pdev->resource[PCI_ROM_RESOURCE].flags |= IORESOURCE_ROM_SHADOW | - IORESOURCE_PCI_FIXED; - dev_printk(KERN_DEBUG, &pdev->dev, "Video device with shadowed ROM\n"); + res = &pdev->resource[PCI_ROM_RESOURCE]; + + pci_disable_rom(pdev); + if (res->parent) + release_resource(res); + + res->start = 0xC0000; + res->end = res->start + 0x20000 - 1; + res->flags = IORESOURCE_MEM | IORESOURCE_ROM_SHADOW | + IORESOURCE_PCI_FIXED; + dev_info(&pdev->dev, "Video device with shadowed ROM at %pR\n", + res); } } } diff --git a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c index dac027c1adb1..b7de1929714b 100644 --- a/arch/x86/pci/fixup.c +++ b/arch/x86/pci/fixup.c @@ -297,14 +297,14 @@ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_MCH_PC1, pcie_r * * The standard boot ROM sequence for an x86 machine uses the BIOS * to select an initial video card for boot display. This boot video - * card will have it's BIOS copied to C0000 in system RAM. + * card will have its BIOS copied to 0xC0000 in system RAM. * IORESOURCE_ROM_SHADOW is used to associate the boot video * card with this copy. On laptops this copy has to be used since * the main ROM may be compressed or combined with another image. * See pci_map_rom() for use of this flag. Before marking the device * with IORESOURCE_ROM_SHADOW check if a vga_default_device is already set - * by either arch cde or vga-arbitration, if so only apply the fixup to this - * already determined primary video card. + * by either arch code or vga-arbitration; if so only apply the fixup to this + * already-determined primary video card. */ static void pci_fixup_video(struct pci_dev *pdev) @@ -312,6 +312,7 @@ static void pci_fixup_video(struct pci_dev *pdev) struct pci_dev *bridge; struct pci_bus *bus; u16 config; + struct resource *res; /* Is VGA routed to us? 
*/ bus = pdev->bus; @@ -336,9 +337,18 @@ static void pci_fixup_video(struct pci_dev *pdev) if (!vga_default_device() || pdev == vga_default_device()) { pci_read_config_word(pdev, PCI_COMMAND, &config); if (config & (PCI_COMMAND_IO | PCI_COMMAND_MEMORY)) { - pdev->resource[PCI_ROM_RESOURCE].flags |= IORESOURCE_ROM_SHADOW | - IORESOURCE_PCI_FIXED; - dev_printk(KERN_DEBUG, &pdev->dev, "Video device with shadowed ROM\n"); + res = &pdev->resource[PCI_ROM_RESOURCE]; + + pci_disable_rom(pdev); + if (res->parent) + release_resource(res); + + res->start = 0xC0000; + res->end = res->start + 0x20000 - 1; + res->flags = IORESOURCE_MEM | IORESOURCE_ROM_SHADOW | + IORESOURCE_PCI_FIXED; + dev_info(&pdev->dev, "Video device with shadowed ROM at %pR\n", + res); } } } diff --git a/drivers/pci/rom.c b/drivers/pci/rom.c index 5da8d061d600..80e82b13292f 100644 --- a/drivers/pci/rom.c +++ b/drivers/pci/rom.c @@ -128,16 +128,6 @@ void __iomem *pci_map_rom(struct pci_dev *pdev, size_t *size) loff_t start; void __iomem *rom; - /* - * IORESOURCE_ROM_SHADOW set on x86, x86_64 and IA64 supports legacy - * memory map if the VGA enable bit of the Bridge Control register is - * set for embedded VGA. - */ - if (res->flags & IORESOURCE_ROM_SHADOW) { - /* primary video rom always starts here */ - start = (loff_t)0xC0000; - *size = 0x20000; /* cover C000:0 through E000:0 */ - } else { if (res->flags & (IORESOURCE_ROM_COPY | IORESOURCE_ROM_BIOS_COPY)) { *size = pci_resource_len(pdev, PCI_ROM_RESOURCE); @@ -157,7 +147,6 @@ void __iomem *pci_map_rom(struct pci_dev *pdev, size_t *size) if (pci_enable_rom(pdev)) return NULL; } - } rom = ioremap(start, *size); if (!rom) { diff --git a/include/linux/ioport.h b/include/linux/ioport.h index 24bea087e7af..2cf16673f17a 100644 --- a/include/linux/ioport.h +++ b/include/linux/ioport.h @@ -98,7 +98,7 @@ struct resource { /* PCI ROM control bits (IORESOURCE_BITS) */ #define IORESOURCE_ROM_ENABLE (1<<0) /* ROM is enabled, same as PCI_ROM_ADDRESS_ENABLE */ -#define IORESOURCE_ROM_SHADOW (1<<1) /* ROM is copy at C000:0 */ +#define IORESOURCE_ROM_SHADOW (1<<1) /* Use RAM image, not ROM BAR */ #define IORESOURCE_ROM_COPY (1<<2) /* ROM is alloc'd copy, resource field overlaid */ #define IORESOURCE_ROM_BIOS_COPY (1<<3) /* ROM is BIOS copy, resource field overlaid */ -- cgit v1.2.3 From 4774f400935fbb44a25f5e8fd5c5583431e28ac1 Mon Sep 17 00:00:00 2001 From: Stefan Agner Date: Tue, 8 Mar 2016 10:34:04 -0800 Subject: Input: ad7879 - move header to platform_data directory The header file is used by the SPI and I2C variant of the driver. Therefore, move it to a more generic place under platform_data. 
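With the header in its new location, board code can use the same include regardless of whether the part sits on SPI or I2C. Below is a hedged sketch of an I2C registration: the include path is the one introduced by this patch, while the I2C id string, device address and field values are illustrative assumptions, not taken from any real board file.

	#include <linux/i2c.h>
	#include <linux/platform_data/ad7879.h>

	/* hypothetical board support snippet */
	static const struct ad7879_platform_data example_ad7879_ts_info = {
		.model		= 7879,	/* Model = AD7879 */
		.x_plate_ohms	= 620,	/* assumed, per-board value */
	};

	static struct i2c_board_info example_i2c_board_info __initdata = {
		I2C_BOARD_INFO("ad7879", 0x2c),	/* assumed id and address */
		.platform_data	= &example_ad7879_ts_info,
	};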
Signed-off-by: Stefan Agner Signed-off-by: Dmitry Torokhov --- arch/blackfin/mach-bf527/boards/ezbrd.c | 2 +- arch/blackfin/mach-bf527/boards/ezkit.c | 2 +- arch/blackfin/mach-bf527/boards/tll6527m.c | 2 +- arch/blackfin/mach-bf537/boards/stamp.c | 2 +- arch/blackfin/mach-bf538/boards/ezkit.c | 2 +- drivers/input/touchscreen/ad7879.c | 10 ++++---- include/linux/platform_data/ad7879.h | 41 ++++++++++++++++++++++++++++++ include/linux/spi/ad7879.h | 41 ------------------------------ 8 files changed, 51 insertions(+), 51 deletions(-) create mode 100644 include/linux/platform_data/ad7879.h delete mode 100644 include/linux/spi/ad7879.h (limited to 'include/linux') diff --git a/arch/blackfin/mach-bf527/boards/ezbrd.c b/arch/blackfin/mach-bf527/boards/ezbrd.c index a3a572352769..80bcfd1d023e 100644 --- a/arch/blackfin/mach-bf527/boards/ezbrd.c +++ b/arch/blackfin/mach-bf527/boards/ezbrd.c @@ -279,7 +279,7 @@ static const struct ad7877_platform_data bfin_ad7877_ts_info = { #endif #if IS_ENABLED(CONFIG_TOUCHSCREEN_AD7879) -#include +#include static const struct ad7879_platform_data bfin_ad7879_ts_info = { .model = 7879, /* Model = AD7879 */ .x_plate_ohms = 620, /* 620 Ohm from the touch datasheet */ diff --git a/arch/blackfin/mach-bf527/boards/ezkit.c b/arch/blackfin/mach-bf527/boards/ezkit.c index d4219e8e5ab8..571edfd2ecf3 100644 --- a/arch/blackfin/mach-bf527/boards/ezkit.c +++ b/arch/blackfin/mach-bf527/boards/ezkit.c @@ -477,7 +477,7 @@ static const struct ad7877_platform_data bfin_ad7877_ts_info = { #endif #if IS_ENABLED(CONFIG_TOUCHSCREEN_AD7879) -#include +#include static const struct ad7879_platform_data bfin_ad7879_ts_info = { .model = 7879, /* Model = AD7879 */ .x_plate_ohms = 620, /* 620 Ohm from the touch datasheet */ diff --git a/arch/blackfin/mach-bf527/boards/tll6527m.c b/arch/blackfin/mach-bf527/boards/tll6527m.c index a0f5856a5ff8..c1acce4c2e45 100644 --- a/arch/blackfin/mach-bf527/boards/tll6527m.c +++ b/arch/blackfin/mach-bf527/boards/tll6527m.c @@ -29,7 +29,7 @@ #include #if IS_ENABLED(CONFIG_TOUCHSCREEN_AD7879) -#include +#include #define LCD_BACKLIGHT_GPIO 0x40 /* TLL6527M uses TLL7UIQ35 / ADI LCD EZ Extender. 
AD7879 AUX GPIO is used for * LCD Backlight Enable diff --git a/arch/blackfin/mach-bf537/boards/stamp.c b/arch/blackfin/mach-bf537/boards/stamp.c index 88a19fc9844d..cb1b6a9ee4f2 100644 --- a/arch/blackfin/mach-bf537/boards/stamp.c +++ b/arch/blackfin/mach-bf537/boards/stamp.c @@ -776,7 +776,7 @@ static const struct ad7877_platform_data bfin_ad7877_ts_info = { #endif #if IS_ENABLED(CONFIG_TOUCHSCREEN_AD7879) -#include +#include static const struct ad7879_platform_data bfin_ad7879_ts_info = { .model = 7879, /* Model = AD7879 */ .x_plate_ohms = 620, /* 620 Ohm from the touch datasheet */ diff --git a/arch/blackfin/mach-bf538/boards/ezkit.c b/arch/blackfin/mach-bf538/boards/ezkit.c index ae2fcbb00119..4a03c4465dbe 100644 --- a/arch/blackfin/mach-bf538/boards/ezkit.c +++ b/arch/blackfin/mach-bf538/boards/ezkit.c @@ -521,7 +521,7 @@ static struct bfin5xx_spi_chip spi_flash_chip_info = { #endif /* CONFIG_SPI_BFIN5XX */ #if IS_ENABLED(CONFIG_TOUCHSCREEN_AD7879) -#include +#include static const struct ad7879_platform_data bfin_ad7879_ts_info = { .model = 7879, /* Model = AD7879 */ .x_plate_ohms = 620, /* 620 Ohm from the touch datasheet */ diff --git a/drivers/input/touchscreen/ad7879.c b/drivers/input/touchscreen/ad7879.c index fec66ad80513..b32dbf389777 100644 --- a/drivers/input/touchscreen/ad7879.c +++ b/drivers/input/touchscreen/ad7879.c @@ -31,7 +31,7 @@ #include #include -#include +#include #include #include "ad7879.h" @@ -170,10 +170,10 @@ static int ad7879_report(struct ad7879 *ts) * filter. The combination of these two techniques provides a robust * solution, discarding the spurious noise in the signal and keeping * only the data of interest. The size of both filters is - * programmable. (dev.platform_data, see linux/spi/ad7879.h) Other - * user-programmable conversion controls include variable acquisition - * time, and first conversion delay. Up to 16 averages can be taken - * per conversion. + * programmable. (dev.platform_data, see linux/platform_data/ad7879.h) + * Other user-programmable conversion controls include variable + * acquisition time, and first conversion delay. Up to 16 averages can + * be taken per conversion. */ if (likely(x && z1)) { diff --git a/include/linux/platform_data/ad7879.h b/include/linux/platform_data/ad7879.h new file mode 100644 index 000000000000..69e2e1fd2bc8 --- /dev/null +++ b/include/linux/platform_data/ad7879.h @@ -0,0 +1,41 @@ +/* linux/platform_data/ad7879.h */ + +/* Touchscreen characteristics vary between boards and models. The + * platform_data for the device's "struct device" holds this information. + * + * It's OK if the min/max values are zero. + */ +struct ad7879_platform_data { + u16 model; /* 7879 */ + u16 x_plate_ohms; + u16 x_min, x_max; + u16 y_min, y_max; + u16 pressure_min, pressure_max; + + bool swap_xy; /* swap x and y axes */ + + /* [0..255] 0=OFF Starts at 1=550us and goes + * all the way to 9.440ms in steps of 35us. + */ + u8 pen_down_acc_interval; + /* [0..15] Starts at 0=128us and goes all the + * way to 4.096ms in steps of 128us. 
+ */ + u8 first_conversion_delay; + /* [0..3] 0 = 2us, 1 = 4us, 2 = 8us, 3 = 16us */ + u8 acquisition_time; + /* [0..3] Average X middle samples 0 = 2, 1 = 4, 2 = 8, 3 = 16 */ + u8 averaging; + /* [0..3] Perform X measurements 0 = OFF, + * 1 = 4, 2 = 8, 3 = 16 (median > averaging) + */ + u8 median; + /* 1 = AUX/VBAT/GPIO export GPIO to gpiolib + * requires CONFIG_GPIOLIB + */ + bool gpio_export; + /* identifies the first GPIO number handled by this chip; + * or, if negative, requests dynamic ID allocation. + */ + s32 gpio_base; +}; diff --git a/include/linux/spi/ad7879.h b/include/linux/spi/ad7879.h deleted file mode 100644 index 58368be0b4c0..000000000000 --- a/include/linux/spi/ad7879.h +++ /dev/null @@ -1,41 +0,0 @@ -/* linux/spi/ad7879.h */ - -/* Touchscreen characteristics vary between boards and models. The - * platform_data for the device's "struct device" holds this information. - * - * It's OK if the min/max values are zero. - */ -struct ad7879_platform_data { - u16 model; /* 7879 */ - u16 x_plate_ohms; - u16 x_min, x_max; - u16 y_min, y_max; - u16 pressure_min, pressure_max; - - bool swap_xy; /* swap x and y axes */ - - /* [0..255] 0=OFF Starts at 1=550us and goes - * all the way to 9.440ms in steps of 35us. - */ - u8 pen_down_acc_interval; - /* [0..15] Starts at 0=128us and goes all the - * way to 4.096ms in steps of 128us. - */ - u8 first_conversion_delay; - /* [0..3] 0 = 2us, 1 = 4us, 2 = 8us, 3 = 16us */ - u8 acquisition_time; - /* [0..3] Average X middle samples 0 = 2, 1 = 4, 2 = 8, 3 = 16 */ - u8 averaging; - /* [0..3] Perform X measurements 0 = OFF, - * 1 = 4, 2 = 8, 3 = 16 (median > averaging) - */ - u8 median; - /* 1 = AUX/VBAT/GPIO export GPIO to gpiolib - * requires CONFIG_GPIOLIB - */ - bool gpio_export; - /* identifies the first GPIO number handled by this chip; - * or, if negative, requests dynamic ID allocation. - */ - s32 gpio_base; -}; -- cgit v1.2.3 From b121d1e74d1f24654bdc3165d3db1ca149501356 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Mon, 7 Mar 2016 21:57:13 -0800 Subject: bpf: prevent kprobe+bpf deadlocks If a kprobe is placed within the update or delete hash map helpers that hold the bucket spin lock, and the triggered bpf program tries to grab the spinlock for the same bucket on the same cpu, it will deadlock. Fix it by extending the existing recursion prevention mechanism. Note, map_lookup and other tracing helpers don't have this problem, since they don't hold any locks and don't modify global data. bpf_trace_printk has its own recursive check and is ok as well.
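The core of the fix is the per-cpu counter pattern below, extracted here for clarity from the kernel/bpf/syscall.c hunks that follow; trace_call_bpf() checks the same bpf_prog_active counter and skips the program while it is elevated:

	/* sketch of the pattern applied around map update/delete from
	 * syscall context: while the counter is elevated, a kprobe firing
	 * inside the helper will not run a bpf program that could try to
	 * take the same bucket lock again on this cpu */
	preempt_disable();
	__this_cpu_inc(bpf_prog_active);
	/* ... map->ops->map_update_elem() / map->ops->map_delete_elem() ... */
	__this_cpu_dec(bpf_prog_active);
	preempt_enable();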
Signed-off-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/linux/bpf.h | 3 +++ kernel/bpf/syscall.c | 13 +++++++++++++ kernel/trace/bpf_trace.c | 2 -- 3 files changed, 16 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 51e498e5470e..4b070827200d 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -10,6 +10,7 @@ #include #include #include +#include struct bpf_map; @@ -163,6 +164,8 @@ bool bpf_prog_array_compatible(struct bpf_array *array, const struct bpf_prog *f const struct bpf_func_proto *bpf_get_trace_printk_proto(void); #ifdef CONFIG_BPF_SYSCALL +DECLARE_PER_CPU(int, bpf_prog_active); + void bpf_register_prog_type(struct bpf_prog_type_list *tl); void bpf_register_map_type(struct bpf_map_type_list *tl); diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index c95a753c2007..dc99f6a000f5 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -18,6 +18,8 @@ #include #include +DEFINE_PER_CPU(int, bpf_prog_active); + int sysctl_unprivileged_bpf_disabled __read_mostly; static LIST_HEAD(bpf_map_types); @@ -347,6 +349,11 @@ static int map_update_elem(union bpf_attr *attr) if (copy_from_user(value, uvalue, value_size) != 0) goto free_value; + /* must increment bpf_prog_active to avoid kprobe+bpf triggering from + * inside bpf map update or delete otherwise deadlocks are possible + */ + preempt_disable(); + __this_cpu_inc(bpf_prog_active); if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH) { err = bpf_percpu_hash_update(map, key, value, attr->flags); } else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) { @@ -356,6 +363,8 @@ static int map_update_elem(union bpf_attr *attr) err = map->ops->map_update_elem(map, key, value, attr->flags); rcu_read_unlock(); } + __this_cpu_dec(bpf_prog_active); + preempt_enable(); free_value: kfree(value); @@ -394,9 +403,13 @@ static int map_delete_elem(union bpf_attr *attr) if (copy_from_user(key, ukey, map->key_size) != 0) goto free_key; + preempt_disable(); + __this_cpu_inc(bpf_prog_active); rcu_read_lock(); err = map->ops->map_delete_elem(map, key); rcu_read_unlock(); + __this_cpu_dec(bpf_prog_active); + preempt_enable(); free_key: kfree(key); diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 4b8caa392b86..3e4ffb3ace5f 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -13,8 +13,6 @@ #include #include "trace.h" -static DEFINE_PER_CPU(int, bpf_prog_active); - /** * trace_call_bpf - invoke BPF program * @prog: BPF program -- cgit v1.2.3 From 6c90598174322b8888029e40dd84a4eb01f56afe Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Mon, 7 Mar 2016 21:57:15 -0800 Subject: bpf: pre-allocate hash map elements If a kprobe is placed on spin_unlock then calling kmalloc/kfree from bpf programs is not safe, since the following deadlock is possible: kfree->spin_lock(kmem_cache_node->lock)...spin_unlock->kprobe-> bpf_prog->map_update->kmalloc->spin_lock(of the same kmem_cache_node->lock). The following solutions were considered and some implemented, but eventually discarded:
- kmem_cache_create for every map
- add recursion check to slow-path of slub
- use reserved memory in bpf_map_update for in_irq or in preempt_disabled
- kmalloc via irq_work
In the end, pre-allocation of all map elements turned out to be the simplest solution, and since the user is charged upfront for all the memory, such pre-allocation doesn't affect the user space visible behavior. Since it's impossible to tell whether a kprobe is triggered in a safe location from the kmalloc point of view, use pre-allocation by default and introduce a new BPF_F_NO_PREALLOC flag to opt out.
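From user space, the flag travels in the new map_flags field of union bpf_attr. Below is a minimal hedged sketch of creating a hash map that opts out of pre-allocation (raw bpf syscall, no error handling; everything except the uapi names added by this patch is illustrative):

	#include <linux/bpf.h>
	#include <string.h>
	#include <sys/syscall.h>
	#include <unistd.h>

	static int create_no_prealloc_map(void)
	{
		union bpf_attr attr;

		memset(&attr, 0, sizeof(attr));
		attr.map_type	 = BPF_MAP_TYPE_HASH;
		attr.key_size	 = sizeof(__u32);
		attr.value_size	 = sizeof(__u64);
		attr.max_entries = 1024;
		/* fall back to kmalloc per element instead of pre-allocation */
		attr.map_flags	 = BPF_F_NO_PREALLOC;

		return syscall(__NR_bpf, BPF_MAP_CREATE, &attr, sizeof(attr));
	}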
While testing per-cpu hash maps it was discovered that alloc_percpu(GFP_ATOMIC) has odd corner cases and often fails to allocate memory even when 90% of it is free. The pre-allocation of per-cpu hash elements solves this problem as well. It turned out that bpf_map_update() quickly followed by bpf_map_lookup()+bpf_map_delete() is a very common pattern used in many of the iovisor/bcc/tools, so there is an additional benefit of pre-allocation, since such use cases are much faster. Since all hash map elements are now pre-allocated we can remove the atomic increment of htab->count and save a few more cycles. Also add bpf_map_precharge_memlock() to check rlimit_memlock early and avoid large malloc/free done by users who don't have sufficient limits. Pre-allocation is done with vmalloc and alloc/free is done via percpu_freelist. Here are performance numbers for different pre-allocation algorithms that were implemented, but discarded in favor of percpu_freelist:

1 cpu:
pcpu_ida          2.1M
pcpu_ida nolock   2.3M
bt                2.4M
kmalloc           1.8M
hlist+spinlock    2.3M
pcpu_freelist     2.6M

4 cpu:
pcpu_ida          1.5M
pcpu_ida nolock   1.8M
bt w/smp_align    1.7M
bt no/smp_align   1.1M
kmalloc           0.7M
hlist+spinlock    0.2M
pcpu_freelist     2.0M

8 cpu:
pcpu_ida          0.7M
bt w/smp_align    0.8M
kmalloc           0.4M
pcpu_freelist     1.5M

32 cpu:
kmalloc           0.13M
pcpu_freelist     0.49M

pcpu_ida nolock is a modified percpu_ida algorithm without percpu_ida_cpu locks and without cross-cpu tag stealing. It's faster than the existing percpu_ida, but not as fast as pcpu_freelist. bt is a variant of block/blk-mq-tag.c simplified and customized for the bpf use case. bt w/smp_align uses a cache line for every 'long' (similar to blk-mq-tag). bt no/smp_align allocates 'long' bitmasks contiguously to save memory. It's comparable to percpu_ida and in some cases faster, but slower than percpu_freelist. hlist+spinlock is the simplest free list with a single spinlock. As expected, it has very bad scaling in SMP. kmalloc is the existing implementation, which is still available via the BPF_F_NO_PREALLOC flag. It's significantly slower on a single cpu, and in the 8 cpu setup it's 3 times slower than pre-allocation with pcpu_freelist, but it saves memory, so in cases where map->max_entries can be large and the number of map update/delete operations per second is low, it may make sense to use it. Signed-off-by: Alexei Starovoitov Signed-off-by: David S.
Miller --- include/linux/bpf.h | 2 + include/uapi/linux/bpf.h | 3 + kernel/bpf/hashtab.c | 240 +++++++++++++++++++++++++++++++++-------------- kernel/bpf/syscall.c | 15 ++- 4 files changed, 186 insertions(+), 74 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 4b070827200d..efd1d4ca95c6 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -37,6 +37,7 @@ struct bpf_map { u32 key_size; u32 value_size; u32 max_entries; + u32 map_flags; u32 pages; struct user_struct *user; const struct bpf_map_ops *ops; @@ -178,6 +179,7 @@ struct bpf_map *__bpf_map_get(struct fd f); void bpf_map_inc(struct bpf_map *map, bool uref); void bpf_map_put_with_uref(struct bpf_map *map); void bpf_map_put(struct bpf_map *map); +int bpf_map_precharge_memlock(u32 pages); extern int sysctl_unprivileged_bpf_disabled; diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 9221f653fee3..0e30b19012a5 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -101,12 +101,15 @@ enum bpf_prog_type { #define BPF_NOEXIST 1 /* create new element if it didn't exist */ #define BPF_EXIST 2 /* update existing element */ +#define BPF_F_NO_PREALLOC (1U << 0) + union bpf_attr { struct { /* anonymous struct used by BPF_MAP_CREATE command */ __u32 map_type; /* one of enum bpf_map_type */ __u32 key_size; /* size of key in bytes */ __u32 value_size; /* size of value in bytes */ __u32 max_entries; /* max number of entries in a map */ + __u32 map_flags; /* prealloc or not */ }; struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */ diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c index a68e95133fcd..fff3650d52fc 100644 --- a/kernel/bpf/hashtab.c +++ b/kernel/bpf/hashtab.c @@ -1,4 +1,5 @@ /* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com + * Copyright (c) 2016 Facebook * * This program is free software; you can redistribute it and/or * modify it under the terms of version 2 of the GNU General Public @@ -13,6 +14,7 @@ #include #include #include +#include "percpu_freelist.h" struct bucket { struct hlist_head head; @@ -22,6 +24,8 @@ struct bucket { struct bpf_htab { struct bpf_map map; struct bucket *buckets; + void *elems; + struct pcpu_freelist freelist; atomic_t count; /* number of elements in this hashtable */ u32 n_buckets; /* number of hash buckets */ u32 elem_size; /* size of each element in bytes */ @@ -29,15 +33,86 @@ struct bpf_htab { /* each htab element is struct htab_elem + key + value */ struct htab_elem { - struct hlist_node hash_node; - struct rcu_head rcu; union { - u32 hash; - u32 key_size; + struct hlist_node hash_node; + struct bpf_htab *htab; + struct pcpu_freelist_node fnode; }; + struct rcu_head rcu; + u32 hash; char key[0] __aligned(8); }; +static inline void htab_elem_set_ptr(struct htab_elem *l, u32 key_size, + void __percpu *pptr) +{ + *(void __percpu **)(l->key + key_size) = pptr; +} + +static inline void __percpu *htab_elem_get_ptr(struct htab_elem *l, u32 key_size) +{ + return *(void __percpu **)(l->key + key_size); +} + +static struct htab_elem *get_htab_elem(struct bpf_htab *htab, int i) +{ + return (struct htab_elem *) (htab->elems + i * htab->elem_size); +} + +static void htab_free_elems(struct bpf_htab *htab) +{ + int i; + + if (htab->map.map_type != BPF_MAP_TYPE_PERCPU_HASH) + goto free_elems; + + for (i = 0; i < htab->map.max_entries; i++) { + void __percpu *pptr; + + pptr = htab_elem_get_ptr(get_htab_elem(htab, i), + htab->map.key_size); + free_percpu(pptr); + } +free_elems: + vfree(htab->elems); 
+} + +static int prealloc_elems_and_freelist(struct bpf_htab *htab) +{ + int err = -ENOMEM, i; + + htab->elems = vzalloc(htab->elem_size * htab->map.max_entries); + if (!htab->elems) + return -ENOMEM; + + if (htab->map.map_type != BPF_MAP_TYPE_PERCPU_HASH) + goto skip_percpu_elems; + + for (i = 0; i < htab->map.max_entries; i++) { + u32 size = round_up(htab->map.value_size, 8); + void __percpu *pptr; + + pptr = __alloc_percpu_gfp(size, 8, GFP_USER | __GFP_NOWARN); + if (!pptr) + goto free_elems; + htab_elem_set_ptr(get_htab_elem(htab, i), htab->map.key_size, + pptr); + } + +skip_percpu_elems: + err = pcpu_freelist_init(&htab->freelist); + if (err) + goto free_elems; + + pcpu_freelist_populate(&htab->freelist, htab->elems, htab->elem_size, + htab->map.max_entries); + return 0; + +free_elems: + htab_free_elems(htab); + return err; +} + /* Called from syscall */ static struct bpf_map *htab_map_alloc(union bpf_attr *attr) { @@ -46,6 +121,10 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr) int err, i; u64 cost; + if (attr->map_flags & ~BPF_F_NO_PREALLOC) + /* reserved bits should not be used */ + return ERR_PTR(-EINVAL); + htab = kzalloc(sizeof(*htab), GFP_USER); if (!htab) return ERR_PTR(-ENOMEM); @@ -55,6 +134,7 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr) htab->map.key_size = attr->key_size; htab->map.value_size = attr->value_size; htab->map.max_entries = attr->max_entries; + htab->map.map_flags = attr->map_flags; /* check sanity of attributes. * value_size == 0 may be allowed in the future to use map as a set @@ -92,7 +172,7 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr) if (percpu) htab->elem_size += sizeof(void *); else - htab->elem_size += htab->map.value_size; + htab->elem_size += round_up(htab->map.value_size, 8); /* prevent zero size kmalloc and check for u32 overflow */ if (htab->n_buckets == 0 || @@ -112,6 +192,11 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr) htab->map.pages = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT; + /* if map size is larger than memlock limit, reject it early */ + err = bpf_map_precharge_memlock(htab->map.pages); + if (err) + goto free_htab; + err = -ENOMEM; htab->buckets = kmalloc_array(htab->n_buckets, sizeof(struct bucket), GFP_USER | __GFP_NOWARN); @@ -127,10 +212,16 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr) raw_spin_lock_init(&htab->buckets[i].lock); } - atomic_set(&htab->count, 0); + if (!(attr->map_flags & BPF_F_NO_PREALLOC)) { + err = prealloc_elems_and_freelist(htab); + if (err) + goto free_buckets; + } return &htab->map; +free_buckets: + kvfree(htab->buckets); free_htab: kfree(htab); return ERR_PTR(err); @@ -249,42 +340,42 @@ find_first_elem: } } - /* itereated over all buckets and all elements */ + /* iterated over all buckets and all elements */ return -ENOENT; } - -static inline void htab_elem_set_ptr(struct htab_elem *l, u32 key_size, - void __percpu *pptr) -{ - *(void __percpu **)(l->key + key_size) = pptr; -} - -static inline void __percpu *htab_elem_get_ptr(struct htab_elem *l, u32 key_size) -{ - return *(void __percpu **)(l->key + key_size); -} - -static void htab_percpu_elem_free(struct htab_elem *l) +static void htab_elem_free(struct bpf_htab *htab, struct htab_elem *l) { - free_percpu(htab_elem_get_ptr(l, l->key_size)); + if (htab->map.map_type == BPF_MAP_TYPE_PERCPU_HASH) + free_percpu(htab_elem_get_ptr(l, htab->map.key_size)); kfree(l); + } -static void htab_percpu_elem_free_rcu(struct rcu_head *head) +static void htab_elem_free_rcu(struct rcu_head *head) 
{ struct htab_elem *l = container_of(head, struct htab_elem, rcu); + struct bpf_htab *htab = l->htab; - htab_percpu_elem_free(l); + /* must increment bpf_prog_active to avoid kprobe+bpf triggering while + * we're calling kfree, otherwise deadlock is possible if kprobes + * are placed somewhere inside of slub + */ + preempt_disable(); + __this_cpu_inc(bpf_prog_active); + htab_elem_free(htab, l); + __this_cpu_dec(bpf_prog_active); + preempt_enable(); } -static void free_htab_elem(struct htab_elem *l, bool percpu, u32 key_size) +static void free_htab_elem(struct bpf_htab *htab, struct htab_elem *l) { - if (percpu) { - l->key_size = key_size; - call_rcu(&l->rcu, htab_percpu_elem_free_rcu); + if (!(htab->map.map_flags & BPF_F_NO_PREALLOC)) { + pcpu_freelist_push(&htab->freelist, &l->fnode); } else { - kfree_rcu(l, rcu); + atomic_dec(&htab->count); + l->htab = htab; + call_rcu(&l->rcu, htab_elem_free_rcu); } } @@ -293,23 +384,39 @@ static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key, bool percpu, bool onallcpus) { u32 size = htab->map.value_size; + bool prealloc = !(htab->map.map_flags & BPF_F_NO_PREALLOC); struct htab_elem *l_new; void __percpu *pptr; - l_new = kmalloc(htab->elem_size, GFP_ATOMIC | __GFP_NOWARN); - if (!l_new) - return NULL; + if (prealloc) { + l_new = (struct htab_elem *)pcpu_freelist_pop(&htab->freelist); + if (!l_new) + return ERR_PTR(-E2BIG); + } else { + if (atomic_inc_return(&htab->count) > htab->map.max_entries) { + atomic_dec(&htab->count); + return ERR_PTR(-E2BIG); + } + l_new = kmalloc(htab->elem_size, GFP_ATOMIC | __GFP_NOWARN); + if (!l_new) + return ERR_PTR(-ENOMEM); + } memcpy(l_new->key, key, key_size); if (percpu) { /* round up value_size to 8 bytes */ size = round_up(size, 8); - /* alloc_percpu zero-fills */ - pptr = __alloc_percpu_gfp(size, 8, GFP_ATOMIC | __GFP_NOWARN); - if (!pptr) { - kfree(l_new); - return NULL; + if (prealloc) { + pptr = htab_elem_get_ptr(l_new, key_size); + } else { + /* alloc_percpu zero-fills */ + pptr = __alloc_percpu_gfp(size, 8, + GFP_ATOMIC | __GFP_NOWARN); + if (!pptr) { + kfree(l_new); + return ERR_PTR(-ENOMEM); + } } if (!onallcpus) { @@ -324,7 +431,8 @@ static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key, off += size; } } - htab_elem_set_ptr(l_new, key_size, pptr); + if (!prealloc) + htab_elem_set_ptr(l_new, key_size, pptr); } else { memcpy(l_new->key + round_up(key_size, 8), value, size); } @@ -336,12 +444,6 @@ static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key, static int check_flags(struct bpf_htab *htab, struct htab_elem *l_old, u64 map_flags) { - if (!l_old && unlikely(atomic_read(&htab->count) >= htab->map.max_entries)) - /* if elem with this 'key' doesn't exist and we've reached - * max_entries limit, fail insertion of new elem - */ - return -E2BIG; - if (l_old && map_flags == BPF_NOEXIST) /* elem already exists */ return -EEXIST; @@ -375,13 +477,6 @@ static int htab_map_update_elem(struct bpf_map *map, void *key, void *value, hash = htab_map_hash(key, key_size); - /* allocate new element outside of the lock, since - * we're most likley going to insert it - */ - l_new = alloc_htab_elem(htab, key, value, key_size, hash, false, false); - if (!l_new) - return -ENOMEM; - b = __select_bucket(htab, hash); head = &b->head; @@ -394,21 +489,24 @@ static int htab_map_update_elem(struct bpf_map *map, void *key, void *value, if (ret) goto err; + l_new = alloc_htab_elem(htab, key, value, key_size, hash, false, false); + if (IS_ERR(l_new)) { + /* all pre-allocated 
elements are in use or memory exhausted */ + ret = PTR_ERR(l_new); + goto err; + } + /* add new element to the head of the list, so that * concurrent search will find it before old elem */ hlist_add_head_rcu(&l_new->hash_node, head); if (l_old) { hlist_del_rcu(&l_old->hash_node); - kfree_rcu(l_old, rcu); - } else { - atomic_inc(&htab->count); + free_htab_elem(htab, l_old); } - raw_spin_unlock_irqrestore(&b->lock, flags); - return 0; + ret = 0; err: raw_spin_unlock_irqrestore(&b->lock, flags); - kfree(l_new); return ret; } @@ -466,12 +564,11 @@ static int __htab_percpu_map_update_elem(struct bpf_map *map, void *key, } else { l_new = alloc_htab_elem(htab, key, value, key_size, hash, true, onallcpus); - if (!l_new) { - ret = -ENOMEM; + if (IS_ERR(l_new)) { + ret = PTR_ERR(l_new); goto err; } hlist_add_head_rcu(&l_new->hash_node, head); - atomic_inc(&htab->count); } ret = 0; err: @@ -489,7 +586,6 @@ static int htab_percpu_map_update_elem(struct bpf_map *map, void *key, static int htab_map_delete_elem(struct bpf_map *map, void *key) { struct bpf_htab *htab = container_of(map, struct bpf_htab, map); - bool percpu = map->map_type == BPF_MAP_TYPE_PERCPU_HASH; struct hlist_head *head; struct bucket *b; struct htab_elem *l; @@ -511,8 +607,7 @@ static int htab_map_delete_elem(struct bpf_map *map, void *key) if (l) { hlist_del_rcu(&l->hash_node); - atomic_dec(&htab->count); - free_htab_elem(l, percpu, key_size); + free_htab_elem(htab, l); ret = 0; } @@ -531,17 +626,10 @@ static void delete_all_elements(struct bpf_htab *htab) hlist_for_each_entry_safe(l, n, head, hash_node) { hlist_del_rcu(&l->hash_node); - atomic_dec(&htab->count); - if (htab->map.map_type == BPF_MAP_TYPE_PERCPU_HASH) { - l->key_size = htab->map.key_size; - htab_percpu_elem_free(l); - } else { - kfree(l); - } + htab_elem_free(htab, l); } } } - /* Called when map->refcnt goes to zero, either from workqueue or from syscall */ static void htab_map_free(struct bpf_map *map) { @@ -554,10 +642,16 @@ static void htab_map_free(struct bpf_map *map) */ synchronize_rcu(); - /* some of kfree_rcu() callbacks for elements of this map may not have - * executed. It's ok. Proceed to free residual elements and map itself + /* some of free_htab_elem() callbacks for elements of this map may + * not have executed. Wait for them. 
*/ - delete_all_elements(htab); + rcu_barrier(); + if (htab->map.map_flags & BPF_F_NO_PREALLOC) { + delete_all_elements(htab); + } else { + htab_free_elems(htab); + pcpu_freelist_destroy(&htab->freelist); + } kvfree(htab->buckets); kfree(htab); } diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index dc99f6a000f5..cbd94b2144ff 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -48,6 +48,19 @@ void bpf_register_map_type(struct bpf_map_type_list *tl) list_add(&tl->list_node, &bpf_map_types); } +int bpf_map_precharge_memlock(u32 pages) +{ + struct user_struct *user = get_current_user(); + unsigned long memlock_limit, cur; + + memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; + cur = atomic_long_read(&user->locked_vm); + free_uid(user); + if (cur + pages > memlock_limit) + return -EPERM; + return 0; +} + static int bpf_map_charge_memlock(struct bpf_map *map) { struct user_struct *user = get_current_user(); @@ -153,7 +166,7 @@ int bpf_map_new_fd(struct bpf_map *map) offsetof(union bpf_attr, CMD##_LAST_FIELD) - \ sizeof(attr->CMD##_LAST_FIELD)) != NULL -#define BPF_MAP_CREATE_LAST_FIELD max_entries +#define BPF_MAP_CREATE_LAST_FIELD map_flags /* called via syscall */ static int map_create(union bpf_attr *attr) { -- cgit v1.2.3 From 557c0c6e7df8e14a46bd7560d193fa5bbc00a858 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Mon, 7 Mar 2016 21:57:17 -0800 Subject: bpf: convert stackmap to pre-allocation It was observed that calling bpf_get_stackid() from a kprobe inside slub or from spin_unlock causes a similar deadlock as with the hashmap, therefore convert stackmap to use pre-allocated memory. call_rcu() is no longer a feasible mechanism, since delayed freeing causes bpf_get_stackid() to fail unpredictably when the number of actual stacks is significantly less than the user-requested max_entries. Since elements are no longer freed into slub, we can push elements into the freelist immediately and let them be recycled. However, the very unlikely race between user space map_lookup() and program-side recycling is possible: cpu0 cpu1 ---- ---- user does lookup(stackidX) starts copying ips into buffer delete(stackidX) calls bpf_get_stackid() which recycles the element and overwrites with new stack trace To avoid user space seeing a partial stack trace consisting of two merged stack traces, do bucket = xchg(, NULL); copy; xchg(,bucket); to preserve consistent stack trace delivery to user space. Now we can move memset(,0) of the left-over element value from the critical path of bpf_get_stackid() into the slow path of user space lookup. Also disallow lookup() from a bpf program, since it's useless and the program shouldn't be messing with the collected stack trace. Note that a similar race between user space lookup and kernel-side updates is also present in the hashmap, but it's not a new race. bpf programs were always allowed to modify hash and array map elements while user space is copying them. Fixes: d5a3b1f69186 ("bpf: introduce BPF_MAP_TYPE_STACK_TRACE") Signed-off-by: Alexei Starovoitov Signed-off-by: David S.
Miller --- include/linux/bpf.h | 1 + kernel/bpf/stackmap.c | 86 ++++++++++++++++++++++++++++++++++++++++----------- kernel/bpf/syscall.c | 2 ++ 3 files changed, 71 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index efd1d4ca95c6..21ee41b92e8a 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -195,6 +195,7 @@ int bpf_percpu_hash_update(struct bpf_map *map, void *key, void *value, u64 flags); int bpf_percpu_array_update(struct bpf_map *map, void *key, void *value, u64 flags); +int bpf_stackmap_copy(struct bpf_map *map, void *key, void *value); /* memcpy that is used with 8-byte aligned pointers, power-of-8 size and * forced to use 'long' read/writes to try to atomically copy long counters. diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c index f0a02c344358..499d9e933f8e 100644 --- a/kernel/bpf/stackmap.c +++ b/kernel/bpf/stackmap.c @@ -10,9 +10,10 @@ #include #include #include +#include "percpu_freelist.h" struct stack_map_bucket { - struct rcu_head rcu; + struct pcpu_freelist_node fnode; u32 hash; u32 nr; u64 ip[]; @@ -20,10 +21,34 @@ struct stack_map_bucket { struct bpf_stack_map { struct bpf_map map; + void *elems; + struct pcpu_freelist freelist; u32 n_buckets; - struct stack_map_bucket __rcu *buckets[]; + struct stack_map_bucket *buckets[]; }; +static int prealloc_elems_and_freelist(struct bpf_stack_map *smap) +{ + u32 elem_size = sizeof(struct stack_map_bucket) + smap->map.value_size; + int err; + + smap->elems = vzalloc(elem_size * smap->map.max_entries); + if (!smap->elems) + return -ENOMEM; + + err = pcpu_freelist_init(&smap->freelist); + if (err) + goto free_elems; + + pcpu_freelist_populate(&smap->freelist, smap->elems, elem_size, + smap->map.max_entries); + return 0; + +free_elems: + vfree(smap->elems); + return err; +} + /* Called from syscall */ static struct bpf_map *stack_map_alloc(union bpf_attr *attr) { @@ -70,12 +95,22 @@ static struct bpf_map *stack_map_alloc(union bpf_attr *attr) smap->n_buckets = n_buckets; smap->map.pages = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT; + err = bpf_map_precharge_memlock(smap->map.pages); + if (err) + goto free_smap; + err = get_callchain_buffers(); if (err) goto free_smap; + err = prealloc_elems_and_freelist(smap); + if (err) + goto put_buffers; + return &smap->map; +put_buffers: + put_callchain_buffers(); free_smap: kvfree(smap); return ERR_PTR(err); @@ -121,7 +156,7 @@ static u64 bpf_get_stackid(u64 r1, u64 r2, u64 flags, u64 r4, u64 r5) ips = trace->ip + skip + init_nr; hash = jhash2((u32 *)ips, trace_len / sizeof(u32), 0); id = hash & (smap->n_buckets - 1); - bucket = rcu_dereference(smap->buckets[id]); + bucket = READ_ONCE(smap->buckets[id]); if (bucket && bucket->hash == hash) { if (flags & BPF_F_FAST_STACK_CMP) @@ -135,19 +170,18 @@ static u64 bpf_get_stackid(u64 r1, u64 r2, u64 flags, u64 r4, u64 r5) if (bucket && !(flags & BPF_F_REUSE_STACKID)) return -EEXIST; - new_bucket = kmalloc(sizeof(struct stack_map_bucket) + map->value_size, - GFP_ATOMIC | __GFP_NOWARN); + new_bucket = (struct stack_map_bucket *) + pcpu_freelist_pop(&smap->freelist); if (unlikely(!new_bucket)) return -ENOMEM; memcpy(new_bucket->ip, ips, trace_len); - memset(new_bucket->ip + trace_len / 8, 0, map->value_size - trace_len); new_bucket->hash = hash; new_bucket->nr = trace_nr; old_bucket = xchg(&smap->buckets[id], new_bucket); if (old_bucket) - kfree_rcu(old_bucket, rcu); + pcpu_freelist_push(&smap->freelist, &old_bucket->fnode); return id; } @@ -160,17 +194,34 @@ const 
struct bpf_func_proto bpf_get_stackid_proto = { .arg3_type = ARG_ANYTHING, }; -/* Called from syscall or from eBPF program */ +/* Called from eBPF program */ static void *stack_map_lookup_elem(struct bpf_map *map, void *key) +{ + return NULL; +} + +/* Called from syscall */ +int bpf_stackmap_copy(struct bpf_map *map, void *key, void *value) { struct bpf_stack_map *smap = container_of(map, struct bpf_stack_map, map); - struct stack_map_bucket *bucket; - u32 id = *(u32 *)key; + struct stack_map_bucket *bucket, *old_bucket; + u32 id = *(u32 *)key, trace_len; if (unlikely(id >= smap->n_buckets)) - return NULL; - bucket = rcu_dereference(smap->buckets[id]); - return bucket ? bucket->ip : NULL; + return -ENOENT; + + bucket = xchg(&smap->buckets[id], NULL); + if (!bucket) + return -ENOENT; + + trace_len = bucket->nr * sizeof(u64); + memcpy(value, bucket->ip, trace_len); + memset(value + trace_len, 0, map->value_size - trace_len); + + old_bucket = xchg(&smap->buckets[id], bucket); + if (old_bucket) + pcpu_freelist_push(&smap->freelist, &old_bucket->fnode); + return 0; } static int stack_map_get_next_key(struct bpf_map *map, void *key, void *next_key) @@ -196,7 +247,7 @@ static int stack_map_delete_elem(struct bpf_map *map, void *key) old_bucket = xchg(&smap->buckets[id], NULL); if (old_bucket) { - kfree_rcu(old_bucket, rcu); + pcpu_freelist_push(&smap->freelist, &old_bucket->fnode); return 0; } else { return -ENOENT; @@ -207,13 +258,12 @@ static int stack_map_delete_elem(struct bpf_map *map, void *key) static void stack_map_free(struct bpf_map *map) { struct bpf_stack_map *smap = container_of(map, struct bpf_stack_map, map); - int i; + /* wait for bpf programs to complete before freeing stack map */ synchronize_rcu(); - for (i = 0; i < smap->n_buckets; i++) - if (smap->buckets[i]) - kfree_rcu(smap->buckets[i], rcu); + vfree(smap->elems); + pcpu_freelist_destroy(&smap->freelist); kvfree(smap); put_callchain_buffers(); } diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index cbd94b2144ff..2978d0d08869 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -290,6 +290,8 @@ static int map_lookup_elem(union bpf_attr *attr) err = bpf_percpu_hash_copy(map, key, value); } else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) { err = bpf_percpu_array_copy(map, key, value); + } else if (map->map_type == BPF_MAP_TYPE_STACK_TRACE) { + err = bpf_stackmap_copy(map, key, value); } else { rcu_read_lock(); ptr = map->ops->map_lookup_elem(map, key); -- cgit v1.2.3 From 057bd2e0528ec68b3d0481ede0b26a31a9e5d2f1 Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Tue, 9 Feb 2016 15:30:47 +0100 Subject: PCI: Add pci_ops.{add,remove}_bus() callbacks Add pci_ops.{add,remove}_bus() callbacks, which will be called on every newly created bus and when a bus is being removed, respectively. This can be used by drivers to implement driver-specific initialization and teardown of the bus, in addition to the architecture-specifics implemented by the pcibios_add_bus() and the pcibios_remove_bus() functions. 
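For illustration, a host controller driver might wire up the new callbacks roughly as follows. This is a sketch only: struct foo_pcie and the foo_pcie_* helpers are hypothetical names, not part of this patch; pci_generic_config_read/write are the existing generic accessors.

/* Sketch: struct foo_pcie and foo_pcie_* helpers are hypothetical. */
static int foo_pcie_add_bus(struct pci_bus *bus)
{
	struct foo_pcie *pcie = bus->sysdata;

	/* Per-bus setup, e.g. program a bus-number translation window. */
	return foo_pcie_setup_bus_window(pcie, bus->number);
}

static void foo_pcie_remove_bus(struct pci_bus *bus)
{
	struct foo_pcie *pcie = bus->sysdata;

	foo_pcie_teardown_bus_window(pcie, bus->number);
}

static struct pci_ops foo_pcie_ops = {
	.add_bus	= foo_pcie_add_bus,
	.remove_bus	= foo_pcie_remove_bus,
	.map_bus	= foo_pcie_map_bus,	/* hypothetical config-space mapper */
	.read		= pci_generic_config_read,
	.write		= pci_generic_config_write,
};

The core then invokes add_bus() for every newly created child bus and remove_bus() on teardown, bracketing the pcibios_{add,remove}_bus() architecture hooks.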
Signed-off-by: Thierry Reding Signed-off-by: Bjorn Helgaas --- drivers/pci/probe.c | 6 ++++++ drivers/pci/remove.c | 4 ++++ include/linux/pci.h | 2 ++ 3 files changed, 12 insertions(+) (limited to 'include/linux') diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index 6d7ab9bb0d5a..3ea4de1c6926 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -758,6 +758,12 @@ add_dev: pcibios_add_bus(child); + if (child->ops->add_bus) { + ret = child->ops->add_bus(child); + if (WARN_ON(ret < 0)) + dev_err(&child->dev, "failed to add bus: %d\n", ret); + } + /* Create legacy_io and legacy_mem files for this bus */ pci_create_legacy_files(child); diff --git a/drivers/pci/remove.c b/drivers/pci/remove.c index 8a280e9c2ad1..d35c7fcde3af 100644 --- a/drivers/pci/remove.c +++ b/drivers/pci/remove.c @@ -54,6 +54,10 @@ void pci_remove_bus(struct pci_bus *bus) pci_bus_release_busn_res(bus); up_write(&pci_bus_sem); pci_remove_legacy_files(bus); + + if (bus->ops->remove_bus) + bus->ops->remove_bus(bus); + pcibios_remove_bus(bus); device_unregister(&bus->dev); } diff --git a/include/linux/pci.h b/include/linux/pci.h index 27df4a6585da..0f7ef14c4d10 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -578,6 +578,8 @@ static inline int pcibios_err_to_errno(int err) /* Low-level architecture-dependent routines */ struct pci_ops { + int (*add_bus)(struct pci_bus *bus); + void (*remove_bus)(struct pci_bus *bus); void __iomem *(*map_bus)(struct pci_bus *bus, unsigned int devfn, int where); int (*read)(struct pci_bus *bus, unsigned int devfn, int where, int size, u32 *val); int (*write)(struct pci_bus *bus, unsigned int devfn, int where, int size, u32 val); -- cgit v1.2.3 From c194f7ea7f68f2690533832ec22f0d7ed4f2d74d Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Fri, 4 Mar 2016 10:53:03 +1100 Subject: PCI/IOV: Rename and export virtfn_{add, remove} During EEH recovery, hotplug is applied to the devices which don't have drivers or whose drivers don't support EEH. However, hotplug, which was implemented based on the PCI bus, can't be applied to a VF directly. Instead, we unplug and plug individual PCI devices (VFs). This renames virtfn_{add,remove}() and exports them so they can be used in PCI hotplug during EEH recovery.
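A rough sketch of the intended use during recovery follows; the eeh_replug_vf() helper is hypothetical (the real recovery code lives in arch/powerpc), but the exported calls match the prototypes added below.

/* Sketch: unplug and re-plug one VF below a PF (hypothetical helper). */
static void eeh_replug_vf(struct pci_dev *pf, int vf_index)
{
	/* Remove the individual VF rather than hot-removing the whole bus. */
	pci_iov_remove_virtfn(pf, vf_index, 0);

	/* ...recover the PE, then plug the VF back in. */
	if (pci_iov_add_virtfn(pf, vf_index, 0))
		dev_warn(&pf->dev, "failed to re-add VF %d\n", vf_index);
}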
Signed-off-by: Wei Yang Reviewed-by: Gavin Shan Acked-by: Bjorn Helgaas Signed-off-by: Michael Ellerman --- drivers/pci/iov.c | 10 +++++----- include/linux/pci.h | 8 ++++++++ 2 files changed, 13 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/drivers/pci/iov.c b/drivers/pci/iov.c index 31f31d460fc9..fa4f13869fa9 100644 --- a/drivers/pci/iov.c +++ b/drivers/pci/iov.c @@ -113,7 +113,7 @@ resource_size_t pci_iov_resource_size(struct pci_dev *dev, int resno) return dev->sriov->barsz[resno - PCI_IOV_RESOURCES]; } -static int virtfn_add(struct pci_dev *dev, int id, int reset) +int pci_iov_add_virtfn(struct pci_dev *dev, int id, int reset) { int i; int rc = -ENOMEM; @@ -188,7 +188,7 @@ failed: return rc; } -static void virtfn_remove(struct pci_dev *dev, int id, int reset) +void pci_iov_remove_virtfn(struct pci_dev *dev, int id, int reset) { char buf[VIRTFN_ID_LEN]; struct pci_dev *virtfn; @@ -321,7 +321,7 @@ static int sriov_enable(struct pci_dev *dev, int nr_virtfn) } for (i = 0; i < initial; i++) { - rc = virtfn_add(dev, i, 0); + rc = pci_iov_add_virtfn(dev, i, 0); if (rc) goto failed; } @@ -333,7 +333,7 @@ static int sriov_enable(struct pci_dev *dev, int nr_virtfn) failed: while (i--) - virtfn_remove(dev, i, 0); + pci_iov_remove_virtfn(dev, i, 0); pcibios_sriov_disable(dev); err_pcibios: @@ -359,7 +359,7 @@ static void sriov_disable(struct pci_dev *dev) return; for (i = 0; i < iov->num_VFs; i++) - virtfn_remove(dev, i, 0); + pci_iov_remove_virtfn(dev, i, 0); pcibios_sriov_disable(dev); diff --git a/include/linux/pci.h b/include/linux/pci.h index 27df4a6585da..3db5e30e8ede 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1738,6 +1738,8 @@ int pci_iov_virtfn_devfn(struct pci_dev *dev, int id); int pci_enable_sriov(struct pci_dev *dev, int nr_virtfn); void pci_disable_sriov(struct pci_dev *dev); +int pci_iov_add_virtfn(struct pci_dev *dev, int id, int reset); +void pci_iov_remove_virtfn(struct pci_dev *dev, int id, int reset); int pci_num_vf(struct pci_dev *dev); int pci_vfs_assigned(struct pci_dev *dev); int pci_sriov_set_totalvfs(struct pci_dev *dev, u16 numvfs); @@ -1754,6 +1756,12 @@ static inline int pci_iov_virtfn_devfn(struct pci_dev *dev, int id) } static inline int pci_enable_sriov(struct pci_dev *dev, int nr_virtfn) { return -ENODEV; } +static inline int pci_iov_add_virtfn(struct pci_dev *dev, int id, int reset) +{ + return -ENOSYS; +} +static inline void pci_iov_remove_virtfn(struct pci_dev *dev, + int id, int reset) { } static inline void pci_disable_sriov(struct pci_dev *dev) { } static inline int pci_num_vf(struct pci_dev *dev) { return 0; } static inline int pci_vfs_assigned(struct pci_dev *dev) -- cgit v1.2.3 From 7b77061f8d03cdaf71d91ea356835131d651b103 Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Fri, 4 Mar 2016 10:53:04 +1100 Subject: PCI: Add pcibios_bus_add_device() weak function This adds a weak function, pcibios_bus_add_device(), so that arch-dependent code can do proper setup. For example, powerpc can set up EEH-related resources for SRIOV VFs.
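A sketch of what an architecture's strong definition might look like; eeh_setup_vf_resources() is a hypothetical stand-in for the real powerpc EEH helpers.

/* Arch-specific strong definition overriding the __weak stub. */
void pcibios_bus_add_device(struct pci_dev *pdev)
{
	/* In this example only VFs need the extra wiring. */
	if (pdev->is_virtfn)
		eeh_setup_vf_resources(pdev);	/* hypothetical helper */
}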
Signed-off-by: Wei Yang Reviewed-by: Gavin Shan Acked-by: Bjorn Helgaas Signed-off-by: Michael Ellerman --- drivers/pci/bus.c | 3 +++ include/linux/pci.h | 1 + 2 files changed, 4 insertions(+) (limited to 'include/linux') diff --git a/drivers/pci/bus.c b/drivers/pci/bus.c index 89b3befc7155..6469ff6208b0 100644 --- a/drivers/pci/bus.c +++ b/drivers/pci/bus.c @@ -271,6 +271,8 @@ bool pci_bus_clip_resource(struct pci_dev *dev, int idx) void __weak pcibios_resource_survey_bus(struct pci_bus *bus) { } +void __weak pcibios_bus_add_device(struct pci_dev *pdev) { } + /** * pci_bus_add_device - start driver for a single device * @dev: device to add @@ -285,6 +287,7 @@ void pci_bus_add_device(struct pci_dev *dev) * Can not put in pci_device_add yet because resources * are not assigned yet for some devices. */ + pcibios_bus_add_device(dev); pci_fixup_device(pci_fixup_final, dev); pci_create_sysfs_dev_files(dev); pci_proc_attach_device(dev); diff --git a/include/linux/pci.h b/include/linux/pci.h index 3db5e30e8ede..bc435d6293d2 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -770,6 +770,7 @@ extern struct list_head pci_root_buses; /* list of all known PCI buses */ int no_pci_devices(void); void pcibios_resource_survey_bus(struct pci_bus *bus); +void pcibios_bus_add_device(struct pci_dev *pdev); void pcibios_add_bus(struct pci_bus *bus); void pcibios_remove_bus(struct pci_bus *bus); void pcibios_fixup_bus(struct pci_bus *); -- cgit v1.2.3 From e237a5518425155faa508a087f28269f58074b92 Mon Sep 17 00:00:00 2001 From: Chen Fan Date: Mon, 15 Feb 2016 12:52:01 +0800 Subject: x86/ACPI/PCI: Recognize that Interrupt Line 255 means "not connected" Per the x86-specific footnote to PCI spec r3.0, sec 6.2.4, the value 255 in the Interrupt Line register means "unknown" or "no connection." Previously, when we couldn't derive an IRQ from the _PRT, we fell back to using the value from Interrupt Line as an IRQ. It's questionable whether we should do that at all, but the spec clearly suggests we shouldn't do it for the value 255 on x86. Calling request_irq() with IRQ 255 may succeed, but the driver won't receive any interrupts. Or, if IRQ 255 is shared with another device, it may succeed, and the driver's ISR will be called at random times when the *other* device interrupts. Or it may fail if another device is using IRQ 255 with incompatible flags. What we *want* is for request_irq() to fail predictably so the driver can fall back to polling. On x86, assume 255 in the Interrupt Line means the INTx line is not connected. In that case, set dev->irq to IRQ_NOTCONNECTED so request_irq() will fail gracefully with -ENOTCONN. We found this problem on a system where Secure Boot firmware assigned Interrupt Line 255 to an i801_smbus device and another device was already using MSI-X IRQ 255. This was in v3.10, where i801_probe() fails if request_irq() fails: i801_smbus 0000:00:1f.3: enabling device (0140 -> 0143) i801_smbus 0000:00:1f.3: can't derive routing for PCI INT C i801_smbus 0000:00:1f.3: PCI INT C: no GSI genirq: Flags mismatch irq 255. 00000080 (i801_smbus) vs. 
00000000 (megasa) CPU: 0 PID: 2487 Comm: kworker/0:1 Not tainted 3.10.0-229.el7.x86_64 #1 Hardware name: FUJITSU PRIMEQUEST 2800E2/D3736, BIOS PRIMEQUEST 2000 Serie5 Call Trace: dump_stack+0x19/0x1b __setup_irq+0x54a/0x570 request_threaded_irq+0xcc/0x170 i801_probe+0x32f/0x508 [i2c_i801] local_pci_probe+0x45/0xa0 i801_smbus 0000:00:1f.3: Failed to allocate irq 255: -16 i801_smbus: probe of 0000:00:1f.3 failed with error -16 After aeb8a3d16ae0 ("i2c: i801: Check if interrupts are disabled"), i801_probe() will fall back to polling if request_irq() fails. But we still need this patch because request_irq() may succeed or fail depending on other devices in the system. If request_irq() fails, i801_smbus will work by falling back to polling, but if it succeeds, i801_smbus won't work because it expects interrupts that it may not receive. Signed-off-by: Chen Fan Acked-by: Thomas Gleixner Acked-by: Bjorn Helgaas Signed-off-by: Rafael J. Wysocki --- drivers/acpi/pci_irq.c | 29 +++++++++++++++++++++++++---- include/linux/interrupt.h | 10 ++++++++++ kernel/irq/manage.c | 9 ++++++++- 3 files changed, 43 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/drivers/acpi/pci_irq.c b/drivers/acpi/pci_irq.c index c8e169e46673..2c45dd3acc17 100644 --- a/drivers/acpi/pci_irq.c +++ b/drivers/acpi/pci_irq.c @@ -33,6 +33,7 @@ #include #include #include +#include #define PREFIX "ACPI: " @@ -387,6 +388,23 @@ static inline int acpi_isa_register_gsi(struct pci_dev *dev) } #endif +static inline bool acpi_pci_irq_valid(struct pci_dev *dev, u8 pin) +{ +#ifdef CONFIG_X86 + /* + * On x86 irq line 0xff means "unknown" or "no connection" + * (PCI 3.0, Section 6.2.4, footnote on page 223). + */ + if (dev->irq == 0xff) { + dev->irq = IRQ_NOTCONNECTED; + dev_warn(&dev->dev, "PCI INT %c: not connected\n", + pin_name(pin)); + return false; + } +#endif + return true; +} + int acpi_pci_irq_enable(struct pci_dev *dev) { struct acpi_prt_entry *entry; @@ -431,11 +449,14 @@ int acpi_pci_irq_enable(struct pci_dev *dev) } else gsi = -1; - /* - * No IRQ known to the ACPI subsystem - maybe the BIOS / - * driver reported one, then use it. Exit in any case. - */ if (gsi < 0) { + /* + * No IRQ known to the ACPI subsystem - maybe the BIOS / + * driver reported one, then use it. Exit in any case. + */ + if (!acpi_pci_irq_valid(dev, pin)) + return 0; + if (acpi_isa_register_gsi(dev)) dev_warn(&dev->dev, "PCI INT %c: no GSI\n", pin_name(pin)); diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 0e95fcc75b2a..358076eda364 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -125,6 +125,16 @@ struct irqaction { extern irqreturn_t no_action(int cpl, void *dev_id); +/* + * If a (PCI) device interrupt is not connected we set dev->irq to + * IRQ_NOTCONNECTED. This causes request_irq() to fail with -ENOTCONN, so we + * can distinguish that case from other error returns. + * + * 0x80000000 is guaranteed to be outside the available range of interrupts + * and easy to distinguish from other possible incorrect values.
+ */ +#define IRQ_NOTCONNECTED (1U << 31) + extern int __must_check request_threaded_irq(unsigned int irq, irq_handler_t handler, irq_handler_t thread_fn, diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 841187239adc..e79e60f50bce 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -1609,6 +1609,9 @@ int request_threaded_irq(unsigned int irq, irq_handler_t handler, struct irq_desc *desc; int retval; + if (irq == IRQ_NOTCONNECTED) + return -ENOTCONN; + /* * Sanity-check: shared interrupts must pass in a real dev-ID, * otherwise we'll have trouble later trying to figure out @@ -1699,9 +1702,13 @@ EXPORT_SYMBOL(request_threaded_irq); int request_any_context_irq(unsigned int irq, irq_handler_t handler, unsigned long flags, const char *name, void *dev_id) { - struct irq_desc *desc = irq_to_desc(irq); + struct irq_desc *desc; int ret; + if (irq == IRQ_NOTCONNECTED) + return -ENOTCONN; + + desc = irq_to_desc(irq); if (!desc) return -EINVAL; -- cgit v1.2.3 From 3fab91ea284a3b795327dda915a3c150a49e4be2 Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Mon, 29 Feb 2016 21:08:19 +0800 Subject: gpio: lp3943: Drop pin_used and lp3943_gpio_request/lp3943_gpio_free The implementations of lp3943_gpio_request/lp3943_gpio_free test pin_used to track pin usage. However, gpiolib already checks the FLAG_REQUESTED flag for the same purpose. So remove the redundant implementation. Signed-off-by: Axel Lin Signed-off-by: Linus Walleij --- drivers/gpio/gpio-lp3943.c | 22 ---------------------- include/linux/mfd/lp3943.h | 6 ------ 2 files changed, 28 deletions(-) (limited to 'include/linux') diff --git a/drivers/gpio/gpio-lp3943.c b/drivers/gpio/gpio-lp3943.c index 6dc6725403ec..fdec94b0cfb0 100644 --- a/drivers/gpio/gpio-lp3943.c +++ b/drivers/gpio/gpio-lp3943.c @@ -45,26 +45,6 @@ struct lp3943_gpio { u16 input_mask; /* 1 = GPIO is input direction, 0 = output */ }; -static int lp3943_gpio_request(struct gpio_chip *chip, unsigned offset) -{ - struct lp3943_gpio *lp3943_gpio = gpiochip_get_data(chip); - struct lp3943 *lp3943 = lp3943_gpio->lp3943; - - /* Return an error if the pin is already assigned */ - if (test_and_set_bit(offset, &lp3943->pin_used)) - return -EBUSY; - - return 0; -} - -static void lp3943_gpio_free(struct gpio_chip *chip, unsigned offset) -{ - struct lp3943_gpio *lp3943_gpio = gpiochip_get_data(chip); - struct lp3943 *lp3943 = lp3943_gpio->lp3943; - - clear_bit(offset, &lp3943->pin_used); -} - static int lp3943_gpio_set_mode(struct lp3943_gpio *lp3943_gpio, u8 offset, u8 val) { @@ -177,8 +157,6 @@ static int lp3943_gpio_direction_output(struct gpio_chip *chip, unsigned offset, static const struct gpio_chip lp3943_gpio_chip = { .label = "lp3943", .owner = THIS_MODULE, - .request = lp3943_gpio_request, - .free = lp3943_gpio_free, .direction_input = lp3943_gpio_direction_input, .get = lp3943_gpio_get, .direction_output = lp3943_gpio_direction_output, diff --git a/include/linux/mfd/lp3943.h b/include/linux/mfd/lp3943.h index 3490db782988..d3a738118b27 100644 --- a/include/linux/mfd/lp3943.h +++ b/include/linux/mfd/lp3943.h @@ -94,18 +94,12 @@ struct lp3943_reg_cfg { * @regmap: Used for I2C communication on accessing registers * @pdata: LP3943 platform specific data * @mux_cfg: Register configuration for pin MUX - * @pin_used: Bit mask for output pin used. - * This bitmask is used for pin assignment management. - * 1 = pin used, 0 = available. - * Only LSB 16 bits are used, but it is unsigned long type - * for atomic bitwise operations.
*/ struct lp3943 { struct device *dev; struct regmap *regmap; struct lp3943_platform_data *pdata; const struct lp3943_reg_cfg *mux_cfg; - unsigned long pin_used; }; int lp3943_read_byte(struct lp3943 *lp3943, u8 reg, u8 *read); -- cgit v1.2.3 From caf02abf9bd00b4c23745a055c4f4c243eecd392 Mon Sep 17 00:00:00 2001 From: "Robin H. Johnson" Date: Sun, 6 Mar 2016 22:02:30 +0000 Subject: PCI: Add QEMU top-level IDs for (sub)vendor & device Introduce PCI_VENDOR/PCI_SUBVENDOR/PCI_SUBDEVICE defines to replace the constants scattered in the kernel already used to detect QEMU. They are defined in the QEMU codebase per docs/specs/pci-ids.txt. Signed-off-by: Robin H. Johnson Signed-off-by: Bjorn Helgaas Reviewed-by: Takashi Iwai Reviewed-by: Gerd Hoffmann Acked-by: Michael S. Tsirkin Acked-by: Daniel Vetter --- drivers/gpu/drm/bochs/bochs_drv.c | 4 ++-- drivers/gpu/drm/cirrus/cirrus_drv.c | 5 +++-- drivers/virtio/virtio_pci_common.c | 2 +- include/linux/pci_ids.h | 4 ++++ sound/pci/intel8x0.c | 4 ++-- 5 files changed, 12 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/drivers/gpu/drm/bochs/bochs_drv.c b/drivers/gpu/drm/bochs/bochs_drv.c index 7f1a3604b19f..b332b4d3b0e2 100644 --- a/drivers/gpu/drm/bochs/bochs_drv.c +++ b/drivers/gpu/drm/bochs/bochs_drv.c @@ -182,8 +182,8 @@ static const struct pci_device_id bochs_pci_tbl[] = { { .vendor = 0x1234, .device = 0x1111, - .subvendor = 0x1af4, - .subdevice = 0x1100, + .subvendor = PCI_SUBVENDOR_ID_REDHAT_QUMRANET, + .subdevice = PCI_SUBDEVICE_ID_QEMU, .driver_data = BOCHS_QEMU_STDVGA, }, { diff --git a/drivers/gpu/drm/cirrus/cirrus_drv.c b/drivers/gpu/drm/cirrus/cirrus_drv.c index b1619e29a564..7bc394ec9fb3 100644 --- a/drivers/gpu/drm/cirrus/cirrus_drv.c +++ b/drivers/gpu/drm/cirrus/cirrus_drv.c @@ -33,8 +33,9 @@ static struct drm_driver driver; /* only bind to the cirrus chip in qemu */ static const struct pci_device_id pciidlist[] = { - { PCI_VENDOR_ID_CIRRUS, PCI_DEVICE_ID_CIRRUS_5446, 0x1af4, 0x1100, 0, - 0, 0 }, + { PCI_VENDOR_ID_CIRRUS, PCI_DEVICE_ID_CIRRUS_5446, + PCI_SUBVENDOR_ID_REDHAT_QUMRANET, PCI_SUBDEVICE_ID_QEMU, + 0, 0, 0 }, { PCI_VENDOR_ID_CIRRUS, PCI_DEVICE_ID_CIRRUS_5446, PCI_VENDOR_ID_XEN, 0x0001, 0, 0, 0 }, {0,} diff --git a/drivers/virtio/virtio_pci_common.c b/drivers/virtio/virtio_pci_common.c index 36205c27c4d0..127dfe4e8694 100644 --- a/drivers/virtio/virtio_pci_common.c +++ b/drivers/virtio/virtio_pci_common.c @@ -467,7 +467,7 @@ static const struct dev_pm_ops virtio_pci_pm_ops = { /* Qumranet donated their vendor ID for devices 0x1000 thru 0x10FF. 
*/ static const struct pci_device_id virtio_pci_id_table[] = { - { PCI_DEVICE(0x1af4, PCI_ANY_ID) }, + { PCI_DEVICE(PCI_VENDOR_ID_REDHAT_QUMRANET, PCI_ANY_ID) }, { 0 } }; diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 37f05cb1dfd6..6d249d32c8b8 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2506,6 +2506,10 @@ #define PCI_VENDOR_ID_AZWAVE 0x1a3b +#define PCI_VENDOR_ID_REDHAT_QUMRANET 0x1af4 +#define PCI_SUBVENDOR_ID_REDHAT_QUMRANET 0x1af4 +#define PCI_SUBDEVICE_ID_QEMU 0x1100 + #define PCI_VENDOR_ID_ASMEDIA 0x1b21 #define PCI_VENDOR_ID_CIRCUITCO 0x1cc8 diff --git a/sound/pci/intel8x0.c b/sound/pci/intel8x0.c index 42bcbac801a3..12c2c180e407 100644 --- a/sound/pci/intel8x0.c +++ b/sound/pci/intel8x0.c @@ -2980,8 +2980,8 @@ static int snd_intel8x0_inside_vm(struct pci_dev *pci) goto fini; /* check for known (emulated) devices */ - if (pci->subsystem_vendor == 0x1af4 && - pci->subsystem_device == 0x1100) { + if (pci->subsystem_vendor == PCI_SUBVENDOR_ID_REDHAT_QUMRANET && + pci->subsystem_device == PCI_SUBDEVICE_ID_QEMU) { /* KVM emulated sound, PCI SSID: 1af4:1100 */ msg = "enable KVM"; } else if (pci->subsystem_vendor == 0x1ab8) { -- cgit v1.2.3 From 34e2c555f3e13c90e9284e23d00f03be8a6e06c5 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 15 Feb 2016 20:20:42 +0100 Subject: cpufreq: Add mechanism for registering utilization update callbacks Introduce a mechanism by which parts of the cpufreq subsystem ("setpolicy" drivers or the core) can register callbacks to be executed from cpufreq_update_util() which is invoked by the scheduler's update_load_avg() on CPU utilization changes. This allows the "setpolicy" drivers to dispense with their timers and do all of the computations they need and frequency/voltage adjustments in the update_load_avg() code path, among other things. The update_load_avg() changes were suggested by Peter Zijlstra. Signed-off-by: Rafael J. Wysocki Acked-by: Viresh Kumar Acked-by: Peter Zijlstra (Intel) Acked-by: Ingo Molnar --- drivers/cpufreq/cpufreq.c | 45 +++++++++++++++++++++++++++++++++++++++++++++ include/linux/cpufreq.h | 34 ++++++++++++++++++++++++++++++++++ kernel/sched/deadline.c | 4 ++++ kernel/sched/fair.c | 26 +++++++++++++++++++++++++- kernel/sched/rt.c | 4 ++++ kernel/sched/sched.h | 1 + 6 files changed, 113 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 34b17447e0d1..e172b2a02c1d 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -102,6 +102,51 @@ static LIST_HEAD(cpufreq_governor_list); static struct cpufreq_driver *cpufreq_driver; static DEFINE_PER_CPU(struct cpufreq_policy *, cpufreq_cpu_data); static DEFINE_RWLOCK(cpufreq_driver_lock); + +static DEFINE_PER_CPU(struct update_util_data *, cpufreq_update_util_data); + +/** + * cpufreq_set_update_util_data - Populate the CPU's update_util_data pointer. + * @cpu: The CPU to set the pointer for. + * @data: New pointer value. + * + * Set and publish the update_util_data pointer for the given CPU. That pointer + * points to a struct update_util_data object containing a callback function + * to call from cpufreq_update_util(). That function will be called from an RCU + * read-side critical section, so it must not sleep. + * + * Callers must use RCU callbacks to free any memory that might be accessed + * via the old update_util_data pointer or invoke synchronize_rcu() right after + * this function to avoid use-after-free. 
+ */ +void cpufreq_set_update_util_data(int cpu, struct update_util_data *data) +{ + rcu_assign_pointer(per_cpu(cpufreq_update_util_data, cpu), data); +} +EXPORT_SYMBOL_GPL(cpufreq_set_update_util_data); + +/** + * cpufreq_update_util - Take a note about CPU utilization changes. + * @time: Current time. + * @util: Current utilization. + * @max: Utilization ceiling. + * + * This function is called by the scheduler on every invocation of + * update_load_avg() on the CPU whose utilization is being updated. + */ +void cpufreq_update_util(u64 time, unsigned long util, unsigned long max) +{ + struct update_util_data *data; + + rcu_read_lock(); + + data = rcu_dereference(*this_cpu_ptr(&cpufreq_update_util_data)); + if (data && data->func) + data->func(data, time, util, max); + + rcu_read_unlock(); +} + DEFINE_MUTEX(cpufreq_governor_lock); /* Flag to suspend/resume CPUFreq governors */ diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index d0bf555b6bbf..704d85bf7242 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -151,6 +151,36 @@ static inline bool policy_is_shared(struct cpufreq_policy *policy) extern struct kobject *cpufreq_global_kobject; #ifdef CONFIG_CPU_FREQ +void cpufreq_update_util(u64 time, unsigned long util, unsigned long max); + +/** + * cpufreq_trigger_update - Trigger CPU performance state evaluation if needed. + * @time: Current time. + * + * The way cpufreq is currently arranged requires it to evaluate the CPU + * performance state (frequency/voltage) on a regular basis to prevent it from + * being stuck in a completely inadequate performance level for too long. + * That is not guaranteed to happen if the updates are only triggered from CFS, + * though, because they may not be coming in if RT or deadline tasks are active + * all the time (or there are RT and DL tasks only). + * + * As a workaround for that issue, this function is called by the RT and DL + * sched classes to trigger extra cpufreq updates to prevent it from stalling, + * but that really is a band-aid. Going forward it should be replaced with + * solutions targeted more specifically at RT and DL tasks. + */ +static inline void cpufreq_trigger_update(u64 time) +{ + cpufreq_update_util(time, ULONG_MAX, 0); +} + +struct update_util_data { + void (*func)(struct update_util_data *data, + u64 time, unsigned long util, unsigned long max); +}; + +void cpufreq_set_update_util_data(int cpu, struct update_util_data *data); + unsigned int cpufreq_get(unsigned int cpu); unsigned int cpufreq_quick_get(unsigned int cpu); unsigned int cpufreq_quick_get_max(unsigned int cpu); @@ -162,6 +192,10 @@ int cpufreq_update_policy(unsigned int cpu); bool have_governor_per_policy(void); struct kobject *get_governor_parent_kobj(struct cpufreq_policy *policy); #else +static inline void cpufreq_update_util(u64 time, unsigned long util, + unsigned long max) {} +static inline void cpufreq_trigger_update(u64 time) {} + static inline unsigned int cpufreq_get(unsigned int cpu) { return 0; diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index cd64c979d0e1..21a0aa6f810d 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -726,6 +726,10 @@ static void update_curr_dl(struct rq *rq) if (!dl_task(curr) || !on_dl_rq(dl_se)) return; + /* Kick cpufreq (see the comment in linux/cpufreq.h). 
*/ + if (cpu_of(rq) == smp_processor_id()) + cpufreq_trigger_update(rq_clock(rq)); + /* * Consumed budget is computed considering the time as * observed by schedulable tasks (excluding time spent diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 56b7d4b83947..e2987a7e489d 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -2824,7 +2824,8 @@ static inline void update_load_avg(struct sched_entity *se, int update_tg) { struct cfs_rq *cfs_rq = cfs_rq_of(se); u64 now = cfs_rq_clock_task(cfs_rq); - int cpu = cpu_of(rq_of(cfs_rq)); + struct rq *rq = rq_of(cfs_rq); + int cpu = cpu_of(rq); /* * Track task load average for carrying it to new CPU after migrated, and @@ -2836,6 +2837,29 @@ static inline void update_load_avg(struct sched_entity *se, int update_tg) if (update_cfs_rq_load_avg(now, cfs_rq) && update_tg) update_tg_load_avg(cfs_rq, 0); + + if (cpu == smp_processor_id() && &rq->cfs == cfs_rq) { + unsigned long max = rq->cpu_capacity_orig; + + /* + * There are a few boundary cases this might miss but it should + * get called often enough that that should (hopefully) not be + * a real problem -- added to that it only calls on the local + * CPU, so if we enqueue remotely we'll miss an update, but + * the next tick/schedule should update. + * + * It will not get called when we go idle, because the idle + * thread is a different class (!fair), nor will the utilization + * number include things like RT tasks. + * + * As is, the util number is not freq-invariant (we'd have to + * implement arch_scale_freq_capacity() for that). + * + * See cpu_util(). + */ + cpufreq_update_util(rq_clock(rq), + min(cfs_rq->avg.util_avg, max), max); + } } static void attach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se) diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index 8ec86abe0ea1..27f5b03cbdbe 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -945,6 +945,10 @@ static void update_curr_rt(struct rq *rq) if (curr->sched_class != &rt_sched_class) return; + /* Kick cpufreq (see the comment in linux/cpufreq.h). */ + if (cpu_of(rq) == smp_processor_id()) + cpufreq_trigger_update(rq_clock(rq)); + delta_exec = rq_clock_task(rq) - curr->se.exec_start; if (unlikely((s64)delta_exec <= 0)) return; diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 10f16374df7f..f042190c8002 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -9,6 +9,7 @@ #include #include #include +#include #include "cpupri.h" #include "cpudeadline.h" -- cgit v1.2.3 From 68e80dae09033d778b98dc88e5bfe8fdade188e5 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 9 Feb 2016 09:01:35 +0530 Subject: Revert "cpufreq: Drop rwsem lock around CPUFREQ_GOV_POLICY_EXIT" Earlier, when the struct freq-attr was used to represent governor attributes, the standard cpufreq show/store sysfs attribute callbacks were applied to the governor tunable attributes and they always acquire the policy->rwsem lock before carrying out the operation. That could have resulted in an ABBA deadlock if governor tunable attributes are removed under policy->rwsem while one of them is being accessed concurrently (if sysfs attributes removal wins the race, it will wait for the access to complete with policy->rwsem held while the attribute callback will block on policy->rwsem indefinitely). 
We attempted to address this issue by dropping policy->rwsem around governor tunable attributes removal (that is, around invocations of the ->governor callback with the event arg equal to CPUFREQ_GOV_POLICY_EXIT) in cpufreq_set_policy(), but that opened up race conditions that had not been possible with policy->rwsem held all the time. The previous commit, "cpufreq: governor: New sysfs show/store callbacks for governor tunables", fixed the original ABBA deadlock by adding new governor-specific show/store callbacks. We don't have to drop rwsem around invocations of governor event CPUFREQ_GOV_POLICY_EXIT anymore, and the original fix can be reverted now. Fixes: 955ef4833574 (cpufreq: Drop rwsem lock around CPUFREQ_GOV_POLICY_EXIT) Signed-off-by: Viresh Kumar Reported-by: Juri Lelli Tested-by: Juri Lelli Tested-by: Shilpasri G Bhat Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq.c | 5 ----- include/linux/cpufreq.h | 4 ---- 2 files changed, 9 deletions(-) (limited to 'include/linux') diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index e172b2a02c1d..e92e9eab7c6c 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -2205,10 +2205,7 @@ static int cpufreq_set_policy(struct cpufreq_policy *policy, return ret; } - up_write(&policy->rwsem); ret = __cpufreq_governor(policy, CPUFREQ_GOV_POLICY_EXIT); - down_write(&policy->rwsem); - if (ret) { pr_err("%s: Failed to Exit Governor: %s (%d)\n", __func__, old_gov->name, ret); @@ -2224,9 +2221,7 @@ static int cpufreq_set_policy(struct cpufreq_policy *policy, if (!ret) goto out; - up_write(&policy->rwsem); __cpufreq_governor(policy, CPUFREQ_GOV_POLICY_EXIT); - down_write(&policy->rwsem); } /* new governor failed, so re-start old one */ diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 704d85bf7242..cac3d1ba8200 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -100,10 +100,6 @@ struct cpufreq_policy { * - Any routine that will write to the policy structure and/or may take away * the policy altogether (eg. CPU hotplug), will hold this lock in write * mode before doing so. - * - * Additional rules: - * - Lock should not be held across - * __cpufreq_governor(data, CPUFREQ_GOV_POLICY_EXIT); */ struct rw_semaphore rwsem; -- cgit v1.2.3 From 242aa883a64d8c54cfeee47f3603b21bc705e081 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Mon, 22 Feb 2016 16:36:44 +0530 Subject: cpufreq: Remove 'policy->governor_enabled' The entire sequence of events (like INIT/START or STOP/EXIT) for which cpufreq_governor() is called, is guaranteed to be protected by policy->rwsem now. The additional checks that were added earlier (as we were forced to drop policy->rwsem before calling cpufreq_governor() for the EXIT event), aren't required anymore. Moreover, they weren't really sufficient. They just take care of START/STOP events, but not INIT/EXIT, and the state machine was never maintained properly by them. Kill the unnecessary checks and policy->governor_enabled field. Signed-off-by: Viresh Kumar Signed-off-by: Rafael J.
Wysocki --- drivers/cpufreq/cpufreq.c | 17 ----------------- include/linux/cpufreq.h | 1 - 2 files changed, 18 deletions(-) (limited to 'include/linux') diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index b3d05a905034..dd568aaf2728 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -2010,17 +2010,6 @@ static int cpufreq_governor(struct cpufreq_policy *policy, unsigned int event) pr_debug("%s: for CPU %u, event %u\n", __func__, policy->cpu, event); - if ((policy->governor_enabled && event == CPUFREQ_GOV_START) - || (!policy->governor_enabled - && (event == CPUFREQ_GOV_LIMITS || event == CPUFREQ_GOV_STOP))) { - return -EBUSY; - } - - if (event == CPUFREQ_GOV_STOP) - policy->governor_enabled = false; - else if (event == CPUFREQ_GOV_START) - policy->governor_enabled = true; - ret = policy->governor->governor(policy, event); if (!ret) { @@ -2028,12 +2017,6 @@ static int cpufreq_governor(struct cpufreq_policy *policy, unsigned int event) policy->governor->initialized++; else if (event == CPUFREQ_GOV_POLICY_EXIT) policy->governor->initialized--; - } else { - /* Restore original values */ - if (event == CPUFREQ_GOV_STOP) - policy->governor_enabled = true; - else if (event == CPUFREQ_GOV_START) - policy->governor_enabled = false; } if (((event == CPUFREQ_GOV_POLICY_INIT) && ret) || diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index cac3d1ba8200..a50c5b2e3bf2 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -80,7 +80,6 @@ struct cpufreq_policy { unsigned int last_policy; /* policy before unplug */ struct cpufreq_governor *governor; /* see below */ void *governor_data; - bool governor_enabled; /* governor start/stop flag */ char last_governor[CPUFREQ_NAME_LEN]; /* last governor used */ struct work_struct update; /* if update_policy() needs to be -- cgit v1.2.3 From 70aba44b6cade8dc63261c4f97d5e83b0c02d07a Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Wed, 9 Mar 2016 22:00:27 +0700 Subject: Revert "gpio: lp3943: Drop pin_used and lp3943_gpio_request/lp3943_gpio_free" This reverts commit 3fab91ea284a3b795327dda915a3c150a49e4be2. 
--- drivers/gpio/gpio-lp3943.c | 22 ++++++++++++++++++++++ include/linux/mfd/lp3943.h | 6 ++++++ 2 files changed, 28 insertions(+) (limited to 'include/linux') diff --git a/drivers/gpio/gpio-lp3943.c b/drivers/gpio/gpio-lp3943.c index fdec94b0cfb0..6dc6725403ec 100644 --- a/drivers/gpio/gpio-lp3943.c +++ b/drivers/gpio/gpio-lp3943.c @@ -45,6 +45,26 @@ struct lp3943_gpio { u16 input_mask; /* 1 = GPIO is input direction, 0 = output */ }; +static int lp3943_gpio_request(struct gpio_chip *chip, unsigned offset) +{ + struct lp3943_gpio *lp3943_gpio = gpiochip_get_data(chip); + struct lp3943 *lp3943 = lp3943_gpio->lp3943; + + /* Return an error if the pin is already assigned */ + if (test_and_set_bit(offset, &lp3943->pin_used)) + return -EBUSY; + + return 0; +} + +static void lp3943_gpio_free(struct gpio_chip *chip, unsigned offset) +{ + struct lp3943_gpio *lp3943_gpio = gpiochip_get_data(chip); + struct lp3943 *lp3943 = lp3943_gpio->lp3943; + + clear_bit(offset, &lp3943->pin_used); +} + static int lp3943_gpio_set_mode(struct lp3943_gpio *lp3943_gpio, u8 offset, u8 val) { @@ -157,6 +177,8 @@ static int lp3943_gpio_direction_output(struct gpio_chip *chip, unsigned offset, static const struct gpio_chip lp3943_gpio_chip = { .label = "lp3943", .owner = THIS_MODULE, + .request = lp3943_gpio_request, + .free = lp3943_gpio_free, .direction_input = lp3943_gpio_direction_input, .get = lp3943_gpio_get, .direction_output = lp3943_gpio_direction_output, diff --git a/include/linux/mfd/lp3943.h b/include/linux/mfd/lp3943.h index d3a738118b27..3490db782988 100644 --- a/include/linux/mfd/lp3943.h +++ b/include/linux/mfd/lp3943.h @@ -94,12 +94,18 @@ struct lp3943_reg_cfg { * @regmap: Used for I2C communication on accessing registers * @pdata: LP3943 platform specific data * @mux_cfg: Register configuration for pin MUX + * @pin_used: Bit mask for output pin used. + * This bitmask is used for pin assignment management. + * 1 = pin used, 0 = available. + * Only LSB 16 bits are used, but it is unsigned long type + * for atomic bitwise operations. */ struct lp3943 { struct device *dev; struct regmap *regmap; struct lp3943_platform_data *pdata; const struct lp3943_reg_cfg *mux_cfg; + unsigned long pin_used; }; int lp3943_read_byte(struct lp3943 *lp3943, u8 reg, u8 *read); -- cgit v1.2.3 From e498b4984db82b4ba3ceea7dba813222a31e9c2e Mon Sep 17 00:00:00 2001 From: Laxman Dewangan Date: Wed, 9 Mar 2016 18:40:06 +0530 Subject: thermal: of-thermal: Add devm version of thermal_zone_of_sensor_register Add resource-managed versions of thermal_zone_of_sensor_register() and thermal_zone_of_sensor_unregister(). This helps reduce code size in error paths, allows removal of driver remove() callbacks, and ensures a proper sequence for deallocations.
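A sketch of a sensor driver probe() using the new devm variant; the foo_* names are illustrative stand-ins, but the registration call matches the signature added below.

/* Sketch: the foo_* names are hypothetical. */
static int foo_get_temp(void *data, int *temp)
{
	*temp = 25000;	/* fixed 25 C in millicelsius; a real driver reads hardware */
	return 0;
}

static const struct thermal_zone_of_device_ops foo_tz_ops = {
	.get_temp = foo_get_temp,
};

static int foo_probe(struct platform_device *pdev)
{
	struct thermal_zone_device *tzd;

	/* Sensor 0 of this IP; devres unregisters the zone on driver
	 * detach, so no explicit cleanup in remove() is needed.
	 */
	tzd = devm_thermal_zone_of_sensor_register(&pdev->dev, 0, NULL,
						   &foo_tz_ops);
	if (IS_ERR(tzd))
		return PTR_ERR(tzd);

	return 0;
}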
Signed-off-by: Laxman Dewangan Signed-off-by: Eduardo Valentin --- drivers/thermal/of-thermal.c | 81 ++++++++++++++++++++++++++++++++++++++++++++ include/linux/thermal.h | 18 ++++++++++ 2 files changed, 99 insertions(+) (limited to 'include/linux') diff --git a/drivers/thermal/of-thermal.c b/drivers/thermal/of-thermal.c index 9043f8f91852..49ac23d3e776 100644 --- a/drivers/thermal/of-thermal.c +++ b/drivers/thermal/of-thermal.c @@ -555,6 +555,87 @@ void thermal_zone_of_sensor_unregister(struct device *dev, } EXPORT_SYMBOL_GPL(thermal_zone_of_sensor_unregister); +static void devm_thermal_zone_of_sensor_release(struct device *dev, void *res) +{ + thermal_zone_of_sensor_unregister(dev, + *(struct thermal_zone_device **)res); +} + +static int devm_thermal_zone_of_sensor_match(struct device *dev, void *res, + void *data) +{ + struct thermal_zone_device **r = res; + + if (WARN_ON(!r || !*r)) + return 0; + + return *r == data; +} + +/** + * devm_thermal_zone_of_sensor_register - Resource managed version of + * thermal_zone_of_sensor_register() + * @dev: a valid struct device pointer of a sensor device. Must contain + * a valid .of_node, for the sensor node. + * @sensor_id: a sensor identifier, in case the sensor IP has more + * than one sensor + * @data: a private pointer (owned by the caller) that will be passed + * back, when a temperature reading is needed. + * @ops: struct thermal_zone_of_device_ops *. Must contain at least .get_temp. + * + * Refer to thermal_zone_of_sensor_register() for more details. + * + * Return: On success returns a valid struct thermal_zone_device, + * otherwise, it returns a corresponding ERR_PTR(). Caller must + * check the return value with the help of the IS_ERR() helper. + * The registered thermal_zone_device will automatically be + * released when the device is unbound. + */ +struct thermal_zone_device *devm_thermal_zone_of_sensor_register( + struct device *dev, int sensor_id, + void *data, const struct thermal_zone_of_device_ops *ops) +{ + struct thermal_zone_device **ptr, *tzd; + + ptr = devres_alloc(devm_thermal_zone_of_sensor_release, sizeof(*ptr), + GFP_KERNEL); + if (!ptr) + return ERR_PTR(-ENOMEM); + + tzd = thermal_zone_of_sensor_register(dev, sensor_id, data, ops); + if (IS_ERR(tzd)) { + devres_free(ptr); + return tzd; + } + + *ptr = tzd; + devres_add(dev, ptr); + + return tzd; +} +EXPORT_SYMBOL_GPL(devm_thermal_zone_of_sensor_register); + +/** + * devm_thermal_zone_of_sensor_unregister - Resource managed version of + * thermal_zone_of_sensor_unregister(). + * @dev: Device for which the resource was allocated. + * @tzd: a pointer to struct thermal_zone_device where the sensor is registered. + * + * This function removes the sensor callbacks and private data from the + * thermal zone device registered with devm_thermal_zone_of_sensor_register() + * API. It will also silence the zone by removing the .get_temp() and .get_trend() + * thermal zone device callbacks. + * Normally this function will not need to be called and the resource + * management code will ensure that the resource is freed.
+ */ +void devm_thermal_zone_of_sensor_unregister(struct device *dev, + struct thermal_zone_device *tzd) +{ + WARN_ON(devres_release(dev, devm_thermal_zone_of_sensor_release, + devm_thermal_zone_of_sensor_match, tzd)); +} +EXPORT_SYMBOL_GPL(devm_thermal_zone_of_sensor_unregister); + /*** functions parsing device tree nodes ***/ /** diff --git a/include/linux/thermal.h b/include/linux/thermal.h index e13a1ace50e9..9c481991fdc7 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -362,6 +362,11 @@ thermal_zone_of_sensor_register(struct device *dev, int id, void *data, const struct thermal_zone_of_device_ops *ops); void thermal_zone_of_sensor_unregister(struct device *dev, struct thermal_zone_device *tz); +struct thermal_zone_device *devm_thermal_zone_of_sensor_register( + struct device *dev, int id, void *data, + const struct thermal_zone_of_device_ops *ops); +void devm_thermal_zone_of_sensor_unregister(struct device *dev, + struct thermal_zone_device *tz); #else static inline struct thermal_zone_device * thermal_zone_of_sensor_register(struct device *dev, int id, void *data, @@ -376,6 +381,19 @@ void thermal_zone_of_sensor_unregister(struct device *dev, { } +static inline struct thermal_zone_device *devm_thermal_zone_of_sensor_register( + struct device *dev, int id, void *data, + const struct thermal_zone_of_device_ops *ops) +{ + return ERR_PTR(-ENODEV); +} + +static inline +void devm_thermal_zone_of_sensor_unregister(struct device *dev, + struct thermal_zone_device *tz) +{ +} + #endif #if IS_ENABLED(CONFIG_THERMAL) -- cgit v1.2.3 From ff3cc952d3f009e6c376cc40651b87187ce364a6 Mon Sep 17 00:00:00 2001 From: Toshi Kani Date: Wed, 9 Mar 2016 12:47:04 -0700 Subject: resource: Add remove_resource interface insert_resource() and insert_resource_conflict() are called by resource producers to insert a new resource. When there is any conflict, they move conflicting resources down to the children of the new resource. There is no destructor of these interfaces, however. Add remove_resource(), which removes a resource previously inserted by insert_resource() or insert_resource_conflict(), and moves the children up to where they were before. __release_resource() is changed to have @release_child, so that this function can be used for remove_resource() as well. Also add comments to clarify that these functions are intended for producers of resources to avoid any confusion with request/release_resource() for consumers. 
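As a sketch, a producer pairing the two calls might look like this; the region name and addresses are made up for illustration.

/* Sketch: a producer inserting a region and later removing it. */
static struct resource foo_region = {
	.name	= "foo-region",		/* hypothetical region */
	.start	= 0xfed40000,
	.end	= 0xfed44fff,
	.flags	= IORESOURCE_MEM,
};

static int foo_attach(void)
{
	/* Existing conflicting resources become children of foo_region. */
	return insert_resource(&iomem_resource, &foo_region);
}

static void foo_detach(void)
{
	/* Children are moved back up to where they were before. */
	remove_resource(&foo_region);
}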
Signed-off-by: Toshi Kani Cc: Ingo Molnar Cc: Borislav Petkov Cc: Andrew Morton Cc: Dan Williams Signed-off-by: Dan Williams --- include/linux/ioport.h | 1 + kernel/resource.c | 51 +++++++++++++++++++++++++++++++++++++++++++++----- 2 files changed, 47 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ioport.h b/include/linux/ioport.h index afb45597fb5f..8017b8bf45fa 100644 --- a/include/linux/ioport.h +++ b/include/linux/ioport.h @@ -174,6 +174,7 @@ extern void reserve_region_with_split(struct resource *root, extern struct resource *insert_resource_conflict(struct resource *parent, struct resource *new); extern int insert_resource(struct resource *parent, struct resource *new); extern void insert_resource_expand_to_fit(struct resource *root, struct resource *new); +extern int remove_resource(struct resource *old); extern void arch_remove_reservations(struct resource *avail); extern int allocate_resource(struct resource *root, struct resource *new, resource_size_t size, resource_size_t min, diff --git a/kernel/resource.c b/kernel/resource.c index 5a56e8f24058..effb6ee2c3e8 100644 --- a/kernel/resource.c +++ b/kernel/resource.c @@ -233,9 +233,9 @@ static struct resource * __request_resource(struct resource *root, struct resour } } -static int __release_resource(struct resource *old) +static int __release_resource(struct resource *old, bool release_child) { - struct resource *tmp, **p; + struct resource *tmp, **p, *chd; p = &old->parent->child; for (;;) { @@ -243,7 +243,17 @@ static int __release_resource(struct resource *old) if (!tmp) break; if (tmp == old) { - *p = tmp->sibling; + if (release_child || !(tmp->child)) { + *p = tmp->sibling; + } else { + for (chd = tmp->child;; chd = chd->sibling) { + chd->parent = tmp->parent; + if (!(chd->sibling)) + break; + } + *p = tmp->child; + chd->sibling = tmp->sibling; + } old->parent = NULL; return 0; } @@ -325,7 +335,7 @@ int release_resource(struct resource *old) int retval; write_lock(&resource_lock); - retval = __release_resource(old); + retval = __release_resource(old, true); write_unlock(&resource_lock); return retval; } @@ -679,7 +689,7 @@ static int reallocate_resource(struct resource *root, struct resource *old, old->start = new.start; old->end = new.end; } else { - __release_resource(old); + __release_resource(old, true); *old = new; conflict = __request_resource(root, old); BUG_ON(conflict); @@ -825,6 +835,9 @@ static struct resource * __insert_resource(struct resource *parent, struct resou * entirely fit within the range of the new resource, then the new * resource is inserted and the conflicting resources become children of * the new resource. + * + * This function is intended for producers of resources, such as FW modules + * and bus drivers. */ struct resource *insert_resource_conflict(struct resource *parent, struct resource *new) { @@ -842,6 +855,9 @@ struct resource *insert_resource_conflict(struct resource *parent, struct resour * @new: new resource to insert * * Returns 0 on success, -EBUSY if the resource can't be inserted. + * + * This function is intended for producers of resources, such as FW modules + * and bus drivers. */ int insert_resource(struct resource *parent, struct resource *new) { @@ -885,6 +901,31 @@ void insert_resource_expand_to_fit(struct resource *root, struct resource *new) write_unlock(&resource_lock); } +/** + * remove_resource - Remove a resource in the resource tree + * @old: resource to remove + * + * Returns 0 on success, -EINVAL if the resource is not valid. 
+ * + * This function removes a resource previously inserted by insert_resource() + * or insert_resource_conflict(), and moves the children (if any) up to + * where they were before. insert_resource() and insert_resource_conflict() + * insert a new resource, and move any conflicting resources down to the + * children of the new resource. + * + * insert_resource(), insert_resource_conflict() and remove_resource() are + * intended for producers of resources, such as FW modules and bus drivers. + */ +int remove_resource(struct resource *old) +{ + int retval; + + write_lock(&resource_lock); + retval = __release_resource(old, false); + write_unlock(&resource_lock); + return retval; +} + static int __adjust_resource(struct resource *res, resource_size_t start, resource_size_t size) { -- cgit v1.2.3 From ff3c44e675054533403909ecb76e78c1d4efbd26 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Mon, 7 Mar 2016 14:11:00 -0800 Subject: rcu: Add list_next_or_null_rcu This is a convenience function that returns the next entry in an RCU list or NULL if at the end of the list. Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- include/linux/rculist.h | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'include/linux') diff --git a/include/linux/rculist.h b/include/linux/rculist.h index 14ec1652daf4..17d4f849c65e 100644 --- a/include/linux/rculist.h +++ b/include/linux/rculist.h @@ -318,6 +318,27 @@ static inline void list_splice_tail_init_rcu(struct list_head *list, likely(__ptr != __next) ? list_entry_rcu(__next, type, member) : NULL; \ }) +/** + * list_next_or_null_rcu - get the next element from a list + * @head: the head for the list. + * @ptr: the list head to take the next element from. + * @type: the type of the struct this is embedded in. + * @member: the name of the list_head within the struct. + * + * Note that if the ptr is at the end of the list, NULL is returned. + * + * This primitive may safely run concurrently with the _rcu list-mutation + * primitives such as list_add_rcu() as long as it's guarded by rcu_read_lock(). + */ +#define list_next_or_null_rcu(head, ptr, type, member) \ +({ \ + struct list_head *__head = (head); \ + struct list_head *__ptr = (ptr); \ + struct list_head *__next = READ_ONCE(__ptr->next); \ + likely(__next != __head) ? list_entry_rcu(__next, type, \ + member) : NULL; \ +}) + /** * list_for_each_entry_rcu - iterate over rcu list of given type * @pos: the type * to use as a loop cursor. -- cgit v1.2.3 From f4a00aacdb5f6784d46e8c999b6bb52ece4b306b Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Mon, 7 Mar 2016 14:11:01 -0800 Subject: net: Make sock_alloc exportable Export it for cases where we want to create sockets by hand. Signed-off-by: Tom Herbert Signed-off-by: David S.
Miller --- include/linux/net.h | 1 + net/socket.c | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/net.h b/include/linux/net.h index 0b4ac7da583a..49175e4ced11 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -215,6 +215,7 @@ int __sock_create(struct net *net, int family, int type, int proto, int sock_create(int family, int type, int proto, struct socket **res); int sock_create_kern(struct net *net, int family, int type, int proto, struct socket **res); int sock_create_lite(int family, int type, int proto, struct socket **res); +struct socket *sock_alloc(void); void sock_release(struct socket *sock); int sock_sendmsg(struct socket *sock, struct msghdr *msg); int sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, diff --git a/net/socket.c b/net/socket.c index c044d1e8508c..38a78d4d50f5 100644 --- a/net/socket.c +++ b/net/socket.c @@ -533,7 +533,7 @@ static const struct inode_operations sockfs_inode_ops = { * NULL is returned. */ -static struct socket *sock_alloc(void) +struct socket *sock_alloc(void) { struct inode *inode; struct socket *sock; @@ -554,6 +554,7 @@ static struct socket *sock_alloc(void) this_cpu_add(sockets_in_use, 1); return sock; } +EXPORT_SYMBOL(sock_alloc); /** * sock_release - close a socket -- cgit v1.2.3 From f092276d85b82504e8a07498f4e9e0c51f06745c Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Mon, 7 Mar 2016 14:11:03 -0800 Subject: net: Add MSG_BATCH flag Add a new msg flag called MSG_BATCH. This flag is used in sendmsg to indicate that more messages will follow (i.e. a batch of messages is being sent). This is similar to MSG_MORE except that the following messages are not merged into one packet, they are sent individually. sendmmsg is updated so that each contained message except for the last one is marked as MSG_BATCH. MSG_BATCH is a performance optimization in cases where a socket implementation can benefit by transmitting packets in a batch. Signed-off-by: Tom Herbert Signed-off-by: David S. 
Miller --- include/linux/socket.h | 1 + net/socket.c | 5 +++++ 2 files changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/socket.h b/include/linux/socket.h index 5bf59c8493b7..d834af22a460 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -274,6 +274,7 @@ struct ucred { #define MSG_MORE 0x8000 /* Sender will send more */ #define MSG_WAITFORONE 0x10000 /* recvmmsg(): block until 1+ packets avail */ #define MSG_SENDPAGE_NOTLAST 0x20000 /* sendpage() internal : not the last page */ +#define MSG_BATCH 0x40000 /* sendmmsg(): more messages coming */ #define MSG_EOF MSG_FIN #define MSG_FASTOPEN 0x20000000 /* Send data in TCP SYN */ diff --git a/net/socket.c b/net/socket.c index 0dd4dd818f41..886649c88d8f 100644 --- a/net/socket.c +++ b/net/socket.c @@ -2008,6 +2008,7 @@ int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen, struct compat_mmsghdr __user *compat_entry; struct msghdr msg_sys; struct used_address used_address; + unsigned int oflags = flags; if (vlen > UIO_MAXIOV) vlen = UIO_MAXIOV; @@ -2022,8 +2023,12 @@ int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen, entry = mmsg; compat_entry = (struct compat_mmsghdr __user *)mmsg; err = 0; + flags |= MSG_BATCH; while (datagrams < vlen) { + if (datagrams == vlen - 1) + flags = oflags; + if (MSG_CMSG_COMPAT & flags) { err = ___sys_sendmsg(sock, (struct user_msghdr __user *)compat_entry, &msg_sys, flags, &used_address, MSG_EOR); -- cgit v1.2.3 From ab7ac4eb9832e32a09f4e8042705484d2fb0aad3 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Mon, 7 Mar 2016 14:11:06 -0800 Subject: kcm: Kernel Connection Multiplexor module This module implements the Kernel Connection Multiplexor. Kernel Connection Multiplexor (KCM) is a facility that provides a message based interface over TCP for generic application protocols. With KCM an application can efficiently send and receive application protocol messages over TCP using datagram sockets. For more information see the included Documentation/networking/kcm.txt Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- include/linux/socket.h | 6 +- include/net/kcm.h | 125 +++ include/uapi/linux/kcm.h | 40 + net/Kconfig | 1 + net/Makefile | 1 + net/kcm/Kconfig | 10 + net/kcm/Makefile | 3 + net/kcm/kcmsock.c | 2016 ++++++++++++++++++++++++++++++++++++++++++++++ 8 files changed, 2201 insertions(+), 1 deletion(-) create mode 100644 include/net/kcm.h create mode 100644 include/uapi/linux/kcm.h create mode 100644 net/kcm/Kconfig create mode 100644 net/kcm/Makefile create mode 100644 net/kcm/kcmsock.c (limited to 'include/linux') diff --git a/include/linux/socket.h b/include/linux/socket.h index d834af22a460..73bf6c6a833b 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -200,7 +200,9 @@ struct ucred { #define AF_ALG 38 /* Algorithm sockets */ #define AF_NFC 39 /* NFC sockets */ #define AF_VSOCK 40 /* vSockets */ -#define AF_MAX 41 /* For now.. */ +#define AF_KCM 41 /* Kernel Connection Multiplexor*/ + +#define AF_MAX 42 /* For now.. */ /* Protocol families, same as address families. */ #define PF_UNSPEC AF_UNSPEC @@ -246,6 +248,7 @@ struct ucred { #define PF_ALG AF_ALG #define PF_NFC AF_NFC #define PF_VSOCK AF_VSOCK +#define PF_KCM AF_KCM #define PF_MAX AF_MAX /* Maximum queue length specifiable by listen. 
*/ @@ -323,6 +326,7 @@ struct ucred { #define SOL_CAIF 278 #define SOL_ALG 279 #define SOL_NFC 280 +#define SOL_KCM 281 /* IPX options */ #define IPX_TYPE 1 diff --git a/include/net/kcm.h b/include/net/kcm.h new file mode 100644 index 000000000000..1bcae39070ec --- /dev/null +++ b/include/net/kcm.h @@ -0,0 +1,125 @@ +/* + * Kernel Connection Multiplexor + * + * Copyright (c) 2016 Tom Herbert + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation. + */ + +#ifndef __NET_KCM_H_ +#define __NET_KCM_H_ + +#include +#include +#include + +extern unsigned int kcm_net_id; + +struct kcm_tx_msg { + unsigned int sent; + unsigned int fragidx; + unsigned int frag_offset; + unsigned int msg_flags; + struct sk_buff *frag_skb; + struct sk_buff *last_skb; +}; + +struct kcm_rx_msg { + int full_len; + int accum_len; + int offset; +}; + +/* Socket structure for KCM client sockets */ +struct kcm_sock { + struct sock sk; + struct kcm_mux *mux; + struct list_head kcm_sock_list; + int index; + u32 done : 1; + struct work_struct done_work; + + /* Transmit */ + struct kcm_psock *tx_psock; + struct work_struct tx_work; + struct list_head wait_psock_list; + struct sk_buff *seq_skb; + + /* Don't use bit fields here, these are set under different locks */ + bool tx_wait; + bool tx_wait_more; + + /* Receive */ + struct kcm_psock *rx_psock; + struct list_head wait_rx_list; /* KCMs waiting for receiving */ + bool rx_wait; + u32 rx_disabled : 1; +}; + +struct bpf_prog; + +/* Structure for an attached lower socket */ +struct kcm_psock { + struct sock *sk; + struct kcm_mux *mux; + int index; + + u32 tx_stopped : 1; + u32 rx_stopped : 1; + u32 done : 1; + u32 unattaching : 1; + + void (*save_state_change)(struct sock *sk); + void (*save_data_ready)(struct sock *sk); + void (*save_write_space)(struct sock *sk); + + struct list_head psock_list; + + /* Receive */ + struct sk_buff *rx_skb_head; + struct sk_buff **rx_skb_nextp; + struct sk_buff *ready_rx_msg; + struct list_head psock_ready_list; + struct work_struct rx_work; + struct delayed_work rx_delayed_work; + struct bpf_prog *bpf_prog; + struct kcm_sock *rx_kcm; + + /* Transmit */ + struct kcm_sock *tx_kcm; + struct list_head psock_avail_list; +}; + +/* Per net MUX list */ +struct kcm_net { + struct mutex mutex; + struct list_head mux_list; + int count; +}; + +/* Structure for a MUX */ +struct kcm_mux { + struct list_head kcm_mux_list; + struct rcu_head rcu; + struct kcm_net *knet; + + struct list_head kcm_socks; /* All KCM sockets on MUX */ + int kcm_socks_cnt; /* Total KCM socket count for MUX */ + struct list_head psocks; /* List of all psocks on MUX */ + int psocks_cnt; /* Total attached sockets */ + + /* Receive */ + spinlock_t rx_lock ____cacheline_aligned_in_smp; + struct list_head kcm_rx_waiters; /* KCMs waiting for receiving */ + struct list_head psocks_ready; /* List of psocks with a msg ready */ + struct sk_buff_head rx_hold_queue; + + /* Transmit */ + spinlock_t lock ____cacheline_aligned_in_smp; /* TX and mux locking */ + struct list_head psocks_avail; /* List of available psocks */ + struct list_head kcm_tx_waiters; /* KCMs waiting for a TX psock */ +}; + +#endif /* __NET_KCM_H_ */ diff --git a/include/uapi/linux/kcm.h b/include/uapi/linux/kcm.h new file mode 100644 index 000000000000..a5a530940b99 --- /dev/null +++ b/include/uapi/linux/kcm.h @@ -0,0 +1,40 @@ +/* + * Kernel Connection Multiplexor + * + * Copyright (c) 2016 Tom 
Herbert + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation. + * + * User API to clone KCM sockets and attach transport socket to a KCM + * multiplexor. + */ + +#ifndef KCM_KERNEL_H +#define KCM_KERNEL_H + +struct kcm_attach { + int fd; + int bpf_fd; +}; + +struct kcm_unattach { + int fd; +}; + +struct kcm_clone { + int fd; +}; + +#define SIOCKCMATTACH (SIOCPROTOPRIVATE + 0) +#define SIOCKCMUNATTACH (SIOCPROTOPRIVATE + 1) +#define SIOCKCMCLONE (SIOCPROTOPRIVATE + 2) + +#define KCMPROTO_CONNECTED 0 + +/* Socket options */ +#define KCM_RECV_DISABLE 1 + +#endif + diff --git a/net/Kconfig b/net/Kconfig index 2760825e53fa..10640d5f8bee 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -360,6 +360,7 @@ source "net/can/Kconfig" source "net/irda/Kconfig" source "net/bluetooth/Kconfig" source "net/rxrpc/Kconfig" +source "net/kcm/Kconfig" config FIB_RULES bool diff --git a/net/Makefile b/net/Makefile index a5d04098dfce..81d14119eab5 100644 --- a/net/Makefile +++ b/net/Makefile @@ -34,6 +34,7 @@ obj-$(CONFIG_IRDA) += irda/ obj-$(CONFIG_BT) += bluetooth/ obj-$(CONFIG_SUNRPC) += sunrpc/ obj-$(CONFIG_AF_RXRPC) += rxrpc/ +obj-$(CONFIG_AF_KCM) += kcm/ obj-$(CONFIG_ATM) += atm/ obj-$(CONFIG_L2TP) += l2tp/ obj-$(CONFIG_DECNET) += decnet/ diff --git a/net/kcm/Kconfig b/net/kcm/Kconfig new file mode 100644 index 000000000000..5db94d940ecc --- /dev/null +++ b/net/kcm/Kconfig @@ -0,0 +1,10 @@ + +config AF_KCM + tristate "KCM sockets" + depends on INET + select BPF_SYSCALL + ---help--- + KCM (Kernel Connection Multiplexor) sockets provide a method + for multiplexing messages of a message based application + protocol over kernel connectons (e.g. TCP connections). 
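For orientation, the user API declared above can be exercised from userspace roughly as follows. This is a minimal sketch, not part of the patch: it assumes headers that already carry the new AF_KCM and SIOCKCM* definitions, that tcp_fd is an established TCP connection, and that bpf_prog_fd is a loaded BPF_PROG_TYPE_SOCKET_FILTER program which, per Documentation/networking/kcm.txt, returns the total length of a message given its header.

#include <sys/ioctl.h>
#include <sys/socket.h>
#include <unistd.h>
#include <linux/kcm.h>
#include <linux/sockios.h>

/* Hypothetical helper (names are illustrative): create a KCM socket
 * and attach an existing TCP connection plus a framing BPF program.
 */
int kcm_attach_tcp(int tcp_fd, int bpf_prog_fd)
{
	struct kcm_attach attach = {
		.fd = tcp_fd,		/* transport socket to attach */
		.bpf_fd = bpf_prog_fd,	/* parses message length from header */
	};
	int kcm_fd = socket(AF_KCM, SOCK_SEQPACKET, KCMPROTO_CONNECTED);

	if (kcm_fd < 0)
		return -1;
	if (ioctl(kcm_fd, SIOCKCMATTACH, &attach) < 0) {
		close(kcm_fd);
		return -1;
	}
	/* Application messages now flow over send()/recv() on kcm_fd;
	 * SIOCKCMCLONE yields additional KCM sockets on the same mux.
	 */
	return kcm_fd;
}

kcm_create() in the module accepts both SOCK_DGRAM and SOCK_SEQPACKET, so either type works here; KCMPROTO_CONNECTED is the only protocol currently defined.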
+ diff --git a/net/kcm/Makefile b/net/kcm/Makefile new file mode 100644 index 000000000000..cb525f7c5a13 --- /dev/null +++ b/net/kcm/Makefile @@ -0,0 +1,3 @@ +obj-$(CONFIG_AF_KCM) += kcm.o + +kcm-y := kcmsock.o diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c new file mode 100644 index 000000000000..30ef69ac6b81 --- /dev/null +++ b/net/kcm/kcmsock.c @@ -0,0 +1,2016 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +unsigned int kcm_net_id; + +static struct kmem_cache *kcm_psockp __read_mostly; +static struct kmem_cache *kcm_muxp __read_mostly; +static struct workqueue_struct *kcm_wq; + +static inline struct kcm_sock *kcm_sk(const struct sock *sk) +{ + return (struct kcm_sock *)sk; +} + +static inline struct kcm_tx_msg *kcm_tx_msg(struct sk_buff *skb) +{ + return (struct kcm_tx_msg *)skb->cb; +} + +static inline struct kcm_rx_msg *kcm_rx_msg(struct sk_buff *skb) +{ + return (struct kcm_rx_msg *)((void *)skb->cb + + offsetof(struct qdisc_skb_cb, data)); +} + +static void report_csk_error(struct sock *csk, int err) +{ + csk->sk_err = EPIPE; + csk->sk_error_report(csk); +} + +/* Callback lock held */ +static void kcm_abort_rx_psock(struct kcm_psock *psock, int err, + struct sk_buff *skb) +{ + struct sock *csk = psock->sk; + + /* Unrecoverable error in receive */ + + if (psock->rx_stopped) + return; + + psock->rx_stopped = 1; + + /* Report an error on the lower socket */ + report_csk_error(csk, err); +} + +static void kcm_abort_tx_psock(struct kcm_psock *psock, int err, + bool wakeup_kcm) +{ + struct sock *csk = psock->sk; + struct kcm_mux *mux = psock->mux; + + /* Unrecoverable error in transmit */ + + spin_lock_bh(&mux->lock); + + if (psock->tx_stopped) { + spin_unlock_bh(&mux->lock); + return; + } + + psock->tx_stopped = 1; + + if (!psock->tx_kcm) { + /* Take off psocks_avail list */ + list_del(&psock->psock_avail_list); + } else if (wakeup_kcm) { + /* In this case psock is being aborted while outside of + * write_msgs and psock is reserved. Schedule tx_work + * to handle the failure there. Need to commit tx_stopped + * before queuing work. + */ + smp_mb(); + + queue_work(kcm_wq, &psock->tx_kcm->tx_work); + } + + spin_unlock_bh(&mux->lock); + + /* Report error on lower socket */ + report_csk_error(csk, err); +} + +static int kcm_queue_rcv_skb(struct sock *sk, struct sk_buff *skb); + +/* KCM is ready to receive messages on its queue-- either the KCM is new or + * has become unblocked after being blocked on full socket buffer. Queue any + * pending ready messages on a psock. RX mux lock held. + */ +static void kcm_rcv_ready(struct kcm_sock *kcm) +{ + struct kcm_mux *mux = kcm->mux; + struct kcm_psock *psock; + struct sk_buff *skb; + + if (unlikely(kcm->rx_wait || kcm->rx_psock || kcm->rx_disabled)) + return; + + while (unlikely((skb = __skb_dequeue(&mux->rx_hold_queue)))) { + if (kcm_queue_rcv_skb(&kcm->sk, skb)) { + /* Assuming buffer limit has been reached */ + skb_queue_head(&mux->rx_hold_queue, skb); + WARN_ON(!sk_rmem_alloc_get(&kcm->sk)); + return; + } + } + + while (!list_empty(&mux->psocks_ready)) { + psock = list_first_entry(&mux->psocks_ready, struct kcm_psock, + psock_ready_list); + + if (kcm_queue_rcv_skb(&kcm->sk, psock->ready_rx_msg)) { + /* Assuming buffer limit has been reached */ + WARN_ON(!sk_rmem_alloc_get(&kcm->sk)); + return; + } + + /* Consumed the ready message on the psock. Schedule rx_work to + * get more messages. 
+ */ + list_del(&psock->psock_ready_list); + psock->ready_rx_msg = NULL; + + /* Commit clearing of ready_rx_msg for queuing work */ + smp_mb(); + + queue_work(kcm_wq, &psock->rx_work); + } + + /* Buffer limit is okay now, add to ready list */ + list_add_tail(&kcm->wait_rx_list, + &kcm->mux->kcm_rx_waiters); + kcm->rx_wait = true; +} + +static void kcm_rfree(struct sk_buff *skb) +{ + struct sock *sk = skb->sk; + struct kcm_sock *kcm = kcm_sk(sk); + struct kcm_mux *mux = kcm->mux; + unsigned int len = skb->truesize; + + sk_mem_uncharge(sk, len); + atomic_sub(len, &sk->sk_rmem_alloc); + + /* For reading rx_wait and rx_psock without holding lock */ + smp_mb__after_atomic(); + + if (!kcm->rx_wait && !kcm->rx_psock && + sk_rmem_alloc_get(sk) < sk->sk_rcvlowat) { + spin_lock_bh(&mux->rx_lock); + kcm_rcv_ready(kcm); + spin_unlock_bh(&mux->rx_lock); + } +} + +static int kcm_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) +{ + struct sk_buff_head *list = &sk->sk_receive_queue; + + if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf) + return -ENOMEM; + + if (!sk_rmem_schedule(sk, skb, skb->truesize)) + return -ENOBUFS; + + skb->dev = NULL; + + skb_orphan(skb); + skb->sk = sk; + skb->destructor = kcm_rfree; + atomic_add(skb->truesize, &sk->sk_rmem_alloc); + sk_mem_charge(sk, skb->truesize); + + skb_queue_tail(list, skb); + + if (!sock_flag(sk, SOCK_DEAD)) + sk->sk_data_ready(sk); + + return 0; +} + +/* Requeue received messages for a kcm socket to other kcm sockets. This is + * called with a kcm socket is receive disabled. + * RX mux lock held. + */ +static void requeue_rx_msgs(struct kcm_mux *mux, struct sk_buff_head *head) +{ + struct sk_buff *skb; + struct kcm_sock *kcm; + + while ((skb = __skb_dequeue(head))) { + /* Reset destructor to avoid calling kcm_rcv_ready */ + skb->destructor = sock_rfree; + skb_orphan(skb); +try_again: + if (list_empty(&mux->kcm_rx_waiters)) { + skb_queue_tail(&mux->rx_hold_queue, skb); + continue; + } + + kcm = list_first_entry(&mux->kcm_rx_waiters, + struct kcm_sock, wait_rx_list); + + if (kcm_queue_rcv_skb(&kcm->sk, skb)) { + /* Should mean socket buffer full */ + list_del(&kcm->wait_rx_list); + kcm->rx_wait = false; + + /* Commit rx_wait to read in kcm_free */ + smp_wmb(); + + goto try_again; + } + } +} + +/* Lower sock lock held */ +static struct kcm_sock *reserve_rx_kcm(struct kcm_psock *psock, + struct sk_buff *head) +{ + struct kcm_mux *mux = psock->mux; + struct kcm_sock *kcm; + + WARN_ON(psock->ready_rx_msg); + + if (psock->rx_kcm) + return psock->rx_kcm; + + spin_lock_bh(&mux->rx_lock); + + if (psock->rx_kcm) { + spin_unlock_bh(&mux->rx_lock); + return psock->rx_kcm; + } + + if (list_empty(&mux->kcm_rx_waiters)) { + psock->ready_rx_msg = head; + list_add_tail(&psock->psock_ready_list, + &mux->psocks_ready); + spin_unlock_bh(&mux->rx_lock); + return NULL; + } + + kcm = list_first_entry(&mux->kcm_rx_waiters, + struct kcm_sock, wait_rx_list); + list_del(&kcm->wait_rx_list); + kcm->rx_wait = false; + + psock->rx_kcm = kcm; + kcm->rx_psock = psock; + + spin_unlock_bh(&mux->rx_lock); + + return kcm; +} + +static void kcm_done(struct kcm_sock *kcm); + +static void kcm_done_work(struct work_struct *w) +{ + kcm_done(container_of(w, struct kcm_sock, done_work)); +} + +/* Lower sock held */ +static void unreserve_rx_kcm(struct kcm_psock *psock, + bool rcv_ready) +{ + struct kcm_sock *kcm = psock->rx_kcm; + struct kcm_mux *mux = psock->mux; + + if (!kcm) + return; + + spin_lock_bh(&mux->rx_lock); + + psock->rx_kcm = NULL; + kcm->rx_psock = NULL; + + /* Commit 
kcm->rx_psock before sk_rmem_alloc_get to sync with + * kcm_rfree + */ + smp_mb(); + + if (unlikely(kcm->done)) { + spin_unlock_bh(&mux->rx_lock); + + /* Need to run kcm_done in a task since we need to qcquire + * callback locks which may already be held here. + */ + INIT_WORK(&kcm->done_work, kcm_done_work); + schedule_work(&kcm->done_work); + return; + } + + if (unlikely(kcm->rx_disabled)) { + requeue_rx_msgs(mux, &kcm->sk.sk_receive_queue); + } else if (rcv_ready || unlikely(!sk_rmem_alloc_get(&kcm->sk))) { + /* Check for degenerative race with rx_wait that all + * data was dequeued (accounted for in kcm_rfree). + */ + kcm_rcv_ready(kcm); + } + spin_unlock_bh(&mux->rx_lock); +} + +/* Macro to invoke filter function. */ +#define KCM_RUN_FILTER(prog, ctx) \ + (*prog->bpf_func)(ctx, prog->insnsi) + +/* Lower socket lock held */ +static int kcm_tcp_recv(read_descriptor_t *desc, struct sk_buff *orig_skb, + unsigned int orig_offset, size_t orig_len) +{ + struct kcm_psock *psock = (struct kcm_psock *)desc->arg.data; + struct kcm_rx_msg *rxm; + struct kcm_sock *kcm; + struct sk_buff *head, *skb; + size_t eaten = 0, cand_len; + ssize_t extra; + int err; + bool cloned_orig = false; + + if (psock->ready_rx_msg) + return 0; + + head = psock->rx_skb_head; + if (head) { + /* Message already in progress */ + + if (unlikely(orig_offset)) { + /* Getting data with a non-zero offset when a message is + * in progress is not expected. If it does happen, we + * need to clone and pull since we can't deal with + * offsets in the skbs for a message expect in the head. + */ + orig_skb = skb_clone(orig_skb, GFP_ATOMIC); + if (!orig_skb) { + desc->error = -ENOMEM; + return 0; + } + if (!pskb_pull(orig_skb, orig_offset)) { + kfree_skb(orig_skb); + desc->error = -ENOMEM; + return 0; + } + cloned_orig = true; + orig_offset = 0; + } + + if (!psock->rx_skb_nextp) { + /* We are going to append to the frags_list of head. + * Need to unshare the frag_list. + */ + err = skb_unclone(head, GFP_ATOMIC); + if (err) { + desc->error = err; + return 0; + } + + if (unlikely(skb_shinfo(head)->frag_list)) { + /* We can't append to an sk_buff that already + * has a frag_list. We create a new head, point + * the frag_list of that to the old head, and + * then are able to use the old head->next for + * appending to the message. + */ + if (WARN_ON(head->next)) { + desc->error = -EINVAL; + return 0; + } + + skb = alloc_skb(0, GFP_ATOMIC); + if (!skb) { + desc->error = -ENOMEM; + return 0; + } + skb->len = head->len; + skb->data_len = head->len; + skb->truesize = head->truesize; + *kcm_rx_msg(skb) = *kcm_rx_msg(head); + psock->rx_skb_nextp = &head->next; + skb_shinfo(skb)->frag_list = head; + psock->rx_skb_head = skb; + head = skb; + } else { + psock->rx_skb_nextp = + &skb_shinfo(head)->frag_list; + } + } + } + + while (eaten < orig_len) { + /* Always clone since we will consume something */ + skb = skb_clone(orig_skb, GFP_ATOMIC); + if (!skb) { + desc->error = -ENOMEM; + break; + } + + cand_len = orig_len - eaten; + + head = psock->rx_skb_head; + if (!head) { + head = skb; + psock->rx_skb_head = head; + /* Will set rx_skb_nextp on next packet if needed */ + psock->rx_skb_nextp = NULL; + rxm = kcm_rx_msg(head); + memset(rxm, 0, sizeof(*rxm)); + rxm->offset = orig_offset + eaten; + } else { + /* Unclone since we may be appending to an skb that we + * already share a frag_list with. 
+ */ + err = skb_unclone(skb, GFP_ATOMIC); + if (err) { + desc->error = err; + break; + } + + rxm = kcm_rx_msg(head); + *psock->rx_skb_nextp = skb; + psock->rx_skb_nextp = &skb->next; + head->data_len += skb->len; + head->len += skb->len; + head->truesize += skb->truesize; + } + + if (!rxm->full_len) { + ssize_t len; + + len = KCM_RUN_FILTER(psock->bpf_prog, head); + + if (!len) { + /* Need more header to determine length */ + rxm->accum_len += cand_len; + eaten += cand_len; + WARN_ON(eaten != orig_len); + break; + } else if (len <= (ssize_t)head->len - + skb->len - rxm->offset) { + /* Length must be into new skb (and also + * greater than zero) + */ + desc->error = -EPROTO; + psock->rx_skb_head = NULL; + kcm_abort_rx_psock(psock, EPROTO, head); + break; + } + + rxm->full_len = len; + } + + extra = (ssize_t)(rxm->accum_len + cand_len) - rxm->full_len; + + if (extra < 0) { + /* Message not complete yet. */ + rxm->accum_len += cand_len; + eaten += cand_len; + WARN_ON(eaten != orig_len); + break; + } + + /* Positive extra indicates ore bytes than needed for the + * message + */ + + WARN_ON(extra > cand_len); + + eaten += (cand_len - extra); + + /* Hurray, we have a new message! */ + psock->rx_skb_head = NULL; + +try_queue: + kcm = reserve_rx_kcm(psock, head); + if (!kcm) { + /* Unable to reserve a KCM, message is held in psock. */ + break; + } + + if (kcm_queue_rcv_skb(&kcm->sk, head)) { + /* Should mean socket buffer full */ + unreserve_rx_kcm(psock, false); + goto try_queue; + } + } + + if (cloned_orig) + kfree_skb(orig_skb); + + return eaten; +} + +/* Called with lock held on lower socket */ +static int psock_tcp_read_sock(struct kcm_psock *psock) +{ + read_descriptor_t desc; + + desc.arg.data = psock; + desc.error = 0; + desc.count = 1; /* give more than one skb per call */ + + /* sk should be locked here, so okay to do tcp_read_sock */ + tcp_read_sock(psock->sk, &desc, kcm_tcp_recv); + + unreserve_rx_kcm(psock, true); + + return desc.error; +} + +/* Lower sock lock held */ +static void psock_tcp_data_ready(struct sock *sk) +{ + struct kcm_psock *psock; + + read_lock_bh(&sk->sk_callback_lock); + + psock = (struct kcm_psock *)sk->sk_user_data; + if (unlikely(!psock || psock->rx_stopped)) + goto out; + + if (psock->ready_rx_msg) + goto out; + + if (psock_tcp_read_sock(psock) == -ENOMEM) + queue_delayed_work(kcm_wq, &psock->rx_delayed_work, 0); + +out: + read_unlock_bh(&sk->sk_callback_lock); +} + +static void do_psock_rx_work(struct kcm_psock *psock) +{ + read_descriptor_t rd_desc; + struct sock *csk = psock->sk; + + /* We need the read lock to synchronize with psock_tcp_data_ready. We + * need the socket lock for calling tcp_read_sock. + */ + lock_sock(csk); + read_lock_bh(&csk->sk_callback_lock); + + if (unlikely(csk->sk_user_data != psock)) + goto out; + + if (unlikely(psock->rx_stopped)) + goto out; + + if (psock->ready_rx_msg) + goto out; + + rd_desc.arg.data = psock; + + if (psock_tcp_read_sock(psock) == -ENOMEM) + queue_delayed_work(kcm_wq, &psock->rx_delayed_work, 0); + +out: + read_unlock_bh(&csk->sk_callback_lock); + release_sock(csk); +} + +static void psock_rx_work(struct work_struct *w) +{ + do_psock_rx_work(container_of(w, struct kcm_psock, rx_work)); +} + +static void psock_rx_delayed_work(struct work_struct *w) +{ + do_psock_rx_work(container_of(w, struct kcm_psock, + rx_delayed_work.work)); +} + +static void psock_tcp_state_change(struct sock *sk) +{ + /* TCP only does a POLLIN for a half close. 
Do a POLLHUP here + * since application will normally not poll with POLLIN + * on the TCP sockets. + */ + + report_csk_error(sk, EPIPE); +} + +static void psock_tcp_write_space(struct sock *sk) +{ + struct kcm_psock *psock; + struct kcm_mux *mux; + struct kcm_sock *kcm; + + read_lock_bh(&sk->sk_callback_lock); + + psock = (struct kcm_psock *)sk->sk_user_data; + if (unlikely(!psock)) + goto out; + + mux = psock->mux; + + spin_lock_bh(&mux->lock); + + /* Check if the socket is reserved so someone is waiting for sending. */ + kcm = psock->tx_kcm; + if (kcm) + queue_work(kcm_wq, &kcm->tx_work); + + spin_unlock_bh(&mux->lock); +out: + read_unlock_bh(&sk->sk_callback_lock); +} + +static void unreserve_psock(struct kcm_sock *kcm); + +/* kcm sock is locked. */ +static struct kcm_psock *reserve_psock(struct kcm_sock *kcm) +{ + struct kcm_mux *mux = kcm->mux; + struct kcm_psock *psock; + + psock = kcm->tx_psock; + + smp_rmb(); /* Must read tx_psock before tx_wait */ + + if (psock) { + WARN_ON(kcm->tx_wait); + if (unlikely(psock->tx_stopped)) + unreserve_psock(kcm); + else + return kcm->tx_psock; + } + + spin_lock_bh(&mux->lock); + + /* Check again under lock to see if psock was reserved for this + * psock via psock_unreserve. + */ + psock = kcm->tx_psock; + if (unlikely(psock)) { + WARN_ON(kcm->tx_wait); + spin_unlock_bh(&mux->lock); + return kcm->tx_psock; + } + + if (!list_empty(&mux->psocks_avail)) { + psock = list_first_entry(&mux->psocks_avail, + struct kcm_psock, + psock_avail_list); + list_del(&psock->psock_avail_list); + if (kcm->tx_wait) { + list_del(&kcm->wait_psock_list); + kcm->tx_wait = false; + } + kcm->tx_psock = psock; + psock->tx_kcm = kcm; + } else if (!kcm->tx_wait) { + list_add_tail(&kcm->wait_psock_list, + &mux->kcm_tx_waiters); + kcm->tx_wait = true; + } + + spin_unlock_bh(&mux->lock); + + return psock; +} + +/* mux lock held */ +static void psock_now_avail(struct kcm_psock *psock) +{ + struct kcm_mux *mux = psock->mux; + struct kcm_sock *kcm; + + if (list_empty(&mux->kcm_tx_waiters)) { + list_add_tail(&psock->psock_avail_list, + &mux->psocks_avail); + } else { + kcm = list_first_entry(&mux->kcm_tx_waiters, + struct kcm_sock, + wait_psock_list); + list_del(&kcm->wait_psock_list); + kcm->tx_wait = false; + psock->tx_kcm = kcm; + + /* Commit before changing tx_psock since that is read in + * reserve_psock before queuing work. + */ + smp_mb(); + + kcm->tx_psock = psock; + queue_work(kcm_wq, &kcm->tx_work); + } +} + +/* kcm sock is locked. */ +static void unreserve_psock(struct kcm_sock *kcm) +{ + struct kcm_psock *psock; + struct kcm_mux *mux = kcm->mux; + + spin_lock_bh(&mux->lock); + + psock = kcm->tx_psock; + + if (WARN_ON(!psock)) { + spin_unlock_bh(&mux->lock); + return; + } + + smp_rmb(); /* Read tx_psock before tx_wait */ + + WARN_ON(kcm->tx_wait); + + kcm->tx_psock = NULL; + psock->tx_kcm = NULL; + + if (unlikely(psock->tx_stopped)) { + if (psock->done) { + /* Deferred free */ + list_del(&psock->psock_list); + mux->psocks_cnt--; + sock_put(psock->sk); + fput(psock->sk->sk_socket->file); + kmem_cache_free(kcm_psockp, psock); + } + + /* Don't put back on available list */ + + spin_unlock_bh(&mux->lock); + + return; + } + + psock_now_avail(psock); + + spin_unlock_bh(&mux->lock); +} + +/* Write any messages ready on the kcm socket. Called with kcm sock lock + * held. Return bytes actually sent or error. 
+ */ +static int kcm_write_msgs(struct kcm_sock *kcm) +{ + struct sock *sk = &kcm->sk; + struct kcm_psock *psock; + struct sk_buff *skb, *head; + struct kcm_tx_msg *txm; + unsigned short fragidx, frag_offset; + unsigned int sent, total_sent = 0; + int ret = 0; + + kcm->tx_wait_more = false; + psock = kcm->tx_psock; + if (unlikely(psock && psock->tx_stopped)) { + /* A reserved psock was aborted asynchronously. Unreserve + * it and we'll retry the message. + */ + unreserve_psock(kcm); + if (skb_queue_empty(&sk->sk_write_queue)) + return 0; + + kcm_tx_msg(skb_peek(&sk->sk_write_queue))->sent = 0; + + } else if (skb_queue_empty(&sk->sk_write_queue)) { + return 0; + } + + head = skb_peek(&sk->sk_write_queue); + txm = kcm_tx_msg(head); + + if (txm->sent) { + /* Send of first skbuff in queue already in progress */ + if (WARN_ON(!psock)) { + ret = -EINVAL; + goto out; + } + sent = txm->sent; + frag_offset = txm->frag_offset; + fragidx = txm->fragidx; + skb = txm->frag_skb; + + goto do_frag; + } + +try_again: + psock = reserve_psock(kcm); + if (!psock) + goto out; + + do { + skb = head; + txm = kcm_tx_msg(head); + sent = 0; + +do_frag_list: + if (WARN_ON(!skb_shinfo(skb)->nr_frags)) { + ret = -EINVAL; + goto out; + } + + for (fragidx = 0; fragidx < skb_shinfo(skb)->nr_frags; + fragidx++) { + skb_frag_t *frag; + + frag_offset = 0; +do_frag: + frag = &skb_shinfo(skb)->frags[fragidx]; + if (WARN_ON(!frag->size)) { + ret = -EINVAL; + goto out; + } + + ret = kernel_sendpage(psock->sk->sk_socket, + frag->page.p, + frag->page_offset + frag_offset, + frag->size - frag_offset, + MSG_DONTWAIT); + if (ret <= 0) { + if (ret == -EAGAIN) { + /* Save state to try again when there's + * write space on the socket + */ + txm->sent = sent; + txm->frag_offset = frag_offset; + txm->fragidx = fragidx; + txm->frag_skb = skb; + + ret = 0; + goto out; + } + + /* Hard failure in sending message, abort this + * psock since it has lost framing + * synchonization and retry sending the + * message from the beginning. + */ + kcm_abort_tx_psock(psock, ret ? -ret : EPIPE, + true); + unreserve_psock(kcm); + + txm->sent = 0; + ret = 0; + + goto try_again; + } + + sent += ret; + frag_offset += ret; + if (frag_offset < frag->size) { + /* Not finished with this frag */ + goto do_frag; + } + } + + if (skb == head) { + if (skb_has_frag_list(skb)) { + skb = skb_shinfo(skb)->frag_list; + goto do_frag_list; + } + } else if (skb->next) { + skb = skb->next; + goto do_frag_list; + } + + /* Successfully sent the whole packet, account for it. */ + skb_dequeue(&sk->sk_write_queue); + kfree_skb(head); + sk->sk_wmem_queued -= sent; + total_sent += sent; + } while ((head = skb_peek(&sk->sk_write_queue))); +out: + if (!head) { + /* Done with all queued messages. */ + WARN_ON(!skb_queue_empty(&sk->sk_write_queue)); + unreserve_psock(kcm); + } + + /* Check if write space is available */ + sk->sk_write_space(sk); + + return total_sent ? 
: ret; +} + +static void kcm_tx_work(struct work_struct *w) +{ + struct kcm_sock *kcm = container_of(w, struct kcm_sock, tx_work); + struct sock *sk = &kcm->sk; + int err; + + lock_sock(sk); + + /* Primarily for SOCK_DGRAM sockets, also handle asynchronous tx + * aborts + */ + err = kcm_write_msgs(kcm); + if (err < 0) { + /* Hard failure in write, report error on KCM socket */ + pr_warn("KCM: Hard failure on kcm_write_msgs %d\n", err); + report_csk_error(&kcm->sk, -err); + goto out; + } + + /* Primarily for SOCK_SEQPACKET sockets */ + if (likely(sk->sk_socket) && + test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) { + clear_bit(SOCK_NOSPACE, &sk->sk_socket->flags); + sk->sk_write_space(sk); + } + +out: + release_sock(sk); +} + +static void kcm_push(struct kcm_sock *kcm) +{ + if (kcm->tx_wait_more) + kcm_write_msgs(kcm); +} + +static int kcm_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) +{ + struct sock *sk = sock->sk; + struct kcm_sock *kcm = kcm_sk(sk); + struct sk_buff *skb = NULL, *head = NULL; + size_t copy, copied = 0; + long timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT); + int eor = (sock->type == SOCK_DGRAM) ? + !(msg->msg_flags & MSG_MORE) : !!(msg->msg_flags & MSG_EOR); + int err = -EPIPE; + + lock_sock(sk); + + /* Per tcp_sendmsg this should be in poll */ + sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk); + + if (sk->sk_err) + goto out_error; + + if (kcm->seq_skb) { + /* Previously opened message */ + head = kcm->seq_skb; + skb = kcm_tx_msg(head)->last_skb; + goto start; + } + + /* Call the sk_stream functions to manage the sndbuf mem. */ + if (!sk_stream_memory_free(sk)) { + kcm_push(kcm); + set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); + err = sk_stream_wait_memory(sk, &timeo); + if (err) + goto out_error; + } + + /* New message, alloc head skb */ + head = alloc_skb(0, sk->sk_allocation); + while (!head) { + kcm_push(kcm); + err = sk_stream_wait_memory(sk, &timeo); + if (err) + goto out_error; + + head = alloc_skb(0, sk->sk_allocation); + } + + skb = head; + + /* Set ip_summed to CHECKSUM_UNNECESSARY to avoid calling + * csum_and_copy_from_iter from skb_do_copy_data_nocache. + */ + skb->ip_summed = CHECKSUM_UNNECESSARY; + +start: + while (msg_data_left(msg)) { + bool merge = true; + int i = skb_shinfo(skb)->nr_frags; + struct page_frag *pfrag = sk_page_frag(sk); + + if (!sk_page_frag_refill(sk, pfrag)) + goto wait_for_memory; + + if (!skb_can_coalesce(skb, i, pfrag->page, + pfrag->offset)) { + if (i == MAX_SKB_FRAGS) { + struct sk_buff *tskb; + + tskb = alloc_skb(0, sk->sk_allocation); + if (!tskb) + goto wait_for_memory; + + if (head == skb) + skb_shinfo(head)->frag_list = tskb; + else + skb->next = tskb; + + skb = tskb; + skb->ip_summed = CHECKSUM_UNNECESSARY; + continue; + } + merge = false; + } + + copy = min_t(int, msg_data_left(msg), + pfrag->size - pfrag->offset); + + if (!sk_wmem_schedule(sk, copy)) + goto wait_for_memory; + + err = skb_copy_to_page_nocache(sk, &msg->msg_iter, skb, + pfrag->page, + pfrag->offset, + copy); + if (err) + goto out_error; + + /* Update the skb. 
*/ + if (merge) { + skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy); + } else { + skb_fill_page_desc(skb, i, pfrag->page, + pfrag->offset, copy); + get_page(pfrag->page); + } + + pfrag->offset += copy; + copied += copy; + if (head != skb) { + head->len += copy; + head->data_len += copy; + } + + continue; + +wait_for_memory: + kcm_push(kcm); + err = sk_stream_wait_memory(sk, &timeo); + if (err) + goto out_error; + } + + if (eor) { + bool not_busy = skb_queue_empty(&sk->sk_write_queue); + + /* Message complete, queue it on send buffer */ + __skb_queue_tail(&sk->sk_write_queue, head); + kcm->seq_skb = NULL; + + if (msg->msg_flags & MSG_BATCH) { + kcm->tx_wait_more = true; + } else if (kcm->tx_wait_more || not_busy) { + err = kcm_write_msgs(kcm); + if (err < 0) { + /* We got a hard error in write_msgs but have + * already queued this message. Report an error + * in the socket, but don't affect return value + * from sendmsg + */ + pr_warn("KCM: Hard failure on kcm_write_msgs\n"); + report_csk_error(&kcm->sk, -err); + } + } + } else { + /* Message not complete, save state */ +partial_message: + kcm->seq_skb = head; + kcm_tx_msg(head)->last_skb = skb; + } + + release_sock(sk); + return copied; + +out_error: + kcm_push(kcm); + + if (copied && sock->type == SOCK_SEQPACKET) { + /* Wrote some bytes before encountering an + * error, return partial success. + */ + goto partial_message; + } + + if (head != kcm->seq_skb) + kfree_skb(head); + + err = sk_stream_error(sk, msg->msg_flags, err); + + /* make sure we wake any epoll edge trigger waiter */ + if (unlikely(skb_queue_len(&sk->sk_write_queue) == 0 && err == -EAGAIN)) + sk->sk_write_space(sk); + + release_sock(sk); + return err; +} + +static struct sk_buff *kcm_wait_data(struct sock *sk, int flags, + long timeo, int *err) +{ + struct sk_buff *skb; + + while (!(skb = skb_peek(&sk->sk_receive_queue))) { + if (sk->sk_err) { + *err = sock_error(sk); + return NULL; + } + + if (sock_flag(sk, SOCK_DONE)) + return NULL; + + if ((flags & MSG_DONTWAIT) || !timeo) { + *err = -EAGAIN; + return NULL; + } + + sk_wait_data(sk, &timeo, NULL); + + /* Handle signals */ + if (signal_pending(current)) { + *err = sock_intr_errno(timeo); + return NULL; + } + } + + return skb; +} + +static int kcm_recvmsg(struct socket *sock, struct msghdr *msg, + size_t len, int flags) +{ + struct sock *sk = sock->sk; + int err = 0; + long timeo; + struct kcm_rx_msg *rxm; + int copied = 0; + struct sk_buff *skb; + + timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); + + lock_sock(sk); + + skb = kcm_wait_data(sk, flags, timeo, &err); + if (!skb) + goto out; + + /* Okay, have a message on the receive queue */ + + rxm = kcm_rx_msg(skb); + + if (len > rxm->full_len) + len = rxm->full_len; + + err = skb_copy_datagram_msg(skb, rxm->offset, msg, len); + if (err < 0) + goto out; + + copied = len; + if (likely(!(flags & MSG_PEEK))) { + if (copied < rxm->full_len) { + if (sock->type == SOCK_DGRAM) { + /* Truncated message */ + msg->msg_flags |= MSG_TRUNC; + goto msg_finished; + } + rxm->offset += copied; + rxm->full_len -= copied; + } else { +msg_finished: + /* Finished with message */ + msg->msg_flags |= MSG_EOR; + skb_unlink(skb, &sk->sk_receive_queue); + kfree_skb(skb); + } + } + +out: + release_sock(sk); + + return copied ? 
: err; +} + +/* kcm sock lock held */ +static void kcm_recv_disable(struct kcm_sock *kcm) +{ + struct kcm_mux *mux = kcm->mux; + + if (kcm->rx_disabled) + return; + + spin_lock_bh(&mux->rx_lock); + + kcm->rx_disabled = 1; + + /* If a psock is reserved we'll do cleanup in unreserve */ + if (!kcm->rx_psock) { + if (kcm->rx_wait) { + list_del(&kcm->wait_rx_list); + kcm->rx_wait = false; + } + + requeue_rx_msgs(mux, &kcm->sk.sk_receive_queue); + } + + spin_unlock_bh(&mux->rx_lock); +} + +/* kcm sock lock held */ +static void kcm_recv_enable(struct kcm_sock *kcm) +{ + struct kcm_mux *mux = kcm->mux; + + if (!kcm->rx_disabled) + return; + + spin_lock_bh(&mux->rx_lock); + + kcm->rx_disabled = 0; + kcm_rcv_ready(kcm); + + spin_unlock_bh(&mux->rx_lock); +} + +static int kcm_setsockopt(struct socket *sock, int level, int optname, + char __user *optval, unsigned int optlen) +{ + struct kcm_sock *kcm = kcm_sk(sock->sk); + int val, valbool; + int err = 0; + + if (level != SOL_KCM) + return -ENOPROTOOPT; + + if (optlen < sizeof(int)) + return -EINVAL; + + if (get_user(val, (int __user *)optval)) + return -EINVAL; + + valbool = val ? 1 : 0; + + switch (optname) { + case KCM_RECV_DISABLE: + lock_sock(&kcm->sk); + if (valbool) + kcm_recv_disable(kcm); + else + kcm_recv_enable(kcm); + release_sock(&kcm->sk); + break; + default: + err = -ENOPROTOOPT; + } + + return err; +} + +static int kcm_getsockopt(struct socket *sock, int level, int optname, + char __user *optval, int __user *optlen) +{ + struct kcm_sock *kcm = kcm_sk(sock->sk); + int val, len; + + if (level != SOL_KCM) + return -ENOPROTOOPT; + + if (get_user(len, optlen)) + return -EFAULT; + + len = min_t(unsigned int, len, sizeof(int)); + if (len < 0) + return -EINVAL; + + switch (optname) { + case KCM_RECV_DISABLE: + val = kcm->rx_disabled; + break; + default: + return -ENOPROTOOPT; + } + + if (put_user(len, optlen)) + return -EFAULT; + if (copy_to_user(optval, &val, len)) + return -EFAULT; + return 0; +} + +static void init_kcm_sock(struct kcm_sock *kcm, struct kcm_mux *mux) +{ + struct kcm_sock *tkcm; + struct list_head *head; + int index = 0; + + /* For SOCK_SEQPACKET sock type, datagram_poll checks the sk_state, so + * we set sk_state, otherwise epoll_wait always returns right away with + * POLLHUP + */ + kcm->sk.sk_state = TCP_ESTABLISHED; + + /* Add to mux's kcm sockets list */ + kcm->mux = mux; + spin_lock_bh(&mux->lock); + + head = &mux->kcm_socks; + list_for_each_entry(tkcm, &mux->kcm_socks, kcm_sock_list) { + if (tkcm->index != index) + break; + head = &tkcm->kcm_sock_list; + index++; + } + + list_add(&kcm->kcm_sock_list, head); + kcm->index = index; + + mux->kcm_socks_cnt++; + spin_unlock_bh(&mux->lock); + + INIT_WORK(&kcm->tx_work, kcm_tx_work); + + spin_lock_bh(&mux->rx_lock); + kcm_rcv_ready(kcm); + spin_unlock_bh(&mux->rx_lock); +} + +static int kcm_attach(struct socket *sock, struct socket *csock, + struct bpf_prog *prog) +{ + struct kcm_sock *kcm = kcm_sk(sock->sk); + struct kcm_mux *mux = kcm->mux; + struct sock *csk; + struct kcm_psock *psock = NULL, *tpsock; + struct list_head *head; + int index = 0; + + if (csock->ops->family != PF_INET && + csock->ops->family != PF_INET6) + return -EINVAL; + + csk = csock->sk; + if (!csk) + return -EINVAL; + + /* Only support TCP for now */ + if (csk->sk_protocol != IPPROTO_TCP) + return -EINVAL; + + psock = kmem_cache_zalloc(kcm_psockp, GFP_KERNEL); + if (!psock) + return -ENOMEM; + + psock->mux = mux; + psock->sk = csk; + psock->bpf_prog = prog; + INIT_WORK(&psock->rx_work, psock_rx_work); + 
INIT_DELAYED_WORK(&psock->rx_delayed_work, psock_rx_delayed_work); + + sock_hold(csk); + + write_lock_bh(&csk->sk_callback_lock); + psock->save_data_ready = csk->sk_data_ready; + psock->save_write_space = csk->sk_write_space; + psock->save_state_change = csk->sk_state_change; + csk->sk_user_data = psock; + csk->sk_data_ready = psock_tcp_data_ready; + csk->sk_write_space = psock_tcp_write_space; + csk->sk_state_change = psock_tcp_state_change; + write_unlock_bh(&csk->sk_callback_lock); + + /* Finished initialization, now add the psock to the MUX. */ + spin_lock_bh(&mux->lock); + head = &mux->psocks; + list_for_each_entry(tpsock, &mux->psocks, psock_list) { + if (tpsock->index != index) + break; + head = &tpsock->psock_list; + index++; + } + + list_add(&psock->psock_list, head); + psock->index = index; + + mux->psocks_cnt++; + psock_now_avail(psock); + spin_unlock_bh(&mux->lock); + + /* Schedule RX work in case there are already bytes queued */ + queue_work(kcm_wq, &psock->rx_work); + + return 0; +} + +static int kcm_attach_ioctl(struct socket *sock, struct kcm_attach *info) +{ + struct socket *csock; + struct bpf_prog *prog; + int err; + + csock = sockfd_lookup(info->fd, &err); + if (!csock) + return -ENOENT; + + prog = bpf_prog_get(info->bpf_fd); + if (IS_ERR(prog)) { + err = PTR_ERR(prog); + goto out; + } + + if (prog->type != BPF_PROG_TYPE_SOCKET_FILTER) { + bpf_prog_put(prog); + err = -EINVAL; + goto out; + } + + err = kcm_attach(sock, csock, prog); + if (err) { + bpf_prog_put(prog); + goto out; + } + + /* Keep reference on file also */ + + return 0; +out: + fput(csock->file); + return err; +} + +static void kcm_unattach(struct kcm_psock *psock) +{ + struct sock *csk = psock->sk; + struct kcm_mux *mux = psock->mux; + + /* Stop getting callbacks from TCP socket. After this there should + * be no way to reserve a kcm for this psock. + */ + write_lock_bh(&csk->sk_callback_lock); + csk->sk_user_data = NULL; + csk->sk_data_ready = psock->save_data_ready; + csk->sk_write_space = psock->save_write_space; + csk->sk_state_change = psock->save_state_change; + psock->rx_stopped = 1; + + if (WARN_ON(psock->rx_kcm)) { + write_unlock_bh(&csk->sk_callback_lock); + return; + } + + spin_lock_bh(&mux->rx_lock); + + /* Stop receiver activities. After this point psock should not be + * able to get onto ready list either through callbacks or work. + */ + if (psock->ready_rx_msg) { + list_del(&psock->psock_ready_list); + kfree_skb(psock->ready_rx_msg); + psock->ready_rx_msg = NULL; + } + + spin_unlock_bh(&mux->rx_lock); + + write_unlock_bh(&csk->sk_callback_lock); + + cancel_work_sync(&psock->rx_work); + cancel_delayed_work_sync(&psock->rx_delayed_work); + + bpf_prog_put(psock->bpf_prog); + + kfree_skb(psock->rx_skb_head); + psock->rx_skb_head = NULL; + + spin_lock_bh(&mux->lock); + + if (psock->tx_kcm) { + /* psock was reserved. Just mark it finished and we will clean + * up in the kcm paths, we need kcm lock which can not be + * acquired here. + */ + spin_unlock_bh(&mux->lock); + + /* We are unattaching a socket that is reserved. Abort the + * socket since we may be out of sync in sending on it. We need + * to do this without the mux lock. 
+ */ + kcm_abort_tx_psock(psock, EPIPE, false); + + spin_lock_bh(&mux->lock); + if (!psock->tx_kcm) { + /* psock now unreserved in window mux was unlocked */ + goto no_reserved; + } + psock->done = 1; + + /* Commit done before queuing work to process it */ + smp_mb(); + + /* Queue tx work to make sure psock->done is handled */ + queue_work(kcm_wq, &psock->tx_kcm->tx_work); + spin_unlock_bh(&mux->lock); + } else { +no_reserved: + if (!psock->tx_stopped) + list_del(&psock->psock_avail_list); + list_del(&psock->psock_list); + mux->psocks_cnt--; + spin_unlock_bh(&mux->lock); + + sock_put(csk); + fput(csk->sk_socket->file); + kmem_cache_free(kcm_psockp, psock); + } +} + +static int kcm_unattach_ioctl(struct socket *sock, struct kcm_unattach *info) +{ + struct kcm_sock *kcm = kcm_sk(sock->sk); + struct kcm_mux *mux = kcm->mux; + struct kcm_psock *psock; + struct socket *csock; + struct sock *csk; + int err; + + csock = sockfd_lookup(info->fd, &err); + if (!csock) + return -ENOENT; + + csk = csock->sk; + if (!csk) { + err = -EINVAL; + goto out; + } + + err = -ENOENT; + + spin_lock_bh(&mux->lock); + + list_for_each_entry(psock, &mux->psocks, psock_list) { + if (psock->sk != csk) + continue; + + /* Found the matching psock */ + + if (psock->unattaching || WARN_ON(psock->done)) { + err = -EALREADY; + break; + } + + psock->unattaching = 1; + + spin_unlock_bh(&mux->lock); + + kcm_unattach(psock); + + err = 0; + goto out; + } + + spin_unlock_bh(&mux->lock); + +out: + fput(csock->file); + return err; +} + +static struct proto kcm_proto = { + .name = "KCM", + .owner = THIS_MODULE, + .obj_size = sizeof(struct kcm_sock), +}; + +/* Clone a kcm socket. */ +static int kcm_clone(struct socket *osock, struct kcm_clone *info, + struct socket **newsockp) +{ + struct socket *newsock; + struct sock *newsk; + struct file *newfile; + int err, newfd; + + err = -ENFILE; + newsock = sock_alloc(); + if (!newsock) + goto out; + + newsock->type = osock->type; + newsock->ops = osock->ops; + + __module_get(newsock->ops->owner); + + newfd = get_unused_fd_flags(0); + if (unlikely(newfd < 0)) { + err = newfd; + goto out_fd_fail; + } + + newfile = sock_alloc_file(newsock, 0, osock->sk->sk_prot_creator->name); + if (unlikely(IS_ERR(newfile))) { + err = PTR_ERR(newfile); + goto out_sock_alloc_fail; + } + + newsk = sk_alloc(sock_net(osock->sk), PF_KCM, GFP_KERNEL, + &kcm_proto, true); + if (!newsk) { + err = -ENOMEM; + goto out_sk_alloc_fail; + } + + sock_init_data(newsock, newsk); + init_kcm_sock(kcm_sk(newsk), kcm_sk(osock->sk)->mux); + + fd_install(newfd, newfile); + *newsockp = newsock; + info->fd = newfd; + + return 0; + +out_sk_alloc_fail: + fput(newfile); +out_sock_alloc_fail: + put_unused_fd(newfd); +out_fd_fail: + sock_release(newsock); +out: + return err; +} + +static int kcm_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) +{ + int err; + + switch (cmd) { + case SIOCKCMATTACH: { + struct kcm_attach info; + + if (copy_from_user(&info, (void __user *)arg, sizeof(info))) + err = -EFAULT; + + err = kcm_attach_ioctl(sock, &info); + + break; + } + case SIOCKCMUNATTACH: { + struct kcm_unattach info; + + if (copy_from_user(&info, (void __user *)arg, sizeof(info))) + err = -EFAULT; + + err = kcm_unattach_ioctl(sock, &info); + + break; + } + case SIOCKCMCLONE: { + struct kcm_clone info; + struct socket *newsock = NULL; + + if (copy_from_user(&info, (void __user *)arg, sizeof(info))) + err = -EFAULT; + + err = kcm_clone(sock, &info, &newsock); + + if (!err) { + if (copy_to_user((void __user *)arg, &info, + 
sizeof(info))) { + err = -EFAULT; + sock_release(newsock); + } + } + + break; + } + default: + err = -ENOIOCTLCMD; + break; + } + + return err; +} + +static void free_mux(struct rcu_head *rcu) +{ + struct kcm_mux *mux = container_of(rcu, + struct kcm_mux, rcu); + + kmem_cache_free(kcm_muxp, mux); +} + +static void release_mux(struct kcm_mux *mux) +{ + struct kcm_net *knet = mux->knet; + struct kcm_psock *psock, *tmp_psock; + + /* Release psocks */ + list_for_each_entry_safe(psock, tmp_psock, + &mux->psocks, psock_list) { + if (!WARN_ON(psock->unattaching)) + kcm_unattach(psock); + } + + if (WARN_ON(mux->psocks_cnt)) + return; + + __skb_queue_purge(&mux->rx_hold_queue); + + mutex_lock(&knet->mutex); + list_del_rcu(&mux->kcm_mux_list); + knet->count--; + mutex_unlock(&knet->mutex); + + call_rcu(&mux->rcu, free_mux); +} + +static void kcm_done(struct kcm_sock *kcm) +{ + struct kcm_mux *mux = kcm->mux; + struct sock *sk = &kcm->sk; + int socks_cnt; + + spin_lock_bh(&mux->rx_lock); + if (kcm->rx_psock) { + /* Cleanup in unreserve_rx_kcm */ + WARN_ON(kcm->done); + kcm->rx_disabled = 1; + kcm->done = 1; + spin_unlock_bh(&mux->rx_lock); + return; + } + + if (kcm->rx_wait) { + list_del(&kcm->wait_rx_list); + kcm->rx_wait = false; + } + /* Move any pending receive messages to other kcm sockets */ + requeue_rx_msgs(mux, &sk->sk_receive_queue); + + spin_unlock_bh(&mux->rx_lock); + + if (WARN_ON(sk_rmem_alloc_get(sk))) + return; + + /* Detach from MUX */ + spin_lock_bh(&mux->lock); + + list_del(&kcm->kcm_sock_list); + mux->kcm_socks_cnt--; + socks_cnt = mux->kcm_socks_cnt; + + spin_unlock_bh(&mux->lock); + + if (!socks_cnt) { + /* We are done with the mux now. */ + release_mux(mux); + } + + WARN_ON(kcm->rx_wait); + + sock_put(&kcm->sk); +} + +/* Called by kcm_release to close a KCM socket. + * If this is the last KCM socket on the MUX, destroy the MUX. + */ +static int kcm_release(struct socket *sock) +{ + struct sock *sk = sock->sk; + struct kcm_sock *kcm; + struct kcm_mux *mux; + struct kcm_psock *psock; + + if (!sk) + return 0; + + kcm = kcm_sk(sk); + mux = kcm->mux; + + sock_orphan(sk); + kfree_skb(kcm->seq_skb); + + lock_sock(sk); + /* Purge queue under lock to avoid race condition with tx_work trying + * to act when queue is nonempty. If tx_work runs after this point + * it will just return. + */ + __skb_queue_purge(&sk->sk_write_queue); + release_sock(sk); + + spin_lock_bh(&mux->lock); + if (kcm->tx_wait) { + /* Take of tx_wait list, after this point there should be no way + * that a psock will be assigned to this kcm. + */ + list_del(&kcm->wait_psock_list); + kcm->tx_wait = false; + } + spin_unlock_bh(&mux->lock); + + /* Cancel work. After this point there should be no outside references + * to the kcm socket. + */ + cancel_work_sync(&kcm->tx_work); + + lock_sock(sk); + psock = kcm->tx_psock; + if (psock) { + /* A psock was reserved, so we need to kill it since it + * may already have some bytes queued from a message. We + * need to do this after removing kcm from tx_wait list. 
+ */ + kcm_abort_tx_psock(psock, EPIPE, false); + unreserve_psock(kcm); + } + release_sock(sk); + + WARN_ON(kcm->tx_wait); + WARN_ON(kcm->tx_psock); + + sock->sk = NULL; + + kcm_done(kcm); + + return 0; +} + +static const struct proto_ops kcm_ops = { + .family = PF_KCM, + .owner = THIS_MODULE, + .release = kcm_release, + .bind = sock_no_bind, + .connect = sock_no_connect, + .socketpair = sock_no_socketpair, + .accept = sock_no_accept, + .getname = sock_no_getname, + .poll = datagram_poll, + .ioctl = kcm_ioctl, + .listen = sock_no_listen, + .shutdown = sock_no_shutdown, + .setsockopt = kcm_setsockopt, + .getsockopt = kcm_getsockopt, + .sendmsg = kcm_sendmsg, + .recvmsg = kcm_recvmsg, + .mmap = sock_no_mmap, + .sendpage = sock_no_sendpage, +}; + +/* Create proto operation for kcm sockets */ +static int kcm_create(struct net *net, struct socket *sock, + int protocol, int kern) +{ + struct kcm_net *knet = net_generic(net, kcm_net_id); + struct sock *sk; + struct kcm_mux *mux; + + switch (sock->type) { + case SOCK_DGRAM: + case SOCK_SEQPACKET: + sock->ops = &kcm_ops; + break; + default: + return -ESOCKTNOSUPPORT; + } + + if (protocol != KCMPROTO_CONNECTED) + return -EPROTONOSUPPORT; + + sk = sk_alloc(net, PF_KCM, GFP_KERNEL, &kcm_proto, kern); + if (!sk) + return -ENOMEM; + + /* Allocate a kcm mux, shared between KCM sockets */ + mux = kmem_cache_zalloc(kcm_muxp, GFP_KERNEL); + if (!mux) { + sk_free(sk); + return -ENOMEM; + } + + spin_lock_init(&mux->lock); + spin_lock_init(&mux->rx_lock); + INIT_LIST_HEAD(&mux->kcm_socks); + INIT_LIST_HEAD(&mux->kcm_rx_waiters); + INIT_LIST_HEAD(&mux->kcm_tx_waiters); + + INIT_LIST_HEAD(&mux->psocks); + INIT_LIST_HEAD(&mux->psocks_ready); + INIT_LIST_HEAD(&mux->psocks_avail); + + mux->knet = knet; + + /* Add new MUX to list */ + mutex_lock(&knet->mutex); + list_add_rcu(&mux->kcm_mux_list, &knet->mux_list); + knet->count++; + mutex_unlock(&knet->mutex); + + skb_queue_head_init(&mux->rx_hold_queue); + + /* Init KCM socket */ + sock_init_data(sock, sk); + init_kcm_sock(kcm_sk(sk), mux); + + return 0; +} + +static struct net_proto_family kcm_family_ops = { + .family = PF_KCM, + .create = kcm_create, + .owner = THIS_MODULE, +}; + +static __net_init int kcm_init_net(struct net *net) +{ + struct kcm_net *knet = net_generic(net, kcm_net_id); + + INIT_LIST_HEAD_RCU(&knet->mux_list); + mutex_init(&knet->mutex); + + return 0; +} + +static __net_exit void kcm_exit_net(struct net *net) +{ + struct kcm_net *knet = net_generic(net, kcm_net_id); + + /* All KCM sockets should be closed at this point, which should mean + * that all multiplexors and psocks have been destroyed. 
+ */ + WARN_ON(!list_empty(&knet->mux_list)); +} + +static struct pernet_operations kcm_net_ops = { + .init = kcm_init_net, + .exit = kcm_exit_net, + .id = &kcm_net_id, + .size = sizeof(struct kcm_net), +}; + +static int __init kcm_init(void) +{ + int err = -ENOMEM; + + kcm_muxp = kmem_cache_create("kcm_mux_cache", + sizeof(struct kcm_mux), 0, + SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL); + if (!kcm_muxp) + goto fail; + + kcm_psockp = kmem_cache_create("kcm_psock_cache", + sizeof(struct kcm_psock), 0, + SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL); + if (!kcm_psockp) + goto fail; + + kcm_wq = create_singlethread_workqueue("kkcmd"); + if (!kcm_wq) + goto fail; + + err = proto_register(&kcm_proto, 1); + if (err) + goto fail; + + err = sock_register(&kcm_family_ops); + if (err) + goto sock_register_fail; + + err = register_pernet_device(&kcm_net_ops); + if (err) + goto net_ops_fail; + + return 0; + +net_ops_fail: + sock_unregister(PF_KCM); + +sock_register_fail: + proto_unregister(&kcm_proto); + +fail: + kmem_cache_destroy(kcm_muxp); + kmem_cache_destroy(kcm_psockp); + + if (kcm_wq) + destroy_workqueue(kcm_wq); + + return err; +} + +static void __exit kcm_exit(void) +{ + unregister_pernet_device(&kcm_net_ops); + sock_unregister(PF_KCM); + proto_unregister(&kcm_proto); + destroy_workqueue(kcm_wq); + + kmem_cache_destroy(kcm_muxp); + kmem_cache_destroy(kcm_psockp); +} + +module_init(kcm_init); +module_exit(kcm_exit); + +MODULE_LICENSE("GPL"); +MODULE_ALIAS_NETPROTO(PF_KCM); + -- cgit v1.2.3 From 87aca73737e379f079993802d2c43606f7c5d26c Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Thu, 21 Jan 2016 09:20:12 +0100 Subject: NFC: microread: Drop platform data header file Originally I only wanted to drop the unneeded inclusion of , but then noticed that struct microread_nfc_platform_data isn't actually used, and MICROREAD_DRIVER_NAME is redefined in the only file where it is used, so we can get rid of the header file and dead code altogether. 
Signed-off-by: Jean Delvare Cc: Lauro Ramos Venancio Cc: Aloisio Almeida Jr Signed-off-by: Samuel Ortiz --- MAINTAINERS | 1 - drivers/nfc/microread/i2c.c | 8 -------- include/linux/platform_data/microread.h | 35 --------------------------------- 3 files changed, 44 deletions(-) delete mode 100644 include/linux/platform_data/microread.h (limited to 'include/linux') diff --git a/MAINTAINERS b/MAINTAINERS index 355e1c85bad6..5e4e50ff87bb 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7644,7 +7644,6 @@ F: net/nfc/ F: include/net/nfc/ F: include/uapi/linux/nfc.h F: drivers/nfc/ -F: include/linux/platform_data/microread.h F: include/linux/platform_data/nfcmrvl.h F: include/linux/platform_data/nxp-nci.h F: include/linux/platform_data/pn544.h diff --git a/drivers/nfc/microread/i2c.c b/drivers/nfc/microread/i2c.c index 918e8f2eac47..e0e8afd27849 100644 --- a/drivers/nfc/microread/i2c.c +++ b/drivers/nfc/microread/i2c.c @@ -246,18 +246,10 @@ static int microread_i2c_probe(struct i2c_client *client, const struct i2c_device_id *id) { struct microread_i2c_phy *phy; - struct microread_nfc_platform_data *pdata = - dev_get_platdata(&client->dev); int r; dev_dbg(&client->dev, "client %p\n", client); - if (!pdata) { - nfc_err(&client->dev, "client %p: missing platform data\n", - client); - return -EINVAL; - } - phy = devm_kzalloc(&client->dev, sizeof(struct microread_i2c_phy), GFP_KERNEL); if (!phy) diff --git a/include/linux/platform_data/microread.h b/include/linux/platform_data/microread.h deleted file mode 100644 index ca13992089b8..000000000000 --- a/include/linux/platform_data/microread.h +++ /dev/null @@ -1,35 +0,0 @@ -/* - * Driver include for the Inside Secure microread NFC Chip. - * - * Copyright (C) 2011 Tieto Poland - * Copyright (C) 2012 Intel Corporation. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * version 2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#ifndef _MICROREAD_H -#define _MICROREAD_H - -#include - -#define MICROREAD_DRIVER_NAME "microread" - -/* board config platform data for microread */ -struct microread_nfc_platform_data { - unsigned int rst_gpio; - unsigned int irq_gpio; - unsigned int ioh_gpio; -}; - -#endif /* _MICROREAD_H */ -- cgit v1.2.3 From 59e6473980f321c16299e12db69d1fabc2644a6f Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 8 Mar 2016 07:16:07 -0800 Subject: libnvdimm, pmem: clear poison on write If a write is directed at a known bad block perform the following: 1/ write the data 2/ send a clear poison command 3/ invalidate the poison out of the cache hierarchy Cc: Cc: Ross Zwisler Reviewed-by: Vishal Verma Signed-off-by: Dan Williams --- arch/x86/include/asm/pmem.h | 5 +++++ drivers/nvdimm/bus.c | 46 +++++++++++++++++++++++++++++++++++++++++++++ drivers/nvdimm/nd.h | 2 ++ drivers/nvdimm/pmem.c | 29 +++++++++++++++++++++++++++- include/linux/pmem.h | 19 +++++++++++++++++++ 5 files changed, 100 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/arch/x86/include/asm/pmem.h b/arch/x86/include/asm/pmem.h index c57fd1ea9689..bf8b35d2035a 100644 --- a/arch/x86/include/asm/pmem.h +++ b/arch/x86/include/asm/pmem.h @@ -137,6 +137,11 @@ static inline void arch_clear_pmem(void __pmem *addr, size_t size) arch_wb_cache_pmem(addr, size); } +static inline void arch_invalidate_pmem(void __pmem *addr, size_t size) +{ + clflush_cache_range((void __force *) addr, size); +} + static inline bool __arch_has_wmb_pmem(void) { /* diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c index cb6fd64b13e3..33557481d452 100644 --- a/drivers/nvdimm/bus.c +++ b/drivers/nvdimm/bus.c @@ -159,6 +159,52 @@ void nvdimm_region_notify(struct nd_region *nd_region, enum nvdimm_event event) } EXPORT_SYMBOL_GPL(nvdimm_region_notify); +long nvdimm_clear_poison(struct device *dev, phys_addr_t phys, + unsigned int len) +{ + struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev); + struct nvdimm_bus_descriptor *nd_desc; + struct nd_cmd_clear_error clear_err; + struct nd_cmd_ars_cap ars_cap; + u32 clear_err_unit, mask; + int cmd_rc, rc; + + if (!nvdimm_bus) + return -ENXIO; + + nd_desc = nvdimm_bus->nd_desc; + if (!nd_desc->ndctl) + return -ENXIO; + + memset(&ars_cap, 0, sizeof(ars_cap)); + ars_cap.address = phys; + ars_cap.length = len; + rc = nd_desc->ndctl(nd_desc, NULL, ND_CMD_ARS_CAP, &ars_cap, + sizeof(ars_cap), &cmd_rc); + if (rc < 0) + return rc; + if (cmd_rc < 0) + return cmd_rc; + clear_err_unit = ars_cap.clear_err_unit; + if (!clear_err_unit || !is_power_of_2(clear_err_unit)) + return -ENXIO; + + mask = clear_err_unit - 1; + if ((phys | len) & mask) + return -ENXIO; + memset(&clear_err, 0, sizeof(clear_err)); + clear_err.address = phys; + clear_err.length = len; + rc = nd_desc->ndctl(nd_desc, NULL, ND_CMD_CLEAR_ERROR, &clear_err, + sizeof(clear_err), &cmd_rc); + if (rc < 0) + return rc; + if (cmd_rc < 0) + return cmd_rc; + return clear_err.cleared; +} +EXPORT_SYMBOL_GPL(nvdimm_clear_poison); + static struct bus_type nvdimm_bus_type = { .name = "nd", .uevent = nvdimm_bus_uevent, diff --git a/drivers/nvdimm/nd.h b/drivers/nvdimm/nd.h index 78b82f6dd191..1799bd97a9ce 100644 --- a/drivers/nvdimm/nd.h +++ b/drivers/nvdimm/nd.h @@ -186,6 +186,8 @@ int nvdimm_init_nsarea(struct nvdimm_drvdata *ndd); int 
nvdimm_init_config_data(struct nvdimm_drvdata *ndd); int nvdimm_set_config_data(struct nvdimm_drvdata *ndd, size_t offset, void *buf, size_t len); +long nvdimm_clear_poison(struct device *dev, phys_addr_t phys, + unsigned int len); struct nd_btt *to_nd_btt(struct device *dev); struct nd_gen_sb { diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index e7b86a7fca0a..adc387236fe7 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -62,17 +62,40 @@ static bool is_bad_pmem(struct badblocks *bb, sector_t sector, unsigned int len) return false; } +static void pmem_clear_poison(struct pmem_device *pmem, phys_addr_t offset, + unsigned int len) +{ + struct device *dev = disk_to_dev(pmem->pmem_disk); + sector_t sector; + long cleared; + + sector = (offset - pmem->data_offset) / 512; + cleared = nvdimm_clear_poison(dev, pmem->phys_addr + offset, len); + + if (cleared > 0 && cleared / 512) { + dev_dbg(dev, "%s: %llx clear %ld sector%s\n", + __func__, (unsigned long long) sector, + cleared / 512, cleared / 512 > 1 ? "s" : ""); + badblocks_clear(&pmem->bb, sector, cleared / 512); + } + invalidate_pmem(pmem->virt_addr + offset, len); +} + static int pmem_do_bvec(struct pmem_device *pmem, struct page *page, unsigned int len, unsigned int off, int rw, sector_t sector) { int rc = 0; + bool bad_pmem = false; void *mem = kmap_atomic(page); phys_addr_t pmem_off = sector * 512 + pmem->data_offset; void __pmem *pmem_addr = pmem->virt_addr + pmem_off; + if (unlikely(is_bad_pmem(&pmem->bb, sector, len))) + bad_pmem = true; + if (rw == READ) { - if (unlikely(is_bad_pmem(&pmem->bb, sector, len))) + if (unlikely(bad_pmem)) rc = -EIO; else { memcpy_from_pmem(mem + off, pmem_addr, len); @@ -81,6 +104,10 @@ static int pmem_do_bvec(struct pmem_device *pmem, struct page *page, } else { flush_dcache_page(page); memcpy_to_pmem(pmem_addr, mem + off, len); + if (unlikely(bad_pmem)) { + pmem_clear_poison(pmem, pmem_off, len); + memcpy_to_pmem(pmem_addr, mem + off, len); + } } kunmap_atomic(mem); diff --git a/include/linux/pmem.h b/include/linux/pmem.h index 7c3d11a6b4ad..3ec5309e29f3 100644 --- a/include/linux/pmem.h +++ b/include/linux/pmem.h @@ -58,6 +58,11 @@ static inline void arch_wb_cache_pmem(void __pmem *addr, size_t size) { BUG(); } + +static inline void arch_invalidate_pmem(void __pmem *addr, size_t size) +{ + BUG(); +} #endif /* @@ -185,6 +190,20 @@ static inline void clear_pmem(void __pmem *addr, size_t size) default_clear_pmem(addr, size); } +/** + * invalidate_pmem - flush a pmem range from the cache hierarchy + * @addr: virtual start address + * @size: bytes to invalidate (internally aligned to cache line size) + * + * For platforms that support clearing poison this flushes any poisoned + * ranges out of the cache + */ +static inline void invalidate_pmem(void __pmem *addr, size_t size) +{ + if (arch_has_pmem_api()) + arch_invalidate_pmem(addr, size); +} + /** * wb_cache_pmem - write back processor cache for PMEM memory range * @addr: virtual start address -- cgit v1.2.3 From 2793a23aacbd754dbbb5cb75093deb7e4103bace Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Wed, 9 Mar 2016 21:58:32 -0500 Subject: net: validate variable length ll headers Netdevice parameter hard_header_len is variously interpreted both as an upper and lower bound on link layer header length. The field is used as upper bound when reserving room at allocation, as lower bound when validating user input in PF_PACKET. Clarify the definition to be maximum header length. 
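A validate hook, introduced just below, lets a device whose link-layer header really is variable length accept headers shorter than hard_header_len when they are still well formed. As a concrete illustration, a minimal sketch of such a hook for a hypothetical device; the toy format (first byte encodes the total header length) and every name here are assumptions, not part of this patch:

/* Hypothetical driver: the first header byte holds the total header length. */
static bool toydev_validate_header(const char *ll_header, unsigned int len)
{
	unsigned int hdrlen;

	if (len < 1)
		return false;

	hdrlen = (unsigned char)ll_header[0];
	/* accept only a complete header that fits in the supplied bytes */
	return hdrlen >= 1 && hdrlen <= len;
}

static const struct header_ops toydev_header_ops = {
	/* .create, .parse, etc. elided */
	.validate = toydev_validate_header,
};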
For validation of untrusted headers, add an optional validate member to header_ops. Allow bypassing of validation by passing CAP_SYS_RAWIO, for instance for deliberate testing of corrupt input. In this case, pad trailing bytes, as some device drivers expect completely initialized headers. See also http://comments.gmane.org/gmane.linux.network/401064 Signed-off-by: Willem de Bruijn Signed-off-by: David S. Miller --- include/linux/netdevice.h | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index efe7cec111fa..fd30cb545c45 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -268,6 +268,7 @@ struct header_ops { void (*cache_update)(struct hh_cache *hh, const struct net_device *dev, const unsigned char *haddr); + bool (*validate)(const char *ll_header, unsigned int len); }; /* These flag bits are private to the generic network queueing @@ -1459,8 +1460,7 @@ enum netdev_priv_flags { * @dma: DMA channel * @mtu: Interface MTU value * @type: Interface hardware type - * @hard_header_len: Hardware header length, which means that this is the - * minimum size of a packet. + * @hard_header_len: Maximum hardware header length. * * @needed_headroom: Extra headroom the hardware may need, but not in all * cases can this be guaranteed @@ -2687,6 +2687,24 @@ static inline int dev_parse_header(const struct sk_buff *skb, return dev->header_ops->parse(skb, haddr); } +/* ll_header must have at least hard_header_len allocated */ +static inline bool dev_validate_header(const struct net_device *dev, + char *ll_header, int len) +{ + if (likely(len >= dev->hard_header_len)) + return true; + + if (capable(CAP_SYS_RAWIO)) { + memset(ll_header + len, 0, dev->hard_header_len - len); + return true; + } + + if (dev->header_ops && dev->header_ops->validate) + return dev->header_ops->validate(ll_header, len); + + return false; +} + typedef int gifconf_func_t(struct net_device * dev, char __user * bufptr, int len); int register_gifconf(unsigned int family, gifconf_func_t *gifconf); static inline int unregister_gifconf(unsigned int family) -- cgit v1.2.3 From 153fefbf34e5079c2dd69490f5f23373758d2e9c Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Mon, 7 Mar 2016 18:51:45 +0200 Subject: net/mlx5_core: Create anchor of last flow table Create an empty flow table at the end of the NIC rx namespace. Adding this flow table simplifies the implementation of "forward to next prio" rules.
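The design choice is essentially a sentinel node: because an empty anchor table always terminates the chain, code that forwards to "the next" flow table never needs to special-case the end of the namespace. A toy illustration of the idea in plain C, deliberately not mlx5 code:

#include <stdbool.h>
#include <stddef.h>

struct prio_node {
	struct prio_node *next;
	bool is_anchor;
};

/* A permanent, empty anchor terminates the chain, so a forward
 * target always exists and callers need no end-of-chain checks.
 */
static struct prio_node anchor = { .next = NULL, .is_anchor = true };

static struct prio_node *next_fwd_target(struct prio_node *p)
{
	return p->next ? p->next : &anchor;
}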
Signed-off-by: Maor Gottlieb Reviewed-by: Matan Barak Signed-off-by: Doug Ledford --- drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 40 ++++++++++++++++++++++- drivers/net/ethernet/mellanox/mlx5/core/fs_core.h | 6 ++++ include/linux/mlx5/fs.h | 1 + 3 files changed, 46 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index 6f68dba8d7ed..a2781ee0fd15 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -77,6 +77,9 @@ #define KERNEL_NUM_PRIOS 1 #define KENREL_MIN_LEVEL 2 +#define ANCHOR_MAX_FT 1 +#define ANCHOR_NUM_PRIOS 1 +#define ANCHOR_MIN_LEVEL (BY_PASS_MIN_LEVEL + 1) struct node_caps { size_t arr_sz; long *caps; @@ -92,7 +95,7 @@ static struct init_tree_node { int max_ft; } root_fs = { .type = FS_TYPE_NAMESPACE, - .ar_size = 3, + .ar_size = 4, .children = (struct init_tree_node[]) { ADD_PRIO(0, BY_PASS_MIN_LEVEL, 0, FS_REQUIRED_CAPS(FS_CAP(flow_table_properties_nic_receive.flow_modify_en), @@ -108,6 +111,8 @@ static struct init_tree_node { FS_CAP(flow_table_properties_nic_receive.identified_miss_table_mode), FS_CAP(flow_table_properties_nic_receive.flow_table_modify)), ADD_NS(ADD_MULTIPLE_PRIO(LEFTOVERS_NUM_PRIOS, LEFTOVERS_MAX_FT))), + ADD_PRIO(0, ANCHOR_MIN_LEVEL, 0, {}, + ADD_NS(ADD_MULTIPLE_PRIO(ANCHOR_NUM_PRIOS, ANCHOR_MAX_FT))), } }; @@ -1126,6 +1131,7 @@ struct mlx5_flow_namespace *mlx5_get_flow_namespace(struct mlx5_core_dev *dev, case MLX5_FLOW_NAMESPACE_BYPASS: case MLX5_FLOW_NAMESPACE_KERNEL: case MLX5_FLOW_NAMESPACE_LEFTOVERS: + case MLX5_FLOW_NAMESPACE_ANCHOR: prio = type; break; case MLX5_FLOW_NAMESPACE_FDB: @@ -1351,6 +1357,25 @@ static void set_prio_attrs(struct mlx5_flow_root_namespace *root_ns) } } +#define ANCHOR_PRIO 0 +#define ANCHOR_SIZE 1 +static int create_anchor_flow_table(struct mlx5_core_dev + *dev) +{ + struct mlx5_flow_namespace *ns = NULL; + struct mlx5_flow_table *ft; + + ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_ANCHOR); + if (!ns) + return -EINVAL; + ft = mlx5_create_flow_table(ns, ANCHOR_PRIO, ANCHOR_SIZE); + if (IS_ERR(ft)) { + mlx5_core_err(dev, "Failed to create last anchor flow table"); + return PTR_ERR(ft); + } + return 0; +} + static int init_root_ns(struct mlx5_core_dev *dev) { @@ -1363,6 +1388,9 @@ static int init_root_ns(struct mlx5_core_dev *dev) set_prio_attrs(dev->priv.root_ns); + if (create_anchor_flow_table(dev)) + goto cleanup; + return 0; cleanup: @@ -1392,6 +1420,15 @@ static void cleanup_single_prio_root_ns(struct mlx5_core_dev *dev, root_ns = NULL; } +static void destroy_flow_tables(struct fs_prio *prio) +{ + struct mlx5_flow_table *iter; + struct mlx5_flow_table *tmp; + + fs_for_each_ft_safe(iter, tmp, prio) + mlx5_destroy_flow_table(iter); +} + static void cleanup_root_ns(struct mlx5_core_dev *dev) { struct mlx5_flow_root_namespace *root_ns = dev->priv.root_ns; @@ -1420,6 +1457,7 @@ static void cleanup_root_ns(struct mlx5_core_dev *dev) list); fs_get_obj(obj_iter_prio2, iter_prio2); + destroy_flow_tables(obj_iter_prio2); if (tree_remove_node(iter_prio2)) { mlx5_core_warn(dev, "Priority %d wasn't destroyed, refcount > 1\n", diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h index 00245fd7e4bc..574a903180ed 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h @@ -142,6 +142,9 @@ void 
mlx5_cleanup_fs(struct mlx5_core_dev *dev); #define fs_list_for_each_entry(pos, root) \ list_for_each_entry(pos, root, node.list) +#define fs_list_for_each_entry_safe(pos, tmp, root) \ + list_for_each_entry_safe(pos, tmp, root, node.list) + #define fs_for_each_ns_or_ft_reverse(pos, prio) \ list_for_each_entry_reverse(pos, &(prio)->node.children, list) @@ -157,6 +160,9 @@ void mlx5_cleanup_fs(struct mlx5_core_dev *dev); #define fs_for_each_ft(pos, prio) \ fs_list_for_each_entry(pos, &(prio)->node.children) +#define fs_for_each_ft_safe(pos, tmp, prio) \ + fs_list_for_each_entry_safe(pos, tmp, &(prio)->node.children) + #define fs_for_each_fg(pos, ft) \ fs_list_for_each_entry(pos, &(ft)->node.children) diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h index 8230caa3fb6e..72adf53fdcba 100644 --- a/include/linux/mlx5/fs.h +++ b/include/linux/mlx5/fs.h @@ -52,6 +52,7 @@ enum mlx5_flow_namespace_type { MLX5_FLOW_NAMESPACE_BYPASS, MLX5_FLOW_NAMESPACE_KERNEL, MLX5_FLOW_NAMESPACE_LEFTOVERS, + MLX5_FLOW_NAMESPACE_ANCHOR, MLX5_FLOW_NAMESPACE_FDB, }; -- cgit v1.2.3 From b3638e1a76648dbd482cc5a8f27eb6948cc3bc86 Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Mon, 7 Mar 2016 18:51:46 +0200 Subject: net/mlx5_core: Introduce forward to next priority action Add support for creating flow rules that forward packets to the first flow table in the next priority (the next priority could be the first priority in the next namespace, or the next priority in the same namespace). This feature could be used for DONT_TRAP rules, or for rules that only want to mark the packet with a flow tag. To do this optimally, each flow table keeps a list of all the rules that point to it; when a flow table is created or destroyed, the list head is updated correspondingly. This kind of rule is created when the destination is NULL and the action is MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO.
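A sketch of how a caller might install such a rule with the mlx5_add_flow_rule() signature from this patch; the zeroed match buffers (match all) and the MLX5_ST_SZ_DW() sizing are illustrative assumptions:

/* Hedged sketch: forward all traffic on this table to the next priority.
 * Per this patch, dest must be NULL for FWD_NEXT_PRIO; the call returns
 * a rule pointer or ERR_PTR(-EOPNOTSUPP/-EINVAL) on failure.
 */
static struct mlx5_flow_rule *fwd_all_to_next_prio(struct mlx5_flow_table *ft)
{
	u32 match_c[MLX5_ST_SZ_DW(fte_match_param)] = {};
	u32 match_v[MLX5_ST_SZ_DW(fte_match_param)] = {};

	return mlx5_add_flow_rule(ft, 0, match_c, match_v,
				  MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO,
				  MLX5_FS_DEFAULT_FLOW_TAG, NULL);
}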
Signed-off-by: Maor Gottlieb Reviewed-by: Matan Barak Signed-off-by: Doug Ledford --- drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 185 ++++++++++++++++++++-- drivers/net/ethernet/mellanox/mlx5/core/fs_core.h | 9 ++ include/linux/mlx5/fs.h | 4 + include/linux/mlx5/mlx5_ifc.h | 3 +- 4 files changed, 188 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index a2781ee0fd15..bf3446794bd5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -201,8 +201,10 @@ static void tree_put_node(struct fs_node *node) static int tree_remove_node(struct fs_node *node) { - if (atomic_read(&node->refcount) > 1) - return -EPERM; + if (atomic_read(&node->refcount) > 1) { + atomic_dec(&node->refcount); + return -EEXIST; + } tree_put_node(node); return 0; } @@ -365,6 +367,11 @@ static void del_rule(struct fs_node *node) memcpy(match_value, fte->val, sizeof(fte->val)); fs_get_obj(ft, fg->node.parent); list_del(&rule->node.list); + if (rule->sw_action == MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO) { + mutex_lock(&rule->dest_attr.ft->lock); + list_del(&rule->next_ft); + mutex_unlock(&rule->dest_attr.ft->lock); + } fte->dests_size--; if (fte->dests_size) { err = mlx5_cmd_update_fte(dev, ft, @@ -470,6 +477,8 @@ static struct mlx5_flow_table *alloc_flow_table(int level, int max_fte, ft->node.type = FS_TYPE_FLOW_TABLE; ft->type = table_type; ft->max_fte = max_fte; + INIT_LIST_HEAD(&ft->fwd_rules); + mutex_init(&ft->lock); return ft; } @@ -606,9 +615,63 @@ static int update_root_ft_create(struct mlx5_flow_table *ft, struct fs_prio return err; } +static int mlx5_modify_rule_destination(struct mlx5_flow_rule *rule, + struct mlx5_flow_destination *dest) +{ + struct mlx5_flow_table *ft; + struct mlx5_flow_group *fg; + struct fs_fte *fte; + int err = 0; + + fs_get_obj(fte, rule->node.parent); + if (!(fte->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST)) + return -EINVAL; + lock_ref_node(&fte->node); + fs_get_obj(fg, fte->node.parent); + fs_get_obj(ft, fg->node.parent); + + memcpy(&rule->dest_attr, dest, sizeof(*dest)); + err = mlx5_cmd_update_fte(get_dev(&ft->node), + ft, fg->id, fte); + unlock_ref_node(&fte->node); + + return err; +} + +/* Modify/set FWD rules that point on old_next_ft to point on new_next_ft */ +static int connect_fwd_rules(struct mlx5_core_dev *dev, + struct mlx5_flow_table *new_next_ft, + struct mlx5_flow_table *old_next_ft) +{ + struct mlx5_flow_destination dest; + struct mlx5_flow_rule *iter; + int err = 0; + + /* new_next_ft and old_next_ft could be NULL only + * when we create/destroy the anchor flow table. 
+ */ + if (!new_next_ft || !old_next_ft) + return 0; + + dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + dest.ft = new_next_ft; + + mutex_lock(&old_next_ft->lock); + list_splice_init(&old_next_ft->fwd_rules, &new_next_ft->fwd_rules); + mutex_unlock(&old_next_ft->lock); + list_for_each_entry(iter, &new_next_ft->fwd_rules, next_ft) { + err = mlx5_modify_rule_destination(iter, &dest); + if (err) + pr_err("mlx5_core: failed to modify rule to point on flow table %d\n", + new_next_ft->id); + } + return 0; +} + static int connect_flow_table(struct mlx5_core_dev *dev, struct mlx5_flow_table *ft, struct fs_prio *prio) { + struct mlx5_flow_table *next_ft; int err = 0; /* Connect_prev_fts and update_root_ft_create are mutually exclusive */ @@ -617,6 +680,11 @@ static int connect_flow_table(struct mlx5_core_dev *dev, struct mlx5_flow_table err = connect_prev_fts(dev, ft, prio); if (err) return err; + + next_ft = find_next_chained_ft(prio); + err = connect_fwd_rules(dev, ft, next_ft); + if (err) + return err; } if (MLX5_CAP_FLOWTABLE(dev, @@ -767,6 +835,7 @@ static struct mlx5_flow_rule *alloc_rule(struct mlx5_flow_destination *dest) if (!rule) return NULL; + INIT_LIST_HEAD(&rule->next_ft); rule->node.type = FS_TYPE_FLOW_DEST; memcpy(&rule->dest_attr, dest, sizeof(*dest)); @@ -787,9 +856,14 @@ static struct mlx5_flow_rule *add_rule_fte(struct fs_fte *fte, return ERR_PTR(-ENOMEM); fs_get_obj(ft, fg->node.parent); - /* Add dest to dests list- added as first element after the head */ + /* Add dest to dests list- we need flow tables to be in the + * end of the list for forward to next prio rules. + */ tree_init_node(&rule->node, 1, del_rule); - list_add_tail(&rule->node.list, &fte->node.children); + if (dest && dest->type != MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE) + list_add(&rule->node.list, &fte->node.children); + else + list_add_tail(&rule->node.list, &fte->node.children); fte->dests_size++; if (fte->dests_size == 1) err = mlx5_cmd_create_fte(get_dev(&ft->node), @@ -908,6 +982,25 @@ out: return fg; } +static struct mlx5_flow_rule *find_flow_rule(struct fs_fte *fte, + struct mlx5_flow_destination *dest) +{ + struct mlx5_flow_rule *rule; + + list_for_each_entry(rule, &fte->node.children, node.list) { + if (rule->dest_attr.type == dest->type) { + if ((dest->type == MLX5_FLOW_DESTINATION_TYPE_VPORT && + dest->vport_num == rule->dest_attr.vport_num) || + (dest->type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE && + dest->ft == rule->dest_attr.ft) || + (dest->type == MLX5_FLOW_DESTINATION_TYPE_TIR && + dest->tir_num == rule->dest_attr.tir_num)) + return rule; + } + } + return NULL; +} + static struct mlx5_flow_rule *add_rule_fg(struct mlx5_flow_group *fg, u32 *match_value, u8 action, @@ -924,6 +1017,13 @@ static struct mlx5_flow_rule *add_rule_fg(struct mlx5_flow_group *fg, nested_lock_ref_node(&fte->node, FS_MUTEX_CHILD); if (compare_match_value(&fg->mask, match_value, &fte->val) && action == fte->action && flow_tag == fte->flow_tag) { + rule = find_flow_rule(fte, dest); + if (rule) { + atomic_inc(&rule->node.refcount); + unlock_ref_node(&fte->node); + unlock_ref_node(&fg->node); + return rule; + } rule = add_rule_fte(fte, fg, dest); unlock_ref_node(&fte->node); if (IS_ERR(rule)) @@ -989,14 +1089,14 @@ static struct mlx5_flow_rule *add_rule_to_auto_fg(struct mlx5_flow_table *ft, return rule; } -struct mlx5_flow_rule * -mlx5_add_flow_rule(struct mlx5_flow_table *ft, - u8 match_criteria_enable, - u32 *match_criteria, - u32 *match_value, - u32 action, - u32 flow_tag, - struct mlx5_flow_destination *dest) 
+static struct mlx5_flow_rule * +_mlx5_add_flow_rule(struct mlx5_flow_table *ft, + u8 match_criteria_enable, + u32 *match_criteria, + u32 *match_value, + u32 action, + u32 flow_tag, + struct mlx5_flow_destination *dest) { struct mlx5_flow_group *g; struct mlx5_flow_rule *rule; @@ -1019,6 +1119,63 @@ unlock: unlock_ref_node(&ft->node); return rule; } + +static bool fwd_next_prio_supported(struct mlx5_flow_table *ft) +{ + return ((ft->type == FS_FT_NIC_RX) && + (MLX5_CAP_FLOWTABLE(get_dev(&ft->node), nic_rx_multi_path_tirs))); +} + +struct mlx5_flow_rule * +mlx5_add_flow_rule(struct mlx5_flow_table *ft, + u8 match_criteria_enable, + u32 *match_criteria, + u32 *match_value, + u32 action, + u32 flow_tag, + struct mlx5_flow_destination *dest) +{ + struct mlx5_flow_root_namespace *root = find_root(&ft->node); + struct mlx5_flow_destination gen_dest; + struct mlx5_flow_table *next_ft = NULL; + struct mlx5_flow_rule *rule = NULL; + u32 sw_action = action; + struct fs_prio *prio; + + fs_get_obj(prio, ft->node.parent); + if (action == MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO) { + if (!fwd_next_prio_supported(ft)) + return ERR_PTR(-EOPNOTSUPP); + if (dest) + return ERR_PTR(-EINVAL); + mutex_lock(&root->chain_lock); + next_ft = find_next_chained_ft(prio); + if (next_ft) { + gen_dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + gen_dest.ft = next_ft; + dest = &gen_dest; + action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + } else { + mutex_unlock(&root->chain_lock); + return ERR_PTR(-EOPNOTSUPP); + } + } + + rule = _mlx5_add_flow_rule(ft, match_criteria_enable, match_criteria, + match_value, action, flow_tag, dest); + + if (sw_action == MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO) { + if (!IS_ERR_OR_NULL(rule) && + (list_empty(&rule->next_ft))) { + mutex_lock(&next_ft->lock); + list_add(&rule->next_ft, &next_ft->fwd_rules); + mutex_unlock(&next_ft->lock); + rule->sw_action = MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO; + } + mutex_unlock(&root->chain_lock); + } + return rule; +} EXPORT_SYMBOL(mlx5_add_flow_rule); void mlx5_del_flow_rule(struct mlx5_flow_rule *rule) @@ -1082,6 +1239,10 @@ static int disconnect_flow_table(struct mlx5_flow_table *ft) return 0; next_ft = find_next_chained_ft(prio); + err = connect_fwd_rules(dev, next_ft, ft); + if (err) + return err; + err = connect_prev_fts(dev, next_ft, prio); if (err) mlx5_core_warn(dev, "Failed to disconnect flow table %d\n", diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h index 574a903180ed..f37a6248a27b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h @@ -68,6 +68,11 @@ struct fs_node { struct mlx5_flow_rule { struct fs_node node; struct mlx5_flow_destination dest_attr; + /* next_ft should be accessed under chain_lock and only of + * destination type is FWD_NEXT_fT. 
+ */ + struct list_head next_ft; + u32 sw_action; }; /* Type of children is mlx5_flow_group */ @@ -82,6 +87,10 @@ struct mlx5_flow_table { unsigned int required_groups; unsigned int num_groups; } autogroup; + /* Protect fwd_rules */ + struct mutex lock; + /* FWD rules that point on this flow table */ + struct list_head fwd_rules; }; /* Type of children is mlx5_flow_rule */ diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h index 72adf53fdcba..8dec5508d93d 100644 --- a/include/linux/mlx5/fs.h +++ b/include/linux/mlx5/fs.h @@ -38,6 +38,10 @@ #define MLX5_FS_DEFAULT_FLOW_TAG 0x0 +enum { + MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO = 1 << 16, +}; + #define LEFTOVERS_RULE_NUM 2 static inline void build_leftovers_ft_param(int *priority, int *n_ent, diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 51f1e540fc2b..5f70f36af3cd 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -458,7 +458,8 @@ struct mlx5_ifc_ads_bits { }; struct mlx5_ifc_flow_table_nic_cap_bits { - u8 reserved_at_0[0x200]; + u8 nic_rx_multi_path_tirs[0x1]; + u8 reserved_at_1[0x1ff]; struct mlx5_ifc_flow_table_prop_layout_bits flow_table_properties_nic_receive; -- cgit v1.2.3 From 35d1901134e97cf95c0ab6ef70f5aead6cb34e9e Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Mon, 7 Mar 2016 18:51:47 +0200 Subject: IB/mlx5: Add support for don't trap rules Each bypass flow steering priority will be split into two priorities: 1. Priority for don't trap rules. 2. Priority for normal rules. When user creates a flow using IB_FLOW_ATTR_FLAGS_DONT_TRAP flag, the driver creates two flow rules, one used for receiving the traffic and the other one for forwarding the packet to continue matching in lower or equal priorities. Signed-off-by: Maor Gottlieb Reviewed-by: Matan Barak Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/main.c | 54 ++++++++++++++++++++++++++++++++---- drivers/infiniband/hw/mlx5/mlx5_ib.h | 2 +- include/linux/mlx5/device.h | 7 ++++- 3 files changed, 55 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 03c418ccbc98..5863644f0872 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -1369,11 +1369,20 @@ static int mlx5_ib_destroy_flow(struct ib_flow *flow_id) return 0; } +static int ib_prio_to_core_prio(unsigned int priority, bool dont_trap) +{ + priority *= 2; + if (!dont_trap) + priority++; + return priority; +} + #define MLX5_FS_MAX_TYPES 10 #define MLX5_FS_MAX_ENTRIES 32000UL static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev, struct ib_flow_attr *flow_attr) { + bool dont_trap = flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP; struct mlx5_flow_namespace *ns = NULL; struct mlx5_ib_flow_prio *prio; struct mlx5_flow_table *ft; @@ -1383,10 +1392,12 @@ static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev, int err = 0; if (flow_attr->type == IB_FLOW_ATTR_NORMAL) { - if (flow_is_multicast_only(flow_attr)) + if (flow_is_multicast_only(flow_attr) && + !dont_trap) priority = MLX5_IB_FLOW_MCAST_PRIO; else - priority = flow_attr->priority; + priority = ib_prio_to_core_prio(flow_attr->priority, + dont_trap); ns = mlx5_get_flow_namespace(dev->mdev, MLX5_FLOW_NAMESPACE_BYPASS); num_entries = MLX5_FS_MAX_ENTRIES; @@ -1434,6 +1445,7 @@ static struct mlx5_ib_flow_handler *create_flow_rule(struct mlx5_ib_dev *dev, unsigned int spec_index; u32 *match_c; u32 *match_v; + u32 action; int err = 0; 
if (!is_valid_attr(flow_attr)) @@ -1459,9 +1471,11 @@ static struct mlx5_ib_flow_handler *create_flow_rule(struct mlx5_ib_dev *dev, /* Outer header support only */ match_criteria_enable = (!outer_header_zero(match_c)) << 0; + action = dst ? MLX5_FLOW_CONTEXT_ACTION_FWD_DEST : + MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO; handler->rule = mlx5_add_flow_rule(ft, match_criteria_enable, match_c, match_v, - MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, + action, MLX5_FS_DEFAULT_FLOW_TAG, dst); @@ -1481,6 +1495,29 @@ free: return err ? ERR_PTR(err) : handler; } +static struct mlx5_ib_flow_handler *create_dont_trap_rule(struct mlx5_ib_dev *dev, + struct mlx5_ib_flow_prio *ft_prio, + struct ib_flow_attr *flow_attr, + struct mlx5_flow_destination *dst) +{ + struct mlx5_ib_flow_handler *handler_dst = NULL; + struct mlx5_ib_flow_handler *handler = NULL; + + handler = create_flow_rule(dev, ft_prio, flow_attr, NULL); + if (!IS_ERR(handler)) { + handler_dst = create_flow_rule(dev, ft_prio, + flow_attr, dst); + if (IS_ERR(handler_dst)) { + mlx5_del_flow_rule(handler->rule); + kfree(handler); + handler = handler_dst; + } else { + list_add(&handler_dst->list, &handler->list); + } + } + + return handler; +} enum { LEFTOVERS_MC, LEFTOVERS_UC, @@ -1558,7 +1595,7 @@ static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp, if (domain != IB_FLOW_DOMAIN_USER || flow_attr->port > MLX5_CAP_GEN(dev->mdev, num_ports) || - flow_attr->flags) + (flow_attr->flags & ~IB_FLOW_ATTR_FLAGS_DONT_TRAP)) return ERR_PTR(-EINVAL); dst = kzalloc(sizeof(*dst), GFP_KERNEL); @@ -1577,8 +1614,13 @@ static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp, dst->tir_num = to_mqp(qp)->raw_packet_qp.rq.tirn; if (flow_attr->type == IB_FLOW_ATTR_NORMAL) { - handler = create_flow_rule(dev, ft_prio, flow_attr, - dst); + if (flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP) { + handler = create_dont_trap_rule(dev, ft_prio, + flow_attr, dst); + } else { + handler = create_flow_rule(dev, ft_prio, flow_attr, + dst); + } } else if (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT || flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT) { handler = create_leftovers_rule(dev, ft_prio, flow_attr, diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index d2b9737baa36..bd84b1fbb787 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -126,7 +126,7 @@ struct mlx5_ib_pd { }; #define MLX5_IB_FLOW_MCAST_PRIO (MLX5_BY_PASS_NUM_PRIOS - 1) -#define MLX5_IB_FLOW_LAST_PRIO (MLX5_IB_FLOW_MCAST_PRIO - 1) +#define MLX5_IB_FLOW_LAST_PRIO (MLX5_BY_PASS_NUM_REGULAR_PRIOS - 1) #if (MLX5_IB_FLOW_LAST_PRIO <= 0) #error "Invalid number of bypass priorities" #endif diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 987764afa65c..2bf222e65e6b 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -1294,6 +1294,11 @@ static inline u16 mlx5_to_sw_pkey_sz(int pkey_sz) return MLX5_MIN_PKEY_TABLE_SIZE << pkey_sz; } -#define MLX5_BY_PASS_NUM_PRIOS 9 +#define MLX5_BY_PASS_NUM_REGULAR_PRIOS 8 +#define MLX5_BY_PASS_NUM_DONT_TRAP_PRIOS 8 +#define MLX5_BY_PASS_NUM_MULTICAST_PRIOS 1 +#define MLX5_BY_PASS_NUM_PRIOS (MLX5_BY_PASS_NUM_REGULAR_PRIOS +\ + MLX5_BY_PASS_NUM_DONT_TRAP_PRIOS +\ + MLX5_BY_PASS_NUM_MULTICAST_PRIOS) #endif /* MLX5_DEVICE_H */ -- cgit v1.2.3 From 764fd639d794a1c0b0d203b19d1bef0451c23fb1 Mon Sep 17 00:00:00 2001 From: "Wiebe, Wladislav (Nokia - DE/Ulm)" Date: Fri, 13 Nov 2015 12:10:05 +0000 Subject: pstore: Add support for 64 Bit address space Some architectures 
have their reserved RAM in 64 Bit address space. Therefore convert mem_address module parameter to ullong. Signed-off-by: Wladislav Wiebe Acked-by: Kees Cook Signed-off-by: Tony Luck --- fs/pstore/ram.c | 4 ++-- include/linux/pstore_ram.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c index 319c3a60cfa5..bd9812e83461 100644 --- a/fs/pstore/ram.c +++ b/fs/pstore/ram.c @@ -55,8 +55,8 @@ static ulong ramoops_pmsg_size = MIN_MEM_SIZE; module_param_named(pmsg_size, ramoops_pmsg_size, ulong, 0400); MODULE_PARM_DESC(pmsg_size, "size of user space message log"); -static ulong mem_address; -module_param(mem_address, ulong, 0400); +static unsigned long long mem_address; +module_param(mem_address, ullong, 0400); MODULE_PARM_DESC(mem_address, "start of reserved RAM used to store oops/panic logs"); diff --git a/include/linux/pstore_ram.h b/include/linux/pstore_ram.h index 9c9d6c154c8e..4660aaa3195e 100644 --- a/include/linux/pstore_ram.h +++ b/include/linux/pstore_ram.h @@ -76,7 +76,7 @@ ssize_t persistent_ram_ecc_string(struct persistent_ram_zone *prz, struct ramoops_platform_data { unsigned long mem_size; - unsigned long mem_address; + phys_addr_t mem_address; unsigned int mem_type; unsigned long record_size; unsigned long console_size; -- cgit v1.2.3 From 9f3e04297b08212ef43fce4f167e01f40a98d243 Mon Sep 17 00:00:00 2001 From: Archit Taneja Date: Wed, 3 Feb 2016 14:29:49 +0530 Subject: mtd: nand: don't select chip in nand_chip's block_bad op One of the arguments passed to struct nand_chip's block_bad op is 'getchip', which, if true, is supposed to get and select the nand device, and later unselect and release the device. This op is intended to be replaceable by drivers. The drivers shouldn't be responsible for selecting/unselecting chip. Like other ops, the chip should already be selected before the block_bad op is called. Remove the getchip argument from the block_bad op and nand_block_checkbad. Move the chip selection to nand_block_isbad, since it is the only caller to nand_block_checkbad which requires chip selection. Modify nand_block_bad (the default function for the op) such that it doesn't select the chip. Remove the getchip argument from the bad_block funcs in cafe_nand, diskonchip and docg4 drivers. 
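Under the new contract a custom block_bad op can assume the chip is already selected and must not select or release it. A minimal sketch of what a driver override might now look like; the marker check shown is a plausible assumption, not code from this patch:

/* Hypothetical driver op: the core has already selected the chip. */
static int mydrv_block_bad(struct mtd_info *mtd, loff_t ofs)
{
	struct nand_chip *chip = mtd_to_nand(mtd);
	int page = (int)(ofs >> chip->page_shift) & chip->pagemask;

	/* read the factory bad block marker from the OOB area */
	chip->cmdfunc(mtd, NAND_CMD_READOOB, chip->badblockpos, page);
	return chip->read_byte(mtd) != 0xff;
}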
Reviewed-by: Boris Brezillon Signed-off-by: Archit Taneja Signed-off-by: Brian Norris --- drivers/mtd/nand/cafe_nand.c | 2 +- drivers/mtd/nand/diskonchip.c | 2 +- drivers/mtd/nand/docg4.c | 2 +- drivers/mtd/nand/nand_base.c | 41 +++++++++++++++++++---------------------- include/linux/mtd/nand.h | 2 +- 5 files changed, 23 insertions(+), 26 deletions(-) (limited to 'include/linux') diff --git a/drivers/mtd/nand/cafe_nand.c b/drivers/mtd/nand/cafe_nand.c index aa1a616b9fb6..e553aff68987 100644 --- a/drivers/mtd/nand/cafe_nand.c +++ b/drivers/mtd/nand/cafe_nand.c @@ -537,7 +537,7 @@ static int cafe_nand_write_page_lowlevel(struct mtd_info *mtd, return 0; } -static int cafe_nand_block_bad(struct mtd_info *mtd, loff_t ofs, int getchip) +static int cafe_nand_block_bad(struct mtd_info *mtd, loff_t ofs) { return 0; } diff --git a/drivers/mtd/nand/diskonchip.c b/drivers/mtd/nand/diskonchip.c index f170f3c31b34..547c1002941d 100644 --- a/drivers/mtd/nand/diskonchip.c +++ b/drivers/mtd/nand/diskonchip.c @@ -794,7 +794,7 @@ static int doc200x_dev_ready(struct mtd_info *mtd) } } -static int doc200x_block_bad(struct mtd_info *mtd, loff_t ofs, int getchip) +static int doc200x_block_bad(struct mtd_info *mtd, loff_t ofs) { /* This is our last resort if we couldn't find or create a BBT. Just pretend all blocks are good. */ diff --git a/drivers/mtd/nand/docg4.c b/drivers/mtd/nand/docg4.c index fb46fd7e056b..d86a60e1bbcb 100644 --- a/drivers/mtd/nand/docg4.c +++ b/drivers/mtd/nand/docg4.c @@ -1120,7 +1120,7 @@ static int docg4_block_markbad(struct mtd_info *mtd, loff_t ofs) return ret; } -static int docg4_block_neverbad(struct mtd_info *mtd, loff_t ofs, int getchip) +static int docg4_block_neverbad(struct mtd_info *mtd, loff_t ofs) { /* only called when module_param ignore_badblocks is set */ return 0; diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c index 191e4f75a513..d49b01df3513 100644 --- a/drivers/mtd/nand/nand_base.c +++ b/drivers/mtd/nand/nand_base.c @@ -317,9 +317,9 @@ static void nand_read_buf16(struct mtd_info *mtd, uint8_t *buf, int len) * * Check, if the block is bad. */ -static int nand_block_bad(struct mtd_info *mtd, loff_t ofs, int getchip) +static int nand_block_bad(struct mtd_info *mtd, loff_t ofs) { - int page, chipnr, res = 0, i = 0; + int page, res = 0, i = 0; struct nand_chip *chip = mtd_to_nand(mtd); u16 bad; @@ -328,15 +328,6 @@ static int nand_block_bad(struct mtd_info *mtd, loff_t ofs, int getchip) page = (int)(ofs >> chip->page_shift) & chip->pagemask; - if (getchip) { - chipnr = (int)(ofs >> chip->chip_shift); - - nand_get_device(mtd, FL_READING); - - /* Select the NAND device */ - chip->select_chip(mtd, chipnr); - } - do { if (chip->options & NAND_BUSWIDTH_16) { chip->cmdfunc(mtd, NAND_CMD_READOOB, @@ -361,11 +352,6 @@ static int nand_block_bad(struct mtd_info *mtd, loff_t ofs, int getchip) i++; } while (!res && i < 2 && (chip->bbt_options & NAND_BBT_SCAN2NDPAGE)); - if (getchip) { - chip->select_chip(mtd, -1); - nand_release_device(mtd); - } - return res; } @@ -503,19 +489,17 @@ static int nand_block_isreserved(struct mtd_info *mtd, loff_t ofs) * nand_block_checkbad - [GENERIC] Check if a block is marked bad * @mtd: MTD device structure * @ofs: offset from device start - * @getchip: 0, if the chip is already selected * @allowbbt: 1, if its allowed to access the bbt area * * Check, if the block is bad. Either by reading the bad block table or * calling of the scan function. 
*/ -static int nand_block_checkbad(struct mtd_info *mtd, loff_t ofs, int getchip, - int allowbbt) +static int nand_block_checkbad(struct mtd_info *mtd, loff_t ofs, int allowbbt) { struct nand_chip *chip = mtd_to_nand(mtd); if (!chip->bbt) - return chip->block_bad(mtd, ofs, getchip); + return chip->block_bad(mtd, ofs); /* Return info from the table */ return nand_isbad_bbt(mtd, ofs, allowbbt); @@ -2949,7 +2933,7 @@ int nand_erase_nand(struct mtd_info *mtd, struct erase_info *instr, while (len) { /* Check if we have a bad block, we do not erase bad blocks! */ if (nand_block_checkbad(mtd, ((loff_t) page) << - chip->page_shift, 0, allowbbt)) { + chip->page_shift, allowbbt)) { pr_warn("%s: attempt to erase a bad block at page 0x%08x\n", __func__, page); instr->state = MTD_ERASE_FAILED; @@ -3036,7 +3020,20 @@ static void nand_sync(struct mtd_info *mtd) */ static int nand_block_isbad(struct mtd_info *mtd, loff_t offs) { - return nand_block_checkbad(mtd, offs, 1, 0); + struct nand_chip *chip = mtd_to_nand(mtd); + int chipnr = (int)(offs >> chip->chip_shift); + int ret; + + /* Select the NAND device */ + nand_get_device(mtd, FL_READING); + chip->select_chip(mtd, chipnr); + + ret = nand_block_checkbad(mtd, offs, 0); + + chip->select_chip(mtd, -1); + nand_release_device(mtd); + + return ret; } /** diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index 7604f4be3386..56574ba36555 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -672,7 +672,7 @@ struct nand_chip { void (*write_buf)(struct mtd_info *mtd, const uint8_t *buf, int len); void (*read_buf)(struct mtd_info *mtd, uint8_t *buf, int len); void (*select_chip)(struct mtd_info *mtd, int chip); - int (*block_bad)(struct mtd_info *mtd, loff_t ofs, int getchip); + int (*block_bad)(struct mtd_info *mtd, loff_t ofs); int (*block_markbad)(struct mtd_info *mtd, loff_t ofs); void (*cmd_ctrl)(struct mtd_info *mtd, int dat, unsigned int ctrl); int (*dev_ready)(struct mtd_info *mtd); -- cgit v1.2.3 From adaf9fcd136970e480d7ca834c0cf25ce922ea74 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 10 Mar 2016 20:44:47 +0100 Subject: cpufreq: Move scheduler-related code to the sched directory Create cpufreq.c under kernel/sched/ and move the cpufreq code related to the scheduler to that file and to sched.h. Redefine cpufreq_update_util() as a static inline function to avoid function calls at its call sites in the scheduler code (as suggested by Peter Zijlstra). Also move the definition of struct update_util_data and declaration of cpufreq_set_update_util_data() from include/linux/cpufreq.h to include/linux/sched.h. Signed-off-by: Rafael J. 
Wysocki Acked-by: Peter Zijlstra (Intel) --- drivers/cpufreq/cpufreq.c | 53 -------------------------------------- drivers/cpufreq/cpufreq_governor.c | 1 + include/linux/cpufreq.h | 34 ------------------------ include/linux/sched.h | 9 +++++++ kernel/sched/Makefile | 1 + kernel/sched/cpufreq.c | 37 ++++++++++++++++++++++++++ kernel/sched/sched.h | 49 ++++++++++++++++++++++++++++++++++- 7 files changed, 96 insertions(+), 88 deletions(-) create mode 100644 kernel/sched/cpufreq.c (limited to 'include/linux') diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 6eca12ab71d7..58e1a39b4d22 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -103,59 +103,6 @@ static struct cpufreq_driver *cpufreq_driver; static DEFINE_PER_CPU(struct cpufreq_policy *, cpufreq_cpu_data); static DEFINE_RWLOCK(cpufreq_driver_lock); -static DEFINE_PER_CPU(struct update_util_data *, cpufreq_update_util_data); - -/** - * cpufreq_set_update_util_data - Populate the CPU's update_util_data pointer. - * @cpu: The CPU to set the pointer for. - * @data: New pointer value. - * - * Set and publish the update_util_data pointer for the given CPU. That pointer - * points to a struct update_util_data object containing a callback function - * to call from cpufreq_update_util(). That function will be called from an RCU - * read-side critical section, so it must not sleep. - * - * Callers must use RCU-sched callbacks to free any memory that might be - * accessed via the old update_util_data pointer or invoke synchronize_sched() - * right after this function to avoid use-after-free. - */ -void cpufreq_set_update_util_data(int cpu, struct update_util_data *data) -{ - if (WARN_ON(data && !data->func)) - return; - - rcu_assign_pointer(per_cpu(cpufreq_update_util_data, cpu), data); -} -EXPORT_SYMBOL_GPL(cpufreq_set_update_util_data); - -/** - * cpufreq_update_util - Take a note about CPU utilization changes. - * @time: Current time. - * @util: Current utilization. - * @max: Utilization ceiling. - * - * This function is called by the scheduler on every invocation of - * update_load_avg() on the CPU whose utilization is being updated. - * - * It can only be called from RCU-sched read-side critical sections. - */ -void cpufreq_update_util(u64 time, unsigned long util, unsigned long max) -{ - struct update_util_data *data; - -#ifdef CONFIG_LOCKDEP - WARN_ON(debug_locks && !rcu_read_lock_sched_held()); -#endif - - data = rcu_dereference_sched(*this_cpu_ptr(&cpufreq_update_util_data)); - /* - * If this isn't inside of an RCU-sched read-side critical section, data - * may become NULL after the check below. - */ - if (data) - data->func(data, time, util, max); -} - /* Flag to suspend/resume CPUFreq governors */ static bool cpufreq_suspended; diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c index db46190bb246..1c25ef405616 100644 --- a/drivers/cpufreq/cpufreq_governor.c +++ b/drivers/cpufreq/cpufreq_governor.c @@ -18,6 +18,7 @@ #include #include +#include #include #include "cpufreq_governor.h" diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index a50c5b2e3bf2..a5ea52f793f3 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -146,36 +146,6 @@ static inline bool policy_is_shared(struct cpufreq_policy *policy) extern struct kobject *cpufreq_global_kobject; #ifdef CONFIG_CPU_FREQ -void cpufreq_update_util(u64 time, unsigned long util, unsigned long max); - -/** - * cpufreq_trigger_update - Trigger CPU performance state evaluation if needed. 
- * @time: Current time. - * - * The way cpufreq is currently arranged requires it to evaluate the CPU - * performance state (frequency/voltage) on a regular basis to prevent it from - * being stuck in a completely inadequate performance level for too long. - * That is not guaranteed to happen if the updates are only triggered from CFS, - * though, because they may not be coming in if RT or deadline tasks are active - * all the time (or there are RT and DL tasks only). - * - * As a workaround for that issue, this function is called by the RT and DL - * sched classes to trigger extra cpufreq updates to prevent it from stalling, - * but that really is a band-aid. Going forward it should be replaced with - * solutions targeted more specifically at RT and DL tasks. - */ -static inline void cpufreq_trigger_update(u64 time) -{ - cpufreq_update_util(time, ULONG_MAX, 0); -} - -struct update_util_data { - void (*func)(struct update_util_data *data, - u64 time, unsigned long util, unsigned long max); -}; - -void cpufreq_set_update_util_data(int cpu, struct update_util_data *data); - unsigned int cpufreq_get(unsigned int cpu); unsigned int cpufreq_quick_get(unsigned int cpu); unsigned int cpufreq_quick_get_max(unsigned int cpu); @@ -187,10 +157,6 @@ int cpufreq_update_policy(unsigned int cpu); bool have_governor_per_policy(void); struct kobject *get_governor_parent_kobj(struct cpufreq_policy *policy); #else -static inline void cpufreq_update_util(u64 time, unsigned long util, - unsigned long max) {} -static inline void cpufreq_trigger_update(u64 time) {} - static inline unsigned int cpufreq_get(unsigned int cpu) { return 0; diff --git a/include/linux/sched.h b/include/linux/sched.h index a10494a94cc3..913e755ef7b8 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -3207,4 +3207,13 @@ static inline unsigned long rlimit_max(unsigned int limit) return task_rlimit_max(current, limit); } +#ifdef CONFIG_CPU_FREQ +struct update_util_data { + void (*func)(struct update_util_data *data, + u64 time, unsigned long util, unsigned long max); +}; + +void cpufreq_set_update_util_data(int cpu, struct update_util_data *data); +#endif /* CONFIG_CPU_FREQ */ + #endif diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile index 67687973ce80..9507522164ac 100644 --- a/kernel/sched/Makefile +++ b/kernel/sched/Makefile @@ -19,3 +19,4 @@ obj-$(CONFIG_SCHED_AUTOGROUP) += auto_group.o obj-$(CONFIG_SCHEDSTATS) += stats.o obj-$(CONFIG_SCHED_DEBUG) += debug.o obj-$(CONFIG_CGROUP_CPUACCT) += cpuacct.o +obj-$(CONFIG_CPU_FREQ) += cpufreq.o diff --git a/kernel/sched/cpufreq.c b/kernel/sched/cpufreq.c new file mode 100644 index 000000000000..928c4ba32f68 --- /dev/null +++ b/kernel/sched/cpufreq.c @@ -0,0 +1,37 @@ +/* + * Scheduler code and data structures related to cpufreq. + * + * Copyright (C) 2016, Intel Corporation + * Author: Rafael J. Wysocki + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include "sched.h" + +DEFINE_PER_CPU(struct update_util_data *, cpufreq_update_util_data); + +/** + * cpufreq_set_update_util_data - Populate the CPU's update_util_data pointer. + * @cpu: The CPU to set the pointer for. + * @data: New pointer value. + * + * Set and publish the update_util_data pointer for the given CPU. That pointer + * points to a struct update_util_data object containing a callback function + * to call from cpufreq_update_util(). 
That function will be called from an RCU + * read-side critical section, so it must not sleep. + * + * Callers must use RCU-sched callbacks to free any memory that might be + * accessed via the old update_util_data pointer or invoke synchronize_sched() + * right after this function to avoid use-after-free. + */ +void cpufreq_set_update_util_data(int cpu, struct update_util_data *data) +{ + if (WARN_ON(data && !data->func)) + return; + + rcu_assign_pointer(per_cpu(cpufreq_update_util_data, cpu), data); +} +EXPORT_SYMBOL_GPL(cpufreq_set_update_util_data); diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index f042190c8002..faf7e2758dd0 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -9,7 +9,6 @@ #include #include #include -#include #include "cpupri.h" #include "cpudeadline.h" @@ -1739,3 +1738,51 @@ static inline u64 irq_time_read(int cpu) } #endif /* CONFIG_64BIT */ #endif /* CONFIG_IRQ_TIME_ACCOUNTING */ + +#ifdef CONFIG_CPU_FREQ +DECLARE_PER_CPU(struct update_util_data *, cpufreq_update_util_data); + +/** + * cpufreq_update_util - Take a note about CPU utilization changes. + * @time: Current time. + * @util: Current utilization. + * @max: Utilization ceiling. + * + * This function is called by the scheduler on every invocation of + * update_load_avg() on the CPU whose utilization is being updated. + * + * It can only be called from RCU-sched read-side critical sections. + */ +static inline void cpufreq_update_util(u64 time, unsigned long util, unsigned long max) +{ + struct update_util_data *data; + + data = rcu_dereference_sched(*this_cpu_ptr(&cpufreq_update_util_data)); + if (data) + data->func(data, time, util, max); +} + +/** + * cpufreq_trigger_update - Trigger CPU performance state evaluation if needed. + * @time: Current time. + * + * The way cpufreq is currently arranged requires it to evaluate the CPU + * performance state (frequency/voltage) on a regular basis to prevent it from + * being stuck in a completely inadequate performance level for too long. + * That is not guaranteed to happen if the updates are only triggered from CFS, + * though, because they may not be coming in if RT or deadline tasks are active + * all the time (or there are RT and DL tasks only). + * + * As a workaround for that issue, this function is called by the RT and DL + * sched classes to trigger extra cpufreq updates to prevent it from stalling, + * but that really is a band-aid. Going forward it should be replaced with + * solutions targeted more specifically at RT and DL tasks. + */ +static inline void cpufreq_trigger_update(u64 time) +{ + cpufreq_update_util(time, ULONG_MAX, 0); +} +#else +static inline void cpufreq_update_util(u64 time, unsigned long util, unsigned long max) {} +static inline void cpufreq_trigger_update(u64 time) {} +#endif /* CONFIG_CPU_FREQ */ -- cgit v1.2.3 From 5b33f48842fa1e13e9c0ea8cc59c1d0df19042db Mon Sep 17 00:00:00 2001 From: Amir Vadai Date: Tue, 8 Mar 2016 12:42:29 +0200 Subject: net/flower: Introduce hardware offload support This patch is based on a patch made by John Fastabend. It adds support for offloading cls_flower. when NETIF_F_HW_TC is on: flags = 0 => Rule will be processed twice - by hardware, and if still relevant, by software. flags = SKIP_HW => Rull will be processed by software only If hardware fail/not capabale to apply the rule, operation will NOT fail. Filter will be processed by SW only. Acked-by: Jiri Pirko Suggested-by: John Fastabend Signed-off-by: Amir Vadai Signed-off-by: David S. 
Miller --- include/linux/netdevice.h | 2 ++ include/net/pkt_cls.h | 14 ++++++++++ include/uapi/linux/pkt_cls.h | 2 ++ net/sched/cls_flower.c | 64 +++++++++++++++++++++++++++++++++++++++++++- 4 files changed, 81 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index fd30cb545c45..41df0b450757 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -786,6 +786,7 @@ typedef u16 (*select_queue_fallback_t)(struct net_device *dev, enum { TC_SETUP_MQPRIO, TC_SETUP_CLSU32, + TC_SETUP_CLSFLOWER, }; struct tc_cls_u32_offload; @@ -795,6 +796,7 @@ struct tc_to_netdev { union { u8 tc; struct tc_cls_u32_offload *cls_u32; + struct tc_cls_flower_offload *cls_flower; }; }; diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index bea14eee373e..5b4e8f08b8f0 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -409,4 +409,18 @@ static inline bool tc_should_offload(struct net_device *dev, u32 flags) return true; } +enum tc_fl_command { + TC_CLSFLOWER_REPLACE, + TC_CLSFLOWER_DESTROY, +}; + +struct tc_cls_flower_offload { + enum tc_fl_command command; + u64 cookie; + struct flow_dissector *dissector; + struct fl_flow_key *mask; + struct fl_flow_key *key; + struct tcf_exts *exts; +}; + #endif diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h index 9874f5680926..c43c5f78b9c4 100644 --- a/include/uapi/linux/pkt_cls.h +++ b/include/uapi/linux/pkt_cls.h @@ -417,6 +417,8 @@ enum { TCA_FLOWER_KEY_TCP_DST, /* be16 */ TCA_FLOWER_KEY_UDP_SRC, /* be16 */ TCA_FLOWER_KEY_UDP_DST, /* be16 */ + + TCA_FLOWER_FLAGS, __TCA_FLOWER_MAX, }; diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index 95b021243233..25d87666bf1e 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -165,6 +165,51 @@ static void fl_destroy_filter(struct rcu_head *head) kfree(f); } +static void fl_hw_destroy_filter(struct tcf_proto *tp, u64 cookie) +{ + struct net_device *dev = tp->q->dev_queue->dev; + struct tc_cls_flower_offload offload = {0}; + struct tc_to_netdev tc; + + if (!tc_should_offload(dev, 0)) + return; + + offload.command = TC_CLSFLOWER_DESTROY; + offload.cookie = cookie; + + tc.type = TC_SETUP_CLSFLOWER; + tc.cls_flower = &offload; + + dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, tp->protocol, &tc); +} + +static void fl_hw_replace_filter(struct tcf_proto *tp, + struct flow_dissector *dissector, + struct fl_flow_key *mask, + struct fl_flow_key *key, + struct tcf_exts *actions, + u64 cookie, u32 flags) +{ + struct net_device *dev = tp->q->dev_queue->dev; + struct tc_cls_flower_offload offload = {0}; + struct tc_to_netdev tc; + + if (!tc_should_offload(dev, flags)) + return; + + offload.command = TC_CLSFLOWER_REPLACE; + offload.cookie = cookie; + offload.dissector = dissector; + offload.mask = mask; + offload.key = key; + offload.exts = actions; + + tc.type = TC_SETUP_CLSFLOWER; + tc.cls_flower = &offload; + + dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, tp->protocol, &tc); +} + static bool fl_destroy(struct tcf_proto *tp, bool force) { struct cls_fl_head *head = rtnl_dereference(tp->root); @@ -174,6 +219,7 @@ static bool fl_destroy(struct tcf_proto *tp, bool force) return false; list_for_each_entry_safe(f, next, &head->filters, list) { + fl_hw_destroy_filter(tp, (u64)f); list_del_rcu(&f->list); call_rcu(&f->rcu, fl_destroy_filter); } @@ -459,6 +505,7 @@ static int fl_change(struct net *net, struct sk_buff *in_skb, struct cls_fl_filter *fnew; struct nlattr *tb[TCA_FLOWER_MAX + 
1]; struct fl_flow_mask mask = {}; + u32 flags = 0; int err; if (!tca[TCA_OPTIONS]) @@ -486,6 +533,9 @@ static int fl_change(struct net *net, struct sk_buff *in_skb, } fnew->handle = handle; + if (tb[TCA_FLOWER_FLAGS]) + flags = nla_get_u32(tb[TCA_FLOWER_FLAGS]); + err = fl_set_parms(net, tp, fnew, &mask, base, tb, tca[TCA_RATE], ovr); if (err) goto errout; @@ -498,9 +548,20 @@ static int fl_change(struct net *net, struct sk_buff *in_skb, head->ht_params); if (err) goto errout; - if (fold) + + fl_hw_replace_filter(tp, + &head->dissector, + &mask.key, + &fnew->key, + &fnew->exts, + (u64)fnew, + flags); + + if (fold) { rhashtable_remove_fast(&head->ht, &fold->ht_node, head->ht_params); + fl_hw_destroy_filter(tp, (u64)fold); + } *arg = (unsigned long) fnew; @@ -527,6 +588,7 @@ static int fl_delete(struct tcf_proto *tp, unsigned long arg) rhashtable_remove_fast(&head->ht, &f->ht_node, head->ht_params); list_del_rcu(&f->list); + fl_hw_destroy_filter(tp, (u64)f); tcf_unbind_filter(tp, &f->res); call_rcu(&f->rcu, fl_destroy_filter); return 0; -- cgit v1.2.3 From 4df2bf466a9c9c92f40d27c4aa9120f4e8227bfc Mon Sep 17 00:00:00 2001 From: DingXiang Date: Tue, 2 Feb 2016 12:29:18 +0800 Subject: dm snapshot: disallow the COW and origin devices from being identical Otherwise loading a "snapshot" table using the same device for the origin and COW devices, e.g.: echo "0 20971520 snapshot 253:3 253:3 P 8" | dmsetup create snap will trigger: BUG: unable to handle kernel NULL pointer dereference at 0000000000000098 [ 1958.979934] IP: [] dm_exception_store_set_chunk_size+0x7a/0x110 [dm_snapshot] [ 1958.989655] PGD 0 [ 1958.991903] Oops: 0000 [#1] SMP ... [ 1959.059647] CPU: 9 PID: 3556 Comm: dmsetup Tainted: G IO 4.5.0-rc5.snitm+ #150 ... [ 1959.083517] task: ffff8800b9660c80 ti: ffff88032a954000 task.ti: ffff88032a954000 [ 1959.091865] RIP: 0010:[] [] dm_exception_store_set_chunk_size+0x7a/0x110 [dm_snapshot] [ 1959.104295] RSP: 0018:ffff88032a957b30 EFLAGS: 00010246 [ 1959.110219] RAX: 0000000000000000 RBX: 0000000000000008 RCX: 0000000000000001 [ 1959.118180] RDX: 0000000000000000 RSI: 0000000000000008 RDI: ffff880329334a00 [ 1959.126141] RBP: ffff88032a957b50 R08: 0000000000000000 R09: 0000000000000001 [ 1959.134102] R10: 000000000000000a R11: f000000000000000 R12: ffff880330884d80 [ 1959.142061] R13: 0000000000000008 R14: ffffc90001c13088 R15: ffff880330884d80 [ 1959.150021] FS: 00007f8926ba3840(0000) GS:ffff880333440000(0000) knlGS:0000000000000000 [ 1959.159047] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 1959.165456] CR2: 0000000000000098 CR3: 000000032f48b000 CR4: 00000000000006e0 [ 1959.173415] Stack: [ 1959.175656] ffffc90001c13040 ffff880329334a00 ffff880330884ed0 ffff88032a957bdc [ 1959.183946] ffff88032a957bb8 ffffffffa040f225 ffff880329334a30 ffff880300000000 [ 1959.192233] ffffffffa04133e0 ffff880329334b30 0000000830884d58 00000000569c58cf [ 1959.200521] Call Trace: [ 1959.203248] [] dm_exception_store_create+0x1d5/0x240 [dm_snapshot] [ 1959.211986] [] snapshot_ctr+0x140/0x630 [dm_snapshot] [ 1959.219469] [] ? dm_split_args+0x64/0x150 [dm_mod] [ 1959.226656] [] dm_table_add_target+0x177/0x440 [dm_mod] [ 1959.234328] [] table_load+0x143/0x370 [dm_mod] [ 1959.241129] [] ? retrieve_status+0x1b0/0x1b0 [dm_mod] [ 1959.248607] [] ctl_ioctl+0x255/0x4d0 [dm_mod] [ 1959.255307] [] ? memzero_explicit+0x12/0x20 [ 1959.261816] [] dm_ctl_ioctl+0x13/0x20 [dm_mod] [ 1959.268615] [] do_vfs_ioctl+0xa6/0x5c0 [ 1959.274637] [] ? __audit_syscall_entry+0xaf/0x100 [ 1959.281726] [] ? 
do_audit_syscall_entry+0x66/0x70 [ 1959.288814] [] SyS_ioctl+0x79/0x90 [ 1959.294450] [] entry_SYSCALL_64_fastpath+0x12/0x71 ... [ 1959.323277] RIP [] dm_exception_store_set_chunk_size+0x7a/0x110 [dm_snapshot] [ 1959.333090] RSP [ 1959.336978] CR2: 0000000000000098 [ 1959.344121] ---[ end trace b049991ccad1169e ]--- Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=1195899 Cc: stable@vger.kernel.org Signed-off-by: Ding Xiang Signed-off-by: Mike Snitzer --- drivers/md/dm-snap.c | 9 +++++++++ drivers/md/dm-table.c | 36 ++++++++++++++++++++++++------------ include/linux/device-mapper.h | 2 ++ 3 files changed, 35 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index 62479ac4baf7..70bb0e8b62ce 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -1105,6 +1105,7 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) int i; int r = -EINVAL; char *origin_path, *cow_path; + dev_t origin_dev, cow_dev; unsigned args_used, num_flush_bios = 1; fmode_t origin_mode = FMODE_READ; @@ -1135,11 +1136,19 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) ti->error = "Cannot get origin device"; goto bad_origin; } + origin_dev = s->origin->bdev->bd_dev; cow_path = argv[0]; argv++; argc--; + cow_dev = dm_get_dev_t(cow_path); + if (cow_dev && cow_dev == origin_dev) { + ti->error = "COW device cannot be the same as origin device"; + r = -EINVAL; + goto bad_cow; + } + r = dm_get_device(ti, cow_path, dm_table_get_mode(ti->table), &s->cow); if (r) { ti->error = "Cannot get COW device"; diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 7210e5392cc4..f9e8f0bef332 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -364,6 +364,26 @@ static int upgrade_mode(struct dm_dev_internal *dd, fmode_t new_mode, return 0; } +/* + * Convert the path to a device + */ +dev_t dm_get_dev_t(const char *path) +{ + dev_t uninitialized_var(dev); + struct block_device *bdev; + + bdev = lookup_bdev(path); + if (IS_ERR(bdev)) + dev = name_to_dev_t(path); + else { + dev = bdev->bd_dev; + bdput(bdev); + } + + return dev; +} +EXPORT_SYMBOL_GPL(dm_get_dev_t); + /* * Add a device to the list, or just increment the usage count if * it's already present. @@ -372,23 +392,15 @@ int dm_get_device(struct dm_target *ti, const char *path, fmode_t mode, struct dm_dev **result) { int r; - dev_t uninitialized_var(dev); + dev_t dev; struct dm_dev_internal *dd; struct dm_table *t = ti->table; - struct block_device *bdev; BUG_ON(!t); - /* convert the path to a device */ - bdev = lookup_bdev(path); - if (IS_ERR(bdev)) { - dev = name_to_dev_t(path); - if (!dev) - return -ENODEV; - } else { - dev = bdev->bd_dev; - bdput(bdev); - } + dev = dm_get_dev_t(path); + if (!dev) + return -ENODEV; dd = find_device(&t->devices, dev); if (!dd) { diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index 82ae3b5e2c45..0830c9e86f0d 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -124,6 +124,8 @@ struct dm_dev { char name[16]; }; +dev_t dm_get_dev_t(const char *path); + /* * Constructors should call these functions to ensure destination devices * are opened/closed correctly. 
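Editor's note: the fix above works because dm_get_dev_t() resolves a table path to a dev_t without opening the device, so the constructor can compare origin and COW before dm_get_device() is ever called. A minimal sketch of the same pattern in a hypothetical DM target constructor follows; example_ctr() and the error strings are illustrative, not part of the patch.

#include <linux/device-mapper.h>

static int example_ctr(struct dm_target *ti, unsigned int argc, char **argv)
{
	dev_t a, b;

	if (argc < 2) {
		ti->error = "Two devices required";
		return -EINVAL;
	}

	/* Resolve both paths to device numbers without opening them. */
	a = dm_get_dev_t(argv[0]);
	b = dm_get_dev_t(argv[1]);
	if (a && a == b) {
		ti->error = "Devices must differ";
		return -EINVAL;
	}

	/* ... continue with dm_get_device() on each path as usual ... */
	return 0;
}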
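Returning to the cls_flower offload commit above: the classifier only issues TC_CLSFLOWER_REPLACE/TC_CLSFLOWER_DESTROY requests; each NIC driver opts in by handling the new TC_SETUP_CLSFLOWER type in its ndo_setup_tc() callback. Below is a minimal sketch of such a handler, assuming hypothetical hardware helpers (example_hw_add_rule/example_hw_del_rule) that stand in for vendor-specific flow-table programming.

#include <linux/netdevice.h>
#include <net/pkt_cls.h>

/* Hypothetical hardware hooks, stubbed out for illustration. */
static int example_hw_add_rule(struct net_device *dev,
			       struct tc_cls_flower_offload *f)
{
	/* Program a flow-table entry from f->dissector, f->key and f->mask,
	 * attach the actions in f->exts, and index it by f->cookie. */
	return 0;
}

static int example_hw_del_rule(struct net_device *dev, u64 cookie)
{
	/* Remove the entry previously stored under this cookie. */
	return 0;
}

static int example_ndo_setup_tc(struct net_device *dev, u32 handle,
				__be16 proto, struct tc_to_netdev *tc)
{
	if (tc->type == TC_SETUP_CLSFLOWER) {
		switch (tc->cls_flower->command) {
		case TC_CLSFLOWER_REPLACE:
			return example_hw_add_rule(dev, tc->cls_flower);
		case TC_CLSFLOWER_DESTROY:
			return example_hw_del_rule(dev,
						   tc->cls_flower->cookie);
		}
	}
	return -EOPNOTSUPP;
}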
-- cgit v1.2.3 From 2b6a321da9a2d8725a1d3dbb0b2e96a7618ebe56 Mon Sep 17 00:00:00 2001 From: Andrew Duggan Date: Thu, 10 Mar 2016 15:35:49 -0800 Subject: Input: synaptics-rmi4 - add support for Synaptics RMI4 devices Synaptics uses the Register Mapped Interface (RMI) protocol as a communications interface for their devices. This driver adds the core functionality needed to interface with RMI4 devices. RMI devices can be connected to the host via several transport protocols and can support a wide variety of functionality defined by RMI functions. Support for transport protocols and RMI functions is implemented in individual drivers. The RMI4 core driver uses a bus architecture to facilitate the various combinations of transport and function drivers needed by a particular device. Signed-off-by: Andrew Duggan Signed-off-by: Christopher Heiny Tested-by: Benjamin Tissoires Tested-by: Linus Walleij Tested-by: Bjorn Andersson Signed-off-by: Dmitry Torokhov --- drivers/input/Kconfig | 2 + drivers/input/Makefile | 2 + drivers/input/rmi4/Kconfig | 10 + drivers/input/rmi4/Makefile | 2 + drivers/input/rmi4/rmi_bus.c | 375 ++++++++++++++ drivers/input/rmi4/rmi_bus.h | 186 +++++++ drivers/input/rmi4/rmi_driver.c | 1027 +++++++++++++++++++++++++++++++++++++++ drivers/input/rmi4/rmi_driver.h | 103 ++++ drivers/input/rmi4/rmi_f01.c | 574 ++++++++++++++++++++++ include/linux/rmi.h | 209 ++++++++ include/uapi/linux/input.h | 1 + 11 files changed, 2491 insertions(+) create mode 100644 drivers/input/rmi4/Kconfig create mode 100644 drivers/input/rmi4/Makefile create mode 100644 drivers/input/rmi4/rmi_bus.c create mode 100644 drivers/input/rmi4/rmi_bus.h create mode 100644 drivers/input/rmi4/rmi_driver.c create mode 100644 drivers/input/rmi4/rmi_driver.h create mode 100644 drivers/input/rmi4/rmi_f01.c create mode 100644 include/linux/rmi.h (limited to 'include/linux') diff --git a/drivers/input/Kconfig b/drivers/input/Kconfig index a35532ec00e4..6261874c07c9 100644 --- a/drivers/input/Kconfig +++ b/drivers/input/Kconfig @@ -201,6 +201,8 @@ source "drivers/input/touchscreen/Kconfig" source "drivers/input/misc/Kconfig" +source "drivers/input/rmi4/Kconfig" + endif menu "Hardware I/O ports" diff --git a/drivers/input/Makefile b/drivers/input/Makefile index 0c9302ca9954..595820bbabe9 100644 --- a/drivers/input/Makefile +++ b/drivers/input/Makefile @@ -26,3 +26,5 @@ obj-$(CONFIG_INPUT_TOUCHSCREEN) += touchscreen/ obj-$(CONFIG_INPUT_MISC) += misc/ obj-$(CONFIG_INPUT_APMPOWER) += apm-power.o + +obj-$(CONFIG_RMI4_CORE) += rmi4/ diff --git a/drivers/input/rmi4/Kconfig b/drivers/input/rmi4/Kconfig new file mode 100644 index 000000000000..5ea60e338093 --- /dev/null +++ b/drivers/input/rmi4/Kconfig @@ -0,0 +1,10 @@ +# +# RMI4 configuration +# +config RMI4_CORE + tristate "Synaptics RMI4 bus support" + help + Say Y here if you want to support the Synaptics RMI4 bus. This is + required for all RMI4 device support. + + If unsure, say Y. 
diff --git a/drivers/input/rmi4/Makefile b/drivers/input/rmi4/Makefile new file mode 100644 index 000000000000..12f219779b74 --- /dev/null +++ b/drivers/input/rmi4/Makefile @@ -0,0 +1,2 @@ +obj-$(CONFIG_RMI4_CORE) += rmi_core.o +rmi_core-y := rmi_bus.o rmi_driver.o rmi_f01.o diff --git a/drivers/input/rmi4/rmi_bus.c b/drivers/input/rmi4/rmi_bus.c new file mode 100644 index 000000000000..0a2bd5a0f2b7 --- /dev/null +++ b/drivers/input/rmi4/rmi_bus.c @@ -0,0 +1,375 @@ +/* + * Copyright (c) 2011-2016 Synaptics Incorporated + * Copyright (c) 2011 Unixphere + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "rmi_bus.h" +#include "rmi_driver.h" + +static int debug_flags; +module_param(debug_flags, int, 0644); +MODULE_PARM_DESC(debug_flags, "control debugging information"); + +void rmi_dbg(int flags, struct device *dev, const char *fmt, ...) +{ + struct va_format vaf; + va_list args; + + if (flags & debug_flags) { + va_start(args, fmt); + + vaf.fmt = fmt; + vaf.va = &args; + + dev_printk(KERN_DEBUG, dev, "%pV", &vaf); + + va_end(args); + } +} +EXPORT_SYMBOL_GPL(rmi_dbg); + +/* + * RMI Physical devices + * + * Physical RMI device consists of several functions serving particular + * purpose. For example F11 is a 2D touch sensor while F01 is a generic + * function present in every RMI device. + */ + +static void rmi_release_device(struct device *dev) +{ + struct rmi_device *rmi_dev = to_rmi_device(dev); + + kfree(rmi_dev); +} + +static struct device_type rmi_device_type = { + .name = "rmi4_sensor", + .release = rmi_release_device, +}; + +bool rmi_is_physical_device(struct device *dev) +{ + return dev->type == &rmi_device_type; +} + +/** + * rmi_register_transport_device - register a transport device connection + * on the RMI bus. Transport drivers provide communication from the devices + * on a bus (such as SPI, I2C, and so on) to the RMI4 sensor. 
+ * + * @xport: the transport device to register + */ +int rmi_register_transport_device(struct rmi_transport_dev *xport) +{ + static atomic_t transport_device_count = ATOMIC_INIT(0); + struct rmi_device *rmi_dev; + int error; + + rmi_dev = kzalloc(sizeof(struct rmi_device), GFP_KERNEL); + if (!rmi_dev) + return -ENOMEM; + + device_initialize(&rmi_dev->dev); + + rmi_dev->xport = xport; + rmi_dev->number = atomic_inc_return(&transport_device_count) - 1; + + dev_set_name(&rmi_dev->dev, "rmi4-%02d", rmi_dev->number); + + rmi_dev->dev.bus = &rmi_bus_type; + rmi_dev->dev.type = &rmi_device_type; + + xport->rmi_dev = rmi_dev; + + error = device_add(&rmi_dev->dev); + if (error) + goto err_put_device; + + rmi_dbg(RMI_DEBUG_CORE, xport->dev, + "%s: Registered %s as %s.\n", __func__, + dev_name(rmi_dev->xport->dev), dev_name(&rmi_dev->dev)); + + return 0; + +err_put_device: + put_device(&rmi_dev->dev); + return error; +} +EXPORT_SYMBOL_GPL(rmi_register_transport_device); + +/** + * rmi_unregister_transport_device - unregister a transport device connection + * @xport: the transport driver to unregister + * + */ +void rmi_unregister_transport_device(struct rmi_transport_dev *xport) +{ + struct rmi_device *rmi_dev = xport->rmi_dev; + + device_del(&rmi_dev->dev); + put_device(&rmi_dev->dev); +} +EXPORT_SYMBOL(rmi_unregister_transport_device); + + +/* Function specific stuff */ + +static void rmi_release_function(struct device *dev) +{ + struct rmi_function *fn = to_rmi_function(dev); + + kfree(fn); +} + +static struct device_type rmi_function_type = { + .name = "rmi4_function", + .release = rmi_release_function, +}; + +bool rmi_is_function_device(struct device *dev) +{ + return dev->type == &rmi_function_type; +} + +static int rmi_function_match(struct device *dev, struct device_driver *drv) +{ + struct rmi_function_handler *handler = to_rmi_function_handler(drv); + struct rmi_function *fn = to_rmi_function(dev); + + return fn->fd.function_number == handler->func; +} + +static int rmi_function_probe(struct device *dev) +{ + struct rmi_function *fn = to_rmi_function(dev); + struct rmi_function_handler *handler = + to_rmi_function_handler(dev->driver); + int error; + + if (handler->probe) { + error = handler->probe(fn); + return error; + } + + return 0; +} + +static int rmi_function_remove(struct device *dev) +{ + struct rmi_function *fn = to_rmi_function(dev); + struct rmi_function_handler *handler = + to_rmi_function_handler(dev->driver); + + if (handler->remove) + handler->remove(fn); + + return 0; +} + +int rmi_register_function(struct rmi_function *fn) +{ + struct rmi_device *rmi_dev = fn->rmi_dev; + int error; + + device_initialize(&fn->dev); + + dev_set_name(&fn->dev, "%s.fn%02x", + dev_name(&rmi_dev->dev), fn->fd.function_number); + + fn->dev.parent = &rmi_dev->dev; + fn->dev.type = &rmi_function_type; + fn->dev.bus = &rmi_bus_type; + + error = device_add(&fn->dev); + if (error) { + dev_err(&rmi_dev->dev, + "Failed device_register function device %s\n", + dev_name(&fn->dev)); + goto err_put_device; + } + + rmi_dbg(RMI_DEBUG_CORE, &rmi_dev->dev, "Registered F%02X.\n", + fn->fd.function_number); + + return 0; + +err_put_device: + put_device(&fn->dev); + return error; +} + +void rmi_unregister_function(struct rmi_function *fn) +{ + device_del(&fn->dev); + + if (fn->dev.of_node) + of_node_put(fn->dev.of_node); + + put_device(&fn->dev); +} + +/** + * rmi_register_function_handler - register a handler for an RMI function + * @handler: RMI handler that should be registered. 
+ * @module: pointer to module that implements the handler + * @mod_name: name of the module implementing the handler + * + * This function performs additional setup of RMI function handler and + * registers it with the RMI core so that it can be bound to + * RMI function devices. + */ +int __rmi_register_function_handler(struct rmi_function_handler *handler, + struct module *owner, + const char *mod_name) +{ + struct device_driver *driver = &handler->driver; + int error; + + driver->bus = &rmi_bus_type; + driver->owner = owner; + driver->mod_name = mod_name; + driver->probe = rmi_function_probe; + driver->remove = rmi_function_remove; + + error = driver_register(&handler->driver); + if (error) { + pr_err("driver_register() failed for %s, error: %d\n", + handler->driver.name, error); + return error; + } + + return 0; +} +EXPORT_SYMBOL_GPL(__rmi_register_function_handler); + +/** + * rmi_unregister_function_handler - unregister given RMI function handler + * @handler: RMI handler that should be unregistered. + * + * This function unregisters given function handler from RMI core which + * causes it to be unbound from the function devices. + */ +void rmi_unregister_function_handler(struct rmi_function_handler *handler) +{ + driver_unregister(&handler->driver); +} +EXPORT_SYMBOL_GPL(rmi_unregister_function_handler); + +/* Bus specific stuff */ + +static int rmi_bus_match(struct device *dev, struct device_driver *drv) +{ + bool physical = rmi_is_physical_device(dev); + + /* First see if types are not compatible */ + if (physical != rmi_is_physical_driver(drv)) + return 0; + + return physical || rmi_function_match(dev, drv); +} + +struct bus_type rmi_bus_type = { + .match = rmi_bus_match, + .name = "rmi4", +}; + +static struct rmi_function_handler *fn_handlers[] = { + &rmi_f01_handler, +}; + +static void __rmi_unregister_function_handlers(int start_idx) +{ + int i; + + for (i = start_idx; i >= 0; i--) + rmi_unregister_function_handler(fn_handlers[i]); +} + +static void rmi_unregister_function_handlers(void) +{ + __rmi_unregister_function_handlers(ARRAY_SIZE(fn_handlers) - 1); +} + +static int rmi_register_function_handlers(void) +{ + int ret; + int i; + + for (i = 0; i < ARRAY_SIZE(fn_handlers); i++) { + ret = rmi_register_function_handler(fn_handlers[i]); + if (ret) { + pr_err("%s: error registering the RMI F%02x handler: %d\n", + __func__, fn_handlers[i]->func, ret); + goto err_unregister_function_handlers; + } + } + + return 0; + +err_unregister_function_handlers: + __rmi_unregister_function_handlers(i - 1); + return ret; +} + +static int __init rmi_bus_init(void) +{ + int error; + + error = bus_register(&rmi_bus_type); + if (error) { + pr_err("%s: error registering the RMI bus: %d\n", + __func__, error); + return error; + } + + error = rmi_register_function_handlers(); + if (error) + goto err_unregister_bus; + + error = rmi_register_physical_driver(); + if (error) { + pr_err("%s: error registering the RMI physical driver: %d\n", + __func__, error); + goto err_unregister_bus; + } + + return 0; + +err_unregister_bus: + bus_unregister(&rmi_bus_type); + return error; +} +module_init(rmi_bus_init); + +static void __exit rmi_bus_exit(void) +{ + /* + * We should only ever get here if all drivers are unloaded, so + * all we have to do at this point is unregister ourselves. 
+ */ + + rmi_unregister_physical_driver(); + rmi_unregister_function_handlers(); + bus_unregister(&rmi_bus_type); +} +module_exit(rmi_bus_exit); + +MODULE_AUTHOR("Christopher Heiny + +struct rmi_device; + +/** + * struct rmi_function - represents the implementation of an RMI4 + * function for a particular device (basically, a driver for that RMI4 function) + * + * @fd: The function descriptor of the RMI function + * @rmi_dev: Pointer to the RMI device associated with this function container + * @dev: The device associated with this particular function. + * + * @num_of_irqs: The number of irqs needed by this function + * @irq_pos: The position in the irq bitfield this function holds + * @irq_mask: For convenience, can be used to mask IRQ bits off during ATTN + * interrupt handling. + * + * @node: entry in device's list of functions + */ +struct rmi_function { + struct rmi_function_descriptor fd; + struct rmi_device *rmi_dev; + struct device dev; + struct list_head node; + + unsigned int num_of_irqs; + unsigned int irq_pos; + unsigned long irq_mask[]; +}; + +#define to_rmi_function(d) container_of(d, struct rmi_function, dev) + +bool rmi_is_function_device(struct device *dev); + +int __must_check rmi_register_function(struct rmi_function *); +void rmi_unregister_function(struct rmi_function *); + +/** + * struct rmi_function_handler - driver routines for a particular RMI function. + * + * @func: The RMI function number + * @reset: Called when a reset of the touch sensor is detected. The routine + * should perform any out-of-the-ordinary reset handling that might be + * necessary. Restoring of touch sensor configuration registers should be + * handled in the config() callback, below. + * @config: Called when the function container is first initialized, and + * after a reset is detected. This routine should write any necessary + * configuration settings to the device. + * @attention: Called when the IRQ(s) for the function are set by the touch + * sensor. + * @suspend: Should perform any required operations to suspend the particular + * function. + * @resume: Should perform any required operations to resume the particular + * function. + * + * All callbacks are expected to return 0 on success, error code on failure. 
+ */ +struct rmi_function_handler { + struct device_driver driver; + + u8 func; + + int (*probe)(struct rmi_function *fn); + void (*remove)(struct rmi_function *fn); + int (*config)(struct rmi_function *fn); + int (*reset)(struct rmi_function *fn); + int (*attention)(struct rmi_function *fn, unsigned long *irq_bits); + int (*suspend)(struct rmi_function *fn); + int (*resume)(struct rmi_function *fn); +}; + +#define to_rmi_function_handler(d) \ + container_of(d, struct rmi_function_handler, driver) + +int __must_check __rmi_register_function_handler(struct rmi_function_handler *, + struct module *, const char *); +#define rmi_register_function_handler(handler) \ + __rmi_register_function_handler(handler, THIS_MODULE, KBUILD_MODNAME) + +void rmi_unregister_function_handler(struct rmi_function_handler *); + +#define to_rmi_driver(d) \ + container_of(d, struct rmi_driver, driver) + +#define to_rmi_device(d) container_of(d, struct rmi_device, dev) + +static inline struct rmi_device_platform_data * +rmi_get_platform_data(struct rmi_device *d) +{ + return &d->xport->pdata; +} + +bool rmi_is_physical_device(struct device *dev); + +/** + * rmi_read - read a single byte + * @d: Pointer to an RMI device + * @addr: The address to read from + * @buf: The read buffer + * + * Reads a single byte of data using the underlying transport protocol + * into memory pointed by @buf. It returns 0 on success or a negative + * error code. + */ +static inline int rmi_read(struct rmi_device *d, u16 addr, u8 *buf) +{ + return d->xport->ops->read_block(d->xport, addr, buf, 1); +} + +/** + * rmi_read_block - read a block of bytes + * @d: Pointer to an RMI device + * @addr: The start address to read from + * @buf: The read buffer + * @len: Length of the read buffer + * + * Reads a block of byte data using the underlying transport protocol + * into memory pointed by @buf. It returns 0 on success or a negative + * error code. + */ +static inline int rmi_read_block(struct rmi_device *d, u16 addr, + void *buf, size_t len) +{ + return d->xport->ops->read_block(d->xport, addr, buf, len); +} + +/** + * rmi_write - write a single byte + * @d: Pointer to an RMI device + * @addr: The address to write to + * @data: The data to write + * + * Writes a single byte using the underlying transport protocol. It + * returns zero on success or a negative error code. + */ +static inline int rmi_write(struct rmi_device *d, u16 addr, u8 data) +{ + return d->xport->ops->write_block(d->xport, addr, &data, 1); +} + +/** + * rmi_write_block - write a block of bytes + * @d: Pointer to an RMI device + * @addr: The start address to write to + * @buf: The write buffer + * @len: Length of the write buffer + * + * Writes a block of byte data from buf using the underlaying transport + * protocol. It returns the amount of bytes written or a negative error code. 
+ */ +static inline int rmi_write_block(struct rmi_device *d, u16 addr, + const void *buf, size_t len) +{ + return d->xport->ops->write_block(d->xport, addr, buf, len); +} + +int rmi_for_each_dev(void *data, int (*func)(struct device *dev, void *data)); + +extern struct bus_type rmi_bus_type; + +int rmi_of_property_read_u32(struct device *dev, u32 *result, + const char *prop, bool optional); +int rmi_of_property_read_u16(struct device *dev, u16 *result, + const char *prop, bool optional); +int rmi_of_property_read_u8(struct device *dev, u8 *result, + const char *prop, bool optional); + +#define RMI_DEBUG_CORE BIT(0) +#define RMI_DEBUG_XPORT BIT(1) +#define RMI_DEBUG_FN BIT(2) +#define RMI_DEBUG_2D_SENSOR BIT(3) + +void rmi_dbg(int flags, struct device *dev, const char *fmt, ...); +#endif diff --git a/drivers/input/rmi4/rmi_driver.c b/drivers/input/rmi4/rmi_driver.c new file mode 100644 index 000000000000..b0f34b57a126 --- /dev/null +++ b/drivers/input/rmi4/rmi_driver.c @@ -0,0 +1,1027 @@ +/* + * Copyright (c) 2011-2016 Synaptics Incorporated + * Copyright (c) 2011 Unixphere + * + * This driver provides the core support for a single RMI4-based device. + * + * The RMI4 specification can be found here (URL split for line length): + * + * http://www.synaptics.com/sites/default/files/ + * 511-000136-01-Rev-E-RMI4-Interfacing-Guide.pdf + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include "rmi_bus.h" +#include "rmi_driver.h" + +#define HAS_NONSTANDARD_PDT_MASK 0x40 +#define RMI4_MAX_PAGE 0xff +#define RMI4_PAGE_SIZE 0x100 +#define RMI4_PAGE_MASK 0xFF00 + +#define RMI_DEVICE_RESET_CMD 0x01 +#define DEFAULT_RESET_DELAY_MS 100 + +static void rmi_free_function_list(struct rmi_device *rmi_dev) +{ + struct rmi_function *fn, *tmp; + struct rmi_driver_data *data = dev_get_drvdata(&rmi_dev->dev); + + data->f01_container = NULL; + + /* Doing it in the reverse order so F01 will be removed last */ + list_for_each_entry_safe_reverse(fn, tmp, + &data->function_list, node) { + list_del(&fn->node); + rmi_unregister_function(fn); + } +} + +static int reset_one_function(struct rmi_function *fn) +{ + struct rmi_function_handler *fh; + int retval = 0; + + if (!fn || !fn->dev.driver) + return 0; + + fh = to_rmi_function_handler(fn->dev.driver); + if (fh->reset) { + retval = fh->reset(fn); + if (retval < 0) + dev_err(&fn->dev, "Reset failed with code %d.\n", + retval); + } + + return retval; +} + +static int configure_one_function(struct rmi_function *fn) +{ + struct rmi_function_handler *fh; + int retval = 0; + + if (!fn || !fn->dev.driver) + return 0; + + fh = to_rmi_function_handler(fn->dev.driver); + if (fh->config) { + retval = fh->config(fn); + if (retval < 0) + dev_err(&fn->dev, "Config failed with code %d.\n", + retval); + } + + return retval; +} + +static int rmi_driver_process_reset_requests(struct rmi_device *rmi_dev) +{ + struct rmi_driver_data *data = dev_get_drvdata(&rmi_dev->dev); + struct rmi_function *entry; + int retval; + + list_for_each_entry(entry, &data->function_list, node) { + retval = reset_one_function(entry); + if (retval < 0) + return retval; + } + + return 0; +} + +static int rmi_driver_process_config_requests(struct rmi_device *rmi_dev) +{ + struct rmi_driver_data *data = dev_get_drvdata(&rmi_dev->dev); + struct rmi_function *entry; + int retval; + + 
list_for_each_entry(entry, &data->function_list, node) { + retval = configure_one_function(entry); + if (retval < 0) + return retval; + } + + return 0; +} + +static void process_one_interrupt(struct rmi_driver_data *data, + struct rmi_function *fn) +{ + struct rmi_function_handler *fh; + + if (!fn || !fn->dev.driver) + return; + + fh = to_rmi_function_handler(fn->dev.driver); + if (fn->irq_mask && fh->attention) { + bitmap_and(data->fn_irq_bits, data->irq_status, fn->irq_mask, + data->irq_count); + if (!bitmap_empty(data->fn_irq_bits, data->irq_count)) + fh->attention(fn, data->fn_irq_bits); + } +} + +int rmi_process_interrupt_requests(struct rmi_device *rmi_dev) +{ + struct rmi_driver_data *data = dev_get_drvdata(&rmi_dev->dev); + struct device *dev = &rmi_dev->dev; + struct rmi_function *entry; + int error; + + if (!data) + return 0; + + if (!rmi_dev->xport->attn_data) { + error = rmi_read_block(rmi_dev, + data->f01_container->fd.data_base_addr + 1, + data->irq_status, data->num_of_irq_regs); + if (error < 0) { + dev_err(dev, "Failed to read irqs, code=%d\n", error); + return error; + } + } + + mutex_lock(&data->irq_mutex); + bitmap_and(data->irq_status, data->irq_status, data->current_irq_mask, + data->irq_count); + /* + * At this point, irq_status has all bits that are set in the + * interrupt status register and are enabled. + */ + mutex_unlock(&data->irq_mutex); + + /* + * It would be nice to be able to use irq_chip to handle these + * nested IRQs. Unfortunately, most of the current customers for + * this driver are using older kernels (3.0.x) that don't support + * the features required for that. Once they've shifted to more + * recent kernels (say, 3.3 and higher), this should be switched to + * use irq_chip. + */ + list_for_each_entry(entry, &data->function_list, node) + if (entry->irq_mask) + process_one_interrupt(data, entry); + + if (data->input) + input_sync(data->input); + + return 0; +} +EXPORT_SYMBOL_GPL(rmi_process_interrupt_requests); + +static int suspend_one_function(struct rmi_function *fn) +{ + struct rmi_function_handler *fh; + int retval = 0; + + if (!fn || !fn->dev.driver) + return 0; + + fh = to_rmi_function_handler(fn->dev.driver); + if (fh->suspend) { + retval = fh->suspend(fn); + if (retval < 0) + dev_err(&fn->dev, "Suspend failed with code %d.\n", + retval); + } + + return retval; +} + +static int rmi_suspend_functions(struct rmi_device *rmi_dev) +{ + struct rmi_driver_data *data = dev_get_drvdata(&rmi_dev->dev); + struct rmi_function *entry; + int retval; + + list_for_each_entry(entry, &data->function_list, node) { + retval = suspend_one_function(entry); + if (retval < 0) + return retval; + } + + return 0; +} + +static int resume_one_function(struct rmi_function *fn) +{ + struct rmi_function_handler *fh; + int retval = 0; + + if (!fn || !fn->dev.driver) + return 0; + + fh = to_rmi_function_handler(fn->dev.driver); + if (fh->resume) { + retval = fh->resume(fn); + if (retval < 0) + dev_err(&fn->dev, "Resume failed with code %d.\n", + retval); + } + + return retval; +} + +static int rmi_resume_functions(struct rmi_device *rmi_dev) +{ + struct rmi_driver_data *data = dev_get_drvdata(&rmi_dev->dev); + struct rmi_function *entry; + int retval; + + list_for_each_entry(entry, &data->function_list, node) { + retval = resume_one_function(entry); + if (retval < 0) + return retval; + } + + return 0; +} + +static int enable_sensor(struct rmi_device *rmi_dev) +{ + int retval = 0; + + retval = rmi_driver_process_config_requests(rmi_dev); + if (retval < 0) + return retval; + 
+ return rmi_process_interrupt_requests(rmi_dev); +} + +/** + * rmi_driver_set_input_params - set input device id and other data. + * + * @rmi_dev: Pointer to an RMI device + * @input: Pointer to input device + * + */ +static int rmi_driver_set_input_params(struct rmi_device *rmi_dev, + struct input_dev *input) +{ + input->name = SYNAPTICS_INPUT_DEVICE_NAME; + input->id.vendor = SYNAPTICS_VENDOR_ID; + input->id.bustype = BUS_RMI; + return 0; +} + +static void rmi_driver_set_input_name(struct rmi_device *rmi_dev, + struct input_dev *input) +{ + struct rmi_driver_data *data = dev_get_drvdata(&rmi_dev->dev); + char *device_name = rmi_f01_get_product_ID(data->f01_container); + char *name; + + name = devm_kasprintf(&rmi_dev->dev, GFP_KERNEL, + "Synaptics %s", device_name); + if (!name) + return; + + input->name = name; +} + +static int rmi_driver_set_irq_bits(struct rmi_device *rmi_dev, + unsigned long *mask) +{ + int error = 0; + struct rmi_driver_data *data = dev_get_drvdata(&rmi_dev->dev); + struct device *dev = &rmi_dev->dev; + + mutex_lock(&data->irq_mutex); + bitmap_or(data->new_irq_mask, + data->current_irq_mask, mask, data->irq_count); + + error = rmi_write_block(rmi_dev, + data->f01_container->fd.control_base_addr + 1, + data->new_irq_mask, data->num_of_irq_regs); + if (error < 0) { + dev_err(dev, "%s: Failed to change enabled interrupts!", + __func__); + goto error_unlock; + } + bitmap_copy(data->current_irq_mask, data->new_irq_mask, + data->num_of_irq_regs); + +error_unlock: + mutex_unlock(&data->irq_mutex); + return error; +} + +static int rmi_driver_clear_irq_bits(struct rmi_device *rmi_dev, + unsigned long *mask) +{ + int error = 0; + struct rmi_driver_data *data = dev_get_drvdata(&rmi_dev->dev); + struct device *dev = &rmi_dev->dev; + + mutex_lock(&data->irq_mutex); + bitmap_andnot(data->new_irq_mask, + data->current_irq_mask, mask, data->irq_count); + + error = rmi_write_block(rmi_dev, + data->f01_container->fd.control_base_addr + 1, + data->new_irq_mask, data->num_of_irq_regs); + if (error < 0) { + dev_err(dev, "%s: Failed to change enabled interrupts!", + __func__); + goto error_unlock; + } + bitmap_copy(data->current_irq_mask, data->new_irq_mask, + data->num_of_irq_regs); + +error_unlock: + mutex_unlock(&data->irq_mutex); + return error; +} + +static int rmi_driver_reset_handler(struct rmi_device *rmi_dev) +{ + struct rmi_driver_data *data = dev_get_drvdata(&rmi_dev->dev); + int error; + + /* + * Can get called before the driver is fully ready to deal with + * this situation. 
+ */ + if (!data || !data->f01_container) { + dev_warn(&rmi_dev->dev, + "Not ready to handle reset yet!\n"); + return 0; + } + + error = rmi_read_block(rmi_dev, + data->f01_container->fd.control_base_addr + 1, + data->current_irq_mask, data->num_of_irq_regs); + if (error < 0) { + dev_err(&rmi_dev->dev, "%s: Failed to read current IRQ mask.\n", + __func__); + return error; + } + + error = rmi_driver_process_reset_requests(rmi_dev); + if (error < 0) + return error; + + error = rmi_driver_process_config_requests(rmi_dev); + if (error < 0) + return error; + + return 0; +} + +int rmi_read_pdt_entry(struct rmi_device *rmi_dev, struct pdt_entry *entry, + u16 pdt_address) +{ + u8 buf[RMI_PDT_ENTRY_SIZE]; + int error; + + error = rmi_read_block(rmi_dev, pdt_address, buf, RMI_PDT_ENTRY_SIZE); + if (error) { + dev_err(&rmi_dev->dev, "Read PDT entry at %#06x failed, code: %d.\n", + pdt_address, error); + return error; + } + + entry->page_start = pdt_address & RMI4_PAGE_MASK; + entry->query_base_addr = buf[0]; + entry->command_base_addr = buf[1]; + entry->control_base_addr = buf[2]; + entry->data_base_addr = buf[3]; + entry->interrupt_source_count = buf[4] & RMI_PDT_INT_SOURCE_COUNT_MASK; + entry->function_version = (buf[4] & RMI_PDT_FUNCTION_VERSION_MASK) >> 5; + entry->function_number = buf[5]; + + return 0; +} +EXPORT_SYMBOL_GPL(rmi_read_pdt_entry); + +static void rmi_driver_copy_pdt_to_fd(const struct pdt_entry *pdt, + struct rmi_function_descriptor *fd) +{ + fd->query_base_addr = pdt->query_base_addr + pdt->page_start; + fd->command_base_addr = pdt->command_base_addr + pdt->page_start; + fd->control_base_addr = pdt->control_base_addr + pdt->page_start; + fd->data_base_addr = pdt->data_base_addr + pdt->page_start; + fd->function_number = pdt->function_number; + fd->interrupt_source_count = pdt->interrupt_source_count; + fd->function_version = pdt->function_version; +} + +#define RMI_SCAN_CONTINUE 0 +#define RMI_SCAN_DONE 1 + +static int rmi_scan_pdt_page(struct rmi_device *rmi_dev, + int page, + void *ctx, + int (*callback)(struct rmi_device *rmi_dev, + void *ctx, + const struct pdt_entry *entry)) +{ + struct rmi_driver_data *data = dev_get_drvdata(&rmi_dev->dev); + struct pdt_entry pdt_entry; + u16 page_start = RMI4_PAGE_SIZE * page; + u16 pdt_start = page_start + PDT_START_SCAN_LOCATION; + u16 pdt_end = page_start + PDT_END_SCAN_LOCATION; + u16 addr; + int error; + int retval; + + for (addr = pdt_start; addr >= pdt_end; addr -= RMI_PDT_ENTRY_SIZE) { + error = rmi_read_pdt_entry(rmi_dev, &pdt_entry, addr); + if (error) + return error; + + if (RMI4_END_OF_PDT(pdt_entry.function_number)) + break; + + retval = callback(rmi_dev, ctx, &pdt_entry); + if (retval != RMI_SCAN_CONTINUE) + return retval; + } + + return (data->f01_bootloader_mode || addr == pdt_start) ? + RMI_SCAN_DONE : RMI_SCAN_CONTINUE; +} + +static int rmi_scan_pdt(struct rmi_device *rmi_dev, void *ctx, + int (*callback)(struct rmi_device *rmi_dev, + void *ctx, + const struct pdt_entry *entry)) +{ + int page; + int retval = RMI_SCAN_DONE; + + for (page = 0; page <= RMI4_MAX_PAGE; page++) { + retval = rmi_scan_pdt_page(rmi_dev, page, ctx, callback); + if (retval != RMI_SCAN_CONTINUE) + break; + } + + return retval < 0 ? 
retval : 0; +} + +int rmi_read_register_desc(struct rmi_device *d, u16 addr, + struct rmi_register_descriptor *rdesc) +{ + int ret; + u8 size_presence_reg; + u8 buf[35]; + int presense_offset = 1; + u8 *struct_buf; + int reg; + int offset = 0; + int map_offset = 0; + int i; + int b; + + /* + * The first register of the register descriptor is the size of + * the register descriptor's presense register. + */ + ret = rmi_read(d, addr, &size_presence_reg); + if (ret) + return ret; + ++addr; + + if (size_presence_reg < 0 || size_presence_reg > 35) + return -EIO; + + memset(buf, 0, sizeof(buf)); + + /* + * The presence register contains the size of the register structure + * and a bitmap which identified which packet registers are present + * for this particular register type (ie query, control, or data). + */ + ret = rmi_read_block(d, addr, buf, size_presence_reg); + if (ret) + return ret; + ++addr; + + if (buf[0] == 0) { + presense_offset = 3; + rdesc->struct_size = buf[1] | (buf[2] << 8); + } else { + rdesc->struct_size = buf[0]; + } + + for (i = presense_offset; i < size_presence_reg; i++) { + for (b = 0; b < 8; b++) { + if (buf[i] & (0x1 << b)) + bitmap_set(rdesc->presense_map, map_offset, 1); + ++map_offset; + } + } + + rdesc->num_registers = bitmap_weight(rdesc->presense_map, + RMI_REG_DESC_PRESENSE_BITS); + + rdesc->registers = devm_kzalloc(&d->dev, rdesc->num_registers * + sizeof(struct rmi_register_desc_item), + GFP_KERNEL); + if (!rdesc->registers) + return -ENOMEM; + + /* + * Allocate a temporary buffer to hold the register structure. + * I'm not using devm_kzalloc here since it will not be retained + * after exiting this function + */ + struct_buf = kzalloc(rdesc->struct_size, GFP_KERNEL); + if (!struct_buf) + return -ENOMEM; + + /* + * The register structure contains information about every packet + * register of this type. This includes the size of the packet + * register and a bitmap of all subpackets contained in the packet + * register. 
+ */ + ret = rmi_read_block(d, addr, struct_buf, rdesc->struct_size); + if (ret) + goto free_struct_buff; + + reg = find_first_bit(rdesc->presense_map, RMI_REG_DESC_PRESENSE_BITS); + map_offset = 0; + for (i = 0; i < rdesc->num_registers; i++) { + struct rmi_register_desc_item *item = &rdesc->registers[i]; + int reg_size = struct_buf[offset]; + + ++offset; + if (reg_size == 0) { + reg_size = struct_buf[offset] | + (struct_buf[offset + 1] << 8); + offset += 2; + } + + if (reg_size == 0) { + reg_size = struct_buf[offset] | + (struct_buf[offset + 1] << 8) | + (struct_buf[offset + 2] << 16) | + (struct_buf[offset + 3] << 24); + offset += 4; + } + + item->reg = reg; + item->reg_size = reg_size; + + do { + for (b = 0; b < 7; b++) { + if (struct_buf[offset] & (0x1 << b)) + bitmap_set(item->subpacket_map, + map_offset, 1); + ++map_offset; + } + } while (struct_buf[offset++] & 0x80); + + item->num_subpackets = bitmap_weight(item->subpacket_map, + RMI_REG_DESC_SUBPACKET_BITS); + + rmi_dbg(RMI_DEBUG_CORE, &d->dev, + "%s: reg: %d reg size: %ld subpackets: %d\n", __func__, + item->reg, item->reg_size, item->num_subpackets); + + reg = find_next_bit(rdesc->presense_map, + RMI_REG_DESC_PRESENSE_BITS, reg + 1); + } + +free_struct_buff: + kfree(struct_buf); + return ret; +} +EXPORT_SYMBOL_GPL(rmi_read_register_desc); + +const struct rmi_register_desc_item *rmi_get_register_desc_item( + struct rmi_register_descriptor *rdesc, u16 reg) +{ + const struct rmi_register_desc_item *item; + int i; + + for (i = 0; i < rdesc->num_registers; i++) { + item = &rdesc->registers[i]; + if (item->reg == reg) + return item; + } + + return NULL; +} +EXPORT_SYMBOL_GPL(rmi_get_register_desc_item); + +size_t rmi_register_desc_calc_size(struct rmi_register_descriptor *rdesc) +{ + const struct rmi_register_desc_item *item; + int i; + size_t size = 0; + + for (i = 0; i < rdesc->num_registers; i++) { + item = &rdesc->registers[i]; + size += item->reg_size; + } + return size; +} +EXPORT_SYMBOL_GPL(rmi_register_desc_calc_size); + +/* Compute the register offset relative to the base address */ +int rmi_register_desc_calc_reg_offset( + struct rmi_register_descriptor *rdesc, u16 reg) +{ + const struct rmi_register_desc_item *item; + int offset = 0; + int i; + + for (i = 0; i < rdesc->num_registers; i++) { + item = &rdesc->registers[i]; + if (item->reg == reg) + return offset; + ++offset; + } + return -1; +} +EXPORT_SYMBOL_GPL(rmi_register_desc_calc_reg_offset); + +bool rmi_register_desc_has_subpacket(const struct rmi_register_desc_item *item, + u8 subpacket) +{ + return find_next_bit(item->subpacket_map, RMI_REG_DESC_PRESENSE_BITS, + subpacket) == subpacket; +} + +/* Indicates that flash programming is enabled (bootloader mode). */ +#define RMI_F01_STATUS_BOOTLOADER(status) (!!((status) & 0x40)) + +/* + * Given the PDT entry for F01, read the device status register to determine + * if we're stuck in bootloader mode or not. 
+ * + */ +static int rmi_check_bootloader_mode(struct rmi_device *rmi_dev, + const struct pdt_entry *pdt) +{ + int error; + u8 device_status; + + error = rmi_read(rmi_dev, pdt->data_base_addr + pdt->page_start, + &device_status); + if (error) { + dev_err(&rmi_dev->dev, + "Failed to read device status: %d.\n", error); + return error; + } + + return RMI_F01_STATUS_BOOTLOADER(device_status); +} + +static int rmi_count_irqs(struct rmi_device *rmi_dev, + void *ctx, const struct pdt_entry *pdt) +{ + struct rmi_driver_data *data = dev_get_drvdata(&rmi_dev->dev); + int *irq_count = ctx; + + *irq_count += pdt->interrupt_source_count; + if (pdt->function_number == 0x01) { + data->f01_bootloader_mode = + rmi_check_bootloader_mode(rmi_dev, pdt); + if (data->f01_bootloader_mode) + dev_warn(&rmi_dev->dev, + "WARNING: RMI4 device is in bootloader mode!\n"); + } + + return RMI_SCAN_CONTINUE; +} + +static int rmi_initial_reset(struct rmi_device *rmi_dev, + void *ctx, const struct pdt_entry *pdt) +{ + int error; + + if (pdt->function_number == 0x01) { + u16 cmd_addr = pdt->page_start + pdt->command_base_addr; + u8 cmd_buf = RMI_DEVICE_RESET_CMD; + const struct rmi_device_platform_data *pdata = + rmi_get_platform_data(rmi_dev); + + if (rmi_dev->xport->ops->reset) { + error = rmi_dev->xport->ops->reset(rmi_dev->xport, + cmd_addr); + if (error) + return error; + + return RMI_SCAN_DONE; + } + + error = rmi_write_block(rmi_dev, cmd_addr, &cmd_buf, 1); + if (error) { + dev_err(&rmi_dev->dev, + "Initial reset failed. Code = %d.\n", error); + return error; + } + + mdelay(pdata->reset_delay_ms ?: DEFAULT_RESET_DELAY_MS); + + return RMI_SCAN_DONE; + } + + /* F01 should always be on page 0. If we don't find it there, fail. */ + return pdt->page_start == 0 ? RMI_SCAN_CONTINUE : -ENODEV; +} + +static int rmi_create_function(struct rmi_device *rmi_dev, + void *ctx, const struct pdt_entry *pdt) +{ + struct device *dev = &rmi_dev->dev; + struct rmi_driver_data *data = dev_get_drvdata(&rmi_dev->dev); + int *current_irq_count = ctx; + struct rmi_function *fn; + int i; + int error; + + rmi_dbg(RMI_DEBUG_CORE, dev, "Initializing F%02X.\n", + pdt->function_number); + + fn = kzalloc(sizeof(struct rmi_function) + + BITS_TO_LONGS(data->irq_count) * sizeof(unsigned long), + GFP_KERNEL); + if (!fn) { + dev_err(dev, "Failed to allocate memory for F%02X\n", + pdt->function_number); + return -ENOMEM; + } + + INIT_LIST_HEAD(&fn->node); + rmi_driver_copy_pdt_to_fd(pdt, &fn->fd); + + fn->rmi_dev = rmi_dev; + + fn->num_of_irqs = pdt->interrupt_source_count; + fn->irq_pos = *current_irq_count; + *current_irq_count += fn->num_of_irqs; + + for (i = 0; i < fn->num_of_irqs; i++) + set_bit(fn->irq_pos + i, fn->irq_mask); + + error = rmi_register_function(fn); + if (error) + goto err_put_fn; + + if (pdt->function_number == 0x01) + data->f01_container = fn; + + list_add_tail(&fn->node, &data->function_list); + + return RMI_SCAN_CONTINUE; + +err_put_fn: + put_device(&fn->dev); + return error; +} + +int rmi_driver_suspend(struct rmi_device *rmi_dev) +{ + int retval = 0; + + retval = rmi_suspend_functions(rmi_dev); + if (retval) + dev_warn(&rmi_dev->dev, "Failed to suspend functions: %d\n", + retval); + + return retval; +} +EXPORT_SYMBOL_GPL(rmi_driver_suspend); + +int rmi_driver_resume(struct rmi_device *rmi_dev) +{ + int retval; + + retval = rmi_resume_functions(rmi_dev); + if (retval) + dev_warn(&rmi_dev->dev, "Failed to suspend functions: %d\n", + retval); + + return retval; +} +EXPORT_SYMBOL_GPL(rmi_driver_resume); + +static int 
rmi_driver_remove(struct device *dev) +{ + struct rmi_device *rmi_dev = to_rmi_device(dev); + + rmi_free_function_list(rmi_dev); + + return 0; +} + +static int rmi_driver_probe(struct device *dev) +{ + struct rmi_driver *rmi_driver; + struct rmi_driver_data *data; + struct rmi_device_platform_data *pdata; + struct rmi_device *rmi_dev; + size_t size; + void *irq_memory; + int irq_count; + int retval; + + rmi_dbg(RMI_DEBUG_CORE, dev, "%s: Starting probe.\n", + __func__); + + if (!rmi_is_physical_device(dev)) { + rmi_dbg(RMI_DEBUG_CORE, dev, "Not a physical device.\n"); + return -ENODEV; + } + + rmi_dev = to_rmi_device(dev); + rmi_driver = to_rmi_driver(dev->driver); + rmi_dev->driver = rmi_driver; + + pdata = rmi_get_platform_data(rmi_dev); + + data = devm_kzalloc(dev, sizeof(struct rmi_driver_data), GFP_KERNEL); + if (!data) + return -ENOMEM; + + INIT_LIST_HEAD(&data->function_list); + data->rmi_dev = rmi_dev; + dev_set_drvdata(&rmi_dev->dev, data); + + /* + * Right before a warm boot, the sensor might be in some unusual state, + * such as F54 diagnostics, or F34 bootloader mode after a firmware + * or configuration update. In order to clear the sensor to a known + * state and/or apply any updates, we issue a initial reset to clear any + * previous settings and force it into normal operation. + * + * We have to do this before actually building the PDT because + * the reflash updates (if any) might cause various registers to move + * around. + * + * For a number of reasons, this initial reset may fail to return + * within the specified time, but we'll still be able to bring up the + * driver normally after that failure. This occurs most commonly in + * a cold boot situation (where then firmware takes longer to come up + * than from a warm boot) and the reset_delay_ms in the platform data + * has been set too short to accommodate that. Since the sensor will + * eventually come up and be usable, we don't want to just fail here + * and leave the customer's device unusable. So we warn them, and + * continue processing. + */ + retval = rmi_scan_pdt(rmi_dev, NULL, rmi_initial_reset); + if (retval < 0) + dev_warn(dev, "RMI initial reset failed! Continuing in spite of this.\n"); + + retval = rmi_read(rmi_dev, PDT_PROPERTIES_LOCATION, &data->pdt_props); + if (retval < 0) { + /* + * we'll print out a warning and continue since + * failure to get the PDT properties is not a cause to fail + */ + dev_warn(dev, "Could not read PDT properties from %#06x (code %d). Assuming 0x00.\n", + PDT_PROPERTIES_LOCATION, retval); + } + + /* + * We need to count the IRQs and allocate their storage before scanning + * the PDT and creating the function entries, because adding a new + * function can trigger events that result in the IRQ related storage + * being accessed. 
+ */ + rmi_dbg(RMI_DEBUG_CORE, dev, "Counting IRQs.\n"); + irq_count = 0; + retval = rmi_scan_pdt(rmi_dev, &irq_count, rmi_count_irqs); + if (retval < 0) { + dev_err(dev, "IRQ counting failed with code %d.\n", retval); + goto err; + } + data->irq_count = irq_count; + data->num_of_irq_regs = (data->irq_count + 7) / 8; + + mutex_init(&data->irq_mutex); + + size = BITS_TO_LONGS(data->irq_count) * sizeof(unsigned long); + irq_memory = devm_kzalloc(dev, size * 4, GFP_KERNEL); + if (!irq_memory) { + dev_err(dev, "Failed to allocate memory for irq masks.\n"); + goto err; + } + + data->irq_status = irq_memory + size * 0; + data->fn_irq_bits = irq_memory + size * 1; + data->current_irq_mask = irq_memory + size * 2; + data->new_irq_mask = irq_memory + size * 3; + + if (rmi_dev->xport->input) { + /* + * The transport driver already has an input device. + * In some cases it is preferable to reuse the transport + * devices input device instead of creating a new one here. + * One example is some HID touchpads report "pass-through" + * button events are not reported by rmi registers. + */ + data->input = rmi_dev->xport->input; + } else { + data->input = devm_input_allocate_device(dev); + if (!data->input) { + dev_err(dev, "%s: Failed to allocate input device.\n", + __func__); + retval = -ENOMEM; + goto err_destroy_functions; + } + rmi_driver_set_input_params(rmi_dev, data->input); + data->input->phys = devm_kasprintf(dev, GFP_KERNEL, + "%s/input0", dev_name(dev)); + } + + irq_count = 0; + rmi_dbg(RMI_DEBUG_CORE, dev, "Creating functions."); + retval = rmi_scan_pdt(rmi_dev, &irq_count, rmi_create_function); + if (retval < 0) { + dev_err(dev, "Function creation failed with code %d.\n", + retval); + goto err_destroy_functions; + } + + if (!data->f01_container) { + dev_err(dev, "Missing F01 container!\n"); + retval = -EINVAL; + goto err_destroy_functions; + } + + retval = rmi_read_block(rmi_dev, + data->f01_container->fd.control_base_addr + 1, + data->current_irq_mask, data->num_of_irq_regs); + if (retval < 0) { + dev_err(dev, "%s: Failed to read current IRQ mask.\n", + __func__); + goto err_destroy_functions; + } + + if (data->input) { + rmi_driver_set_input_name(rmi_dev, data->input); + if (!rmi_dev->xport->input) { + if (input_register_device(data->input)) { + dev_err(dev, "%s: Failed to register input device.\n", + __func__); + goto err_destroy_functions; + } + } + } + + if (data->f01_container->dev.driver) + /* Driver already bound, so enable ATTN now. */ + return enable_sensor(rmi_dev); + + return 0; + +err_destroy_functions: + rmi_free_function_list(rmi_dev); +err: + return retval < 0 ? 
retval : 0; +} + +static struct rmi_driver rmi_physical_driver = { + .driver = { + .owner = THIS_MODULE, + .name = "rmi4_physical", + .bus = &rmi_bus_type, + .probe = rmi_driver_probe, + .remove = rmi_driver_remove, + }, + .reset_handler = rmi_driver_reset_handler, + .clear_irq_bits = rmi_driver_clear_irq_bits, + .set_irq_bits = rmi_driver_set_irq_bits, + .set_input_params = rmi_driver_set_input_params, +}; + +bool rmi_is_physical_driver(struct device_driver *drv) +{ + return drv == &rmi_physical_driver.driver; +} + +int __init rmi_register_physical_driver(void) +{ + int error; + + error = driver_register(&rmi_physical_driver.driver); + if (error) { + pr_err("%s: driver register failed, code=%d.\n", __func__, + error); + return error; + } + + return 0; +} + +void __exit rmi_unregister_physical_driver(void) +{ + driver_unregister(&rmi_physical_driver.driver); +} diff --git a/drivers/input/rmi4/rmi_driver.h b/drivers/input/rmi4/rmi_driver.h new file mode 100644 index 000000000000..3ab3f1a8078f --- /dev/null +++ b/drivers/input/rmi4/rmi_driver.h @@ -0,0 +1,103 @@ +/* + * Copyright (c) 2011-2016 Synaptics Incorporated + * Copyright (c) 2011 Unixphere + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + */ + +#ifndef _RMI_DRIVER_H +#define _RMI_DRIVER_H + +#include +#include +#include +#include +#include "rmi_bus.h" + +#define RMI_DRIVER_VERSION "2.0" + +#define SYNAPTICS_INPUT_DEVICE_NAME "Synaptics RMI4 Touch Sensor" +#define SYNAPTICS_VENDOR_ID 0x06cb + +#define GROUP(_attrs) { \ + .attrs = _attrs, \ +} + +#define PDT_PROPERTIES_LOCATION 0x00EF +#define BSR_LOCATION 0x00FE + +#define RMI_PDT_PROPS_HAS_BSR 0x02 + +#define NAME_BUFFER_SIZE 256 + +#define RMI_PDT_ENTRY_SIZE 6 +#define RMI_PDT_FUNCTION_VERSION_MASK 0x60 +#define RMI_PDT_INT_SOURCE_COUNT_MASK 0x07 + +#define PDT_START_SCAN_LOCATION 0x00e9 +#define PDT_END_SCAN_LOCATION 0x0005 +#define RMI4_END_OF_PDT(id) ((id) == 0x00 || (id) == 0xff) + +struct pdt_entry { + u16 page_start; + u8 query_base_addr; + u8 command_base_addr; + u8 control_base_addr; + u8 data_base_addr; + u8 interrupt_source_count; + u8 function_version; + u8 function_number; +}; + +int rmi_read_pdt_entry(struct rmi_device *rmi_dev, struct pdt_entry *entry, + u16 pdt_address); + +#define RMI_REG_DESC_PRESENSE_BITS (32 * BITS_PER_BYTE) +#define RMI_REG_DESC_SUBPACKET_BITS (37 * BITS_PER_BYTE) + +/* describes a single packet register */ +struct rmi_register_desc_item { + u16 reg; + unsigned long reg_size; + u8 num_subpackets; + unsigned long subpacket_map[BITS_TO_LONGS( + RMI_REG_DESC_SUBPACKET_BITS)]; +}; + +/* + * describes the packet registers for a particular type + * (ie query, control, data) + */ +struct rmi_register_descriptor { + unsigned long struct_size; + unsigned long presense_map[BITS_TO_LONGS(RMI_REG_DESC_PRESENSE_BITS)]; + u8 num_registers; + struct rmi_register_desc_item *registers; +}; + +int rmi_read_register_desc(struct rmi_device *d, u16 addr, + struct rmi_register_descriptor *rdesc); +const struct rmi_register_desc_item *rmi_get_register_desc_item( + struct rmi_register_descriptor *rdesc, u16 reg); + +/* + * Calculate the total size of all of the registers described in the + * descriptor. 
+ */ +size_t rmi_register_desc_calc_size(struct rmi_register_descriptor *rdesc); +int rmi_register_desc_calc_reg_offset( + struct rmi_register_descriptor *rdesc, u16 reg); +bool rmi_register_desc_has_subpacket(const struct rmi_register_desc_item *item, + u8 subpacket); + +bool rmi_is_physical_driver(struct device_driver *); +int rmi_register_physical_driver(void); +void rmi_unregister_physical_driver(void); + +char *rmi_f01_get_product_ID(struct rmi_function *fn); + +extern struct rmi_function_handler rmi_f01_handler; + +#endif diff --git a/drivers/input/rmi4/rmi_f01.c b/drivers/input/rmi4/rmi_f01.c new file mode 100644 index 000000000000..e50ecc6699e7 --- /dev/null +++ b/drivers/input/rmi4/rmi_f01.c @@ -0,0 +1,574 @@ +/* + * Copyright (c) 2011-2016 Synaptics Incorporated + * Copyright (c) 2011 Unixphere + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include "rmi_driver.h" + +#define RMI_PRODUCT_ID_LENGTH 10 +#define RMI_PRODUCT_INFO_LENGTH 2 + +#define RMI_DATE_CODE_LENGTH 3 + +#define PRODUCT_ID_OFFSET 0x10 +#define PRODUCT_INFO_OFFSET 0x1E + + +/* Force a firmware reset of the sensor */ +#define RMI_F01_CMD_DEVICE_RESET 1 + +/* Various F01_RMI_QueryX bits */ + +#define RMI_F01_QRY1_CUSTOM_MAP BIT(0) +#define RMI_F01_QRY1_NON_COMPLIANT BIT(1) +#define RMI_F01_QRY1_HAS_LTS BIT(2) +#define RMI_F01_QRY1_HAS_SENSOR_ID BIT(3) +#define RMI_F01_QRY1_HAS_CHARGER_INP BIT(4) +#define RMI_F01_QRY1_HAS_ADJ_DOZE BIT(5) +#define RMI_F01_QRY1_HAS_ADJ_DOZE_HOFF BIT(6) +#define RMI_F01_QRY1_HAS_QUERY42 BIT(7) + +#define RMI_F01_QRY5_YEAR_MASK 0x1f +#define RMI_F01_QRY6_MONTH_MASK 0x0f +#define RMI_F01_QRY7_DAY_MASK 0x1f + +#define RMI_F01_QRY2_PRODINFO_MASK 0x7f + +#define RMI_F01_BASIC_QUERY_LEN 21 /* From Query 00 through 20 */ + +struct f01_basic_properties { + u8 manufacturer_id; + bool has_lts; + bool has_adjustable_doze; + bool has_adjustable_doze_holdoff; + char dom[11]; /* YYYY/MM/DD + '\0' */ + u8 product_id[RMI_PRODUCT_ID_LENGTH + 1]; + u16 productinfo; + u32 firmware_id; +}; + +/* F01 device status bits */ + +/* Most recent device status event */ +#define RMI_F01_STATUS_CODE(status) ((status) & 0x0f) +/* The device has lost its configuration for some reason. */ +#define RMI_F01_STATUS_UNCONFIGURED(status) (!!((status) & 0x80)) + +/* Control register bits */ + +/* + * Sleep mode controls power management on the device and affects all + * functions of the device. + */ +#define RMI_F01_CTRL0_SLEEP_MODE_MASK 0x03 + +#define RMI_SLEEP_MODE_NORMAL 0x00 +#define RMI_SLEEP_MODE_SENSOR_SLEEP 0x01 +#define RMI_SLEEP_MODE_RESERVED0 0x02 +#define RMI_SLEEP_MODE_RESERVED1 0x03 + +/* + * This bit disables whatever sleep mode may be selected by the sleep_mode + * field and forces the device to run at full power without sleeping. + */ +#define RMI_F01_CRTL0_NOSLEEP_BIT BIT(2) + +/* + * When this bit is set, the touch controller employs a noise-filtering + * algorithm designed for use with a connected battery charger. + */ +#define RMI_F01_CRTL0_CHARGER_BIT BIT(5) + +/* + * Sets the report rate for the device. The effect of this setting is + * highly product dependent. Check the spec sheet for your particular + * touch sensor. + */ +#define RMI_F01_CRTL0_REPORTRATE_BIT BIT(6) + +/* + * Written by the host as an indicator that the device has been + * successfully configured. 
+ */ +#define RMI_F01_CRTL0_CONFIGURED_BIT BIT(7) + +/** + * @ctrl0 - see the bit definitions above. + * @doze_interval - controls the interval between checks for finger presence + * when the touch sensor is in doze mode, in units of 10ms. + * @wakeup_threshold - controls the capacitance threshold at which the touch + * sensor will decide to wake up from that low power state. + * @doze_holdoff - controls how long the touch sensor waits after the last + * finger lifts before entering the doze state, in units of 100ms. + */ +struct f01_device_control { + u8 ctrl0; + u8 doze_interval; + u8 wakeup_threshold; + u8 doze_holdoff; +}; + +struct f01_data { + struct f01_basic_properties properties; + struct f01_device_control device_control; + + u16 doze_interval_addr; + u16 wakeup_threshold_addr; + u16 doze_holdoff_addr; + + bool suspended; + bool old_nosleep; + + unsigned int num_of_irq_regs; +}; + +static int rmi_f01_read_properties(struct rmi_device *rmi_dev, + u16 query_base_addr, + struct f01_basic_properties *props) +{ + u8 queries[RMI_F01_BASIC_QUERY_LEN]; + int ret; + int query_offset = query_base_addr; + bool has_ds4_queries = false; + bool has_query42 = false; + bool has_sensor_id = false; + bool has_package_id_query = false; + bool has_build_id_query = false; + u16 prod_info_addr; + u8 ds4_query_len; + + ret = rmi_read_block(rmi_dev, query_offset, + queries, RMI_F01_BASIC_QUERY_LEN); + if (ret) { + dev_err(&rmi_dev->dev, + "Failed to read device query registers: %d\n", ret); + return ret; + } + + prod_info_addr = query_offset + 17; + query_offset += RMI_F01_BASIC_QUERY_LEN; + + /* Now parse what we got */ + props->manufacturer_id = queries[0]; + + props->has_lts = queries[1] & RMI_F01_QRY1_HAS_LTS; + props->has_adjustable_doze = + queries[1] & RMI_F01_QRY1_HAS_ADJ_DOZE; + props->has_adjustable_doze_holdoff = + queries[1] & RMI_F01_QRY1_HAS_ADJ_DOZE_HOFF; + has_query42 = queries[1] & RMI_F01_QRY1_HAS_QUERY42; + has_sensor_id = queries[1] & RMI_F01_QRY1_HAS_SENSOR_ID; + + snprintf(props->dom, sizeof(props->dom), "20%02d/%02d/%02d", + queries[5] & RMI_F01_QRY5_YEAR_MASK, + queries[6] & RMI_F01_QRY6_MONTH_MASK, + queries[7] & RMI_F01_QRY7_DAY_MASK); + + memcpy(props->product_id, &queries[11], + RMI_PRODUCT_ID_LENGTH); + props->product_id[RMI_PRODUCT_ID_LENGTH] = '\0'; + + props->productinfo = + ((queries[2] & RMI_F01_QRY2_PRODINFO_MASK) << 7) | + (queries[3] & RMI_F01_QRY2_PRODINFO_MASK); + + if (has_sensor_id) + query_offset++; + + if (has_query42) { + ret = rmi_read(rmi_dev, query_offset, queries); + if (ret) { + dev_err(&rmi_dev->dev, + "Failed to read query 42 register: %d\n", ret); + return ret; + } + + has_ds4_queries = !!(queries[0] & BIT(0)); + query_offset++; + } + + if (has_ds4_queries) { + ret = rmi_read(rmi_dev, query_offset, &ds4_query_len); + if (ret) { + dev_err(&rmi_dev->dev, + "Failed to read DS4 queries length: %d\n", ret); + return ret; + } + query_offset++; + + if (ds4_query_len > 0) { + ret = rmi_read(rmi_dev, query_offset, queries); + if (ret) { + dev_err(&rmi_dev->dev, + "Failed to read DS4 queries: %d\n", + ret); + return ret; + } + + has_package_id_query = !!(queries[0] & BIT(0)); + has_build_id_query = !!(queries[0] & BIT(1)); + } + + if (has_package_id_query) + prod_info_addr++; + + if (has_build_id_query) { + ret = rmi_read_block(rmi_dev, prod_info_addr, queries, + 3); + if (ret) { + dev_err(&rmi_dev->dev, + "Failed to read product info: %d\n", + ret); + return ret; + } + + props->firmware_id = queries[1] << 8 | queries[0]; + props->firmware_id += queries[2] * 
65536; + } + } + + return 0; +} + +char *rmi_f01_get_product_ID(struct rmi_function *fn) +{ + struct f01_data *f01 = dev_get_drvdata(&fn->dev); + + return f01->properties.product_id; +} + +static int rmi_f01_probe(struct rmi_function *fn) +{ + struct rmi_device *rmi_dev = fn->rmi_dev; + struct rmi_driver_data *driver_data = dev_get_drvdata(&rmi_dev->dev); + struct rmi_device_platform_data *pdata = rmi_get_platform_data(rmi_dev); + struct f01_data *f01; + int error; + u16 ctrl_base_addr = fn->fd.control_base_addr; + u8 device_status; + u8 temp; + + f01 = devm_kzalloc(&fn->dev, sizeof(struct f01_data), GFP_KERNEL); + if (!f01) + return -ENOMEM; + + f01->num_of_irq_regs = driver_data->num_of_irq_regs; + + /* + * Set the configured bit and (optionally) other important stuff + * in the device control register. + */ + + error = rmi_read(rmi_dev, fn->fd.control_base_addr, + &f01->device_control.ctrl0); + if (error) { + dev_err(&fn->dev, "Failed to read F01 control: %d\n", error); + return error; + } + + switch (pdata->power_management.nosleep) { + case RMI_F01_NOSLEEP_DEFAULT: + break; + case RMI_F01_NOSLEEP_OFF: + f01->device_control.ctrl0 &= ~RMI_F01_CRTL0_NOSLEEP_BIT; + break; + case RMI_F01_NOSLEEP_ON: + f01->device_control.ctrl0 |= RMI_F01_CRTL0_NOSLEEP_BIT; + break; + } + + /* + * Sleep mode might be set as a hangover from a system crash or + * reboot without power cycle. If so, clear it so the sensor + * is certain to function. + */ + if ((f01->device_control.ctrl0 & RMI_F01_CTRL0_SLEEP_MODE_MASK) != + RMI_SLEEP_MODE_NORMAL) { + dev_warn(&fn->dev, + "WARNING: Non-zero sleep mode found. Clearing...\n"); + f01->device_control.ctrl0 &= ~RMI_F01_CTRL0_SLEEP_MODE_MASK; + } + + f01->device_control.ctrl0 |= RMI_F01_CRTL0_CONFIGURED_BIT; + + error = rmi_write(rmi_dev, fn->fd.control_base_addr, + f01->device_control.ctrl0); + if (error) { + dev_err(&fn->dev, "Failed to write F01 control: %d\n", error); + return error; + } + + /* Dummy read in order to clear irqs */ + error = rmi_read(rmi_dev, fn->fd.data_base_addr + 1, &temp); + if (error < 0) { + dev_err(&fn->dev, "Failed to read Interrupt Status.\n"); + return error; + } + + error = rmi_f01_read_properties(rmi_dev, fn->fd.query_base_addr, + &f01->properties); + if (error < 0) { + dev_err(&fn->dev, "Failed to read F01 properties.\n"); + return error; + } + + dev_info(&fn->dev, "found RMI device, manufacturer: %s, product: %s, fw id: %d\n", + f01->properties.manufacturer_id == 1 ? "Synaptics" : "unknown", + f01->properties.product_id, f01->properties.firmware_id); + + /* Advance to interrupt control registers, then skip over them. 
*/ + ctrl_base_addr++; + ctrl_base_addr += f01->num_of_irq_regs; + + /* read control register */ + if (f01->properties.has_adjustable_doze) { + f01->doze_interval_addr = ctrl_base_addr; + ctrl_base_addr++; + + if (pdata->power_management.doze_interval) { + f01->device_control.doze_interval = + pdata->power_management.doze_interval; + error = rmi_write(rmi_dev, f01->doze_interval_addr, + f01->device_control.doze_interval); + if (error) { + dev_err(&fn->dev, + "Failed to configure F01 doze interval register: %d\n", + error); + return error; + } + } else { + error = rmi_read(rmi_dev, f01->doze_interval_addr, + &f01->device_control.doze_interval); + if (error) { + dev_err(&fn->dev, + "Failed to read F01 doze interval register: %d\n", + error); + return error; + } + } + + f01->wakeup_threshold_addr = ctrl_base_addr; + ctrl_base_addr++; + + if (pdata->power_management.wakeup_threshold) { + f01->device_control.wakeup_threshold = + pdata->power_management.wakeup_threshold; + error = rmi_write(rmi_dev, f01->wakeup_threshold_addr, + f01->device_control.wakeup_threshold); + if (error) { + dev_err(&fn->dev, + "Failed to configure F01 wakeup threshold register: %d\n", + error); + return error; + } + } else { + error = rmi_read(rmi_dev, f01->wakeup_threshold_addr, + &f01->device_control.wakeup_threshold); + if (error < 0) { + dev_err(&fn->dev, + "Failed to read F01 wakeup threshold register: %d\n", + error); + return error; + } + } + } + + if (f01->properties.has_lts) + ctrl_base_addr++; + + if (f01->properties.has_adjustable_doze_holdoff) { + f01->doze_holdoff_addr = ctrl_base_addr; + ctrl_base_addr++; + + if (pdata->power_management.doze_holdoff) { + f01->device_control.doze_holdoff = + pdata->power_management.doze_holdoff; + error = rmi_write(rmi_dev, f01->doze_holdoff_addr, + f01->device_control.doze_holdoff); + if (error) { + dev_err(&fn->dev, + "Failed to configure F01 doze holdoff register: %d\n", + error); + return error; + } + } else { + error = rmi_read(rmi_dev, f01->doze_holdoff_addr, + &f01->device_control.doze_holdoff); + if (error) { + dev_err(&fn->dev, + "Failed to read F01 doze holdoff register: %d\n", + error); + return error; + } + } + } + + error = rmi_read(rmi_dev, fn->fd.data_base_addr, &device_status); + if (error < 0) { + dev_err(&fn->dev, + "Failed to read device status: %d\n", error); + return error; + } + + if (RMI_F01_STATUS_UNCONFIGURED(device_status)) { + dev_err(&fn->dev, + "Device was reset during configuration process, status: %#02x!\n", + RMI_F01_STATUS_CODE(device_status)); + return -EINVAL; + } + + dev_set_drvdata(&fn->dev, f01); + + return 0; +} + +static int rmi_f01_config(struct rmi_function *fn) +{ + struct f01_data *f01 = dev_get_drvdata(&fn->dev); + int error; + + error = rmi_write(fn->rmi_dev, fn->fd.control_base_addr, + f01->device_control.ctrl0); + if (error) { + dev_err(&fn->dev, + "Failed to write device_control register: %d\n", error); + return error; + } + + if (f01->properties.has_adjustable_doze) { + error = rmi_write(fn->rmi_dev, f01->doze_interval_addr, + f01->device_control.doze_interval); + if (error) { + dev_err(&fn->dev, + "Failed to write doze interval: %d\n", error); + return error; + } + + error = rmi_write_block(fn->rmi_dev, + f01->wakeup_threshold_addr, + &f01->device_control.wakeup_threshold, + sizeof(u8)); + if (error) { + dev_err(&fn->dev, + "Failed to write wakeup threshold: %d\n", + error); + return error; + } + } + + if (f01->properties.has_adjustable_doze_holdoff) { + error = rmi_write(fn->rmi_dev, f01->doze_holdoff_addr, + 
f01->device_control.doze_holdoff); + if (error) { + dev_err(&fn->dev, + "Failed to write doze holdoff: %d\n", error); + return error; + } + } + + return 0; +} + +static int rmi_f01_suspend(struct rmi_function *fn) +{ + struct f01_data *f01 = dev_get_drvdata(&fn->dev); + int error; + + f01->old_nosleep = + f01->device_control.ctrl0 & RMI_F01_CRTL0_NOSLEEP_BIT; + f01->device_control.ctrl0 &= ~RMI_F01_CRTL0_NOSLEEP_BIT; + + f01->device_control.ctrl0 &= ~RMI_F01_CTRL0_SLEEP_MODE_MASK; + if (device_may_wakeup(fn->rmi_dev->xport->dev)) + f01->device_control.ctrl0 |= RMI_SLEEP_MODE_RESERVED1; + else + f01->device_control.ctrl0 |= RMI_SLEEP_MODE_SENSOR_SLEEP; + + error = rmi_write(fn->rmi_dev, fn->fd.control_base_addr, + f01->device_control.ctrl0); + if (error) { + dev_err(&fn->dev, "Failed to write sleep mode: %d.\n", error); + if (f01->old_nosleep) + f01->device_control.ctrl0 |= RMI_F01_CRTL0_NOSLEEP_BIT; + f01->device_control.ctrl0 &= ~RMI_F01_CTRL0_SLEEP_MODE_MASK; + f01->device_control.ctrl0 |= RMI_SLEEP_MODE_NORMAL; + return error; + } + + return 0; +} + +static int rmi_f01_resume(struct rmi_function *fn) +{ + struct f01_data *f01 = dev_get_drvdata(&fn->dev); + int error; + + if (f01->old_nosleep) + f01->device_control.ctrl0 |= RMI_F01_CRTL0_NOSLEEP_BIT; + + f01->device_control.ctrl0 &= ~RMI_F01_CTRL0_SLEEP_MODE_MASK; + f01->device_control.ctrl0 |= RMI_SLEEP_MODE_NORMAL; + + error = rmi_write(fn->rmi_dev, fn->fd.control_base_addr, + f01->device_control.ctrl0); + if (error) { + dev_err(&fn->dev, + "Failed to restore normal operation: %d.\n", error); + return error; + } + + return 0; +} + +static int rmi_f01_attention(struct rmi_function *fn, + unsigned long *irq_bits) +{ + struct rmi_device *rmi_dev = fn->rmi_dev; + int error; + u8 device_status; + + error = rmi_read(rmi_dev, fn->fd.data_base_addr, &device_status); + if (error) { + dev_err(&fn->dev, + "Failed to read device status: %d.\n", error); + return error; + } + + if (RMI_F01_STATUS_UNCONFIGURED(device_status)) { + dev_warn(&fn->dev, "Device reset detected.\n"); + error = rmi_dev->driver->reset_handler(rmi_dev); + if (error) { + dev_err(&fn->dev, "Device reset failed: %d\n", error); + return error; + } + } + + return 0; +} + +struct rmi_function_handler rmi_f01_handler = { + .driver = { + .name = "rmi4_f01", + /* + * Do not allow user unbinding of F01 as it is a + * critical function. + */ + .suppress_bind_attrs = true, + }, + .func = 0x01, + .probe = rmi_f01_probe, + .config = rmi_f01_config, + .attention = rmi_f01_attention, + .suspend = rmi_f01_suspend, + .resume = rmi_f01_resume, +};
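For illustration only (not part of the patch): the ctrl0 handling above is a plain read-modify-write pattern on a single register. This self-contained user-space C sketch replays the suspend-path and probe-path updates on a hypothetical readback value.

#include <stdio.h>

#define RMI_F01_CTRL0_SLEEP_MODE_MASK	0x03
#define RMI_SLEEP_MODE_SENSOR_SLEEP	0x01
#define RMI_F01_CRTL0_NOSLEEP_BIT	(1 << 2)
#define RMI_F01_CRTL0_CONFIGURED_BIT	(1 << 7)

int main(void)
{
	unsigned char ctrl0 = 0x06;	/* hypothetical value read from F01 ctrl0 */

	/* suspend path: drop nosleep and select the sensor-sleep mode */
	ctrl0 &= ~RMI_F01_CRTL0_NOSLEEP_BIT;
	ctrl0 &= ~RMI_F01_CTRL0_SLEEP_MODE_MASK;
	ctrl0 |= RMI_SLEEP_MODE_SENSOR_SLEEP;

	/* probe path: tell the firmware it has been configured */
	ctrl0 |= RMI_F01_CRTL0_CONFIGURED_BIT;

	printf("ctrl0 = %#04x\n", ctrl0);	/* prints 0x81 */
	return 0;
}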
For a + * detailed explanation of what each field does, see the corresponding + * documention in the RMI4 specification. + * + * @nosleep - specifies whether the device is permitted to sleep or doze (that + * is, enter a temporary low power state) when no fingers are touching the + * sensor. + * @wakeup_threshold - controls the capacitance threshold at which the touch + * sensor will decide to wake up from that low power state. + * @doze_holdoff - controls how long the touch sensor waits after the last + * finger lifts before entering the doze state, in units of 100ms. + * @doze_interval - controls the interval between checks for finger presence + * when the touch sensor is in doze mode, in units of 10ms. + */ +struct rmi_f01_power_management { + enum rmi_f01_nosleep nosleep; + u8 wakeup_threshold; + u8 doze_holdoff; + u8 doze_interval; +}; + +/** + * struct rmi_device_platform_data - system specific configuration info. + * + * @reset_delay_ms - after issuing a reset command to the touch sensor, the + * driver waits a few milliseconds to give the firmware a chance to + * to re-initialize. You can override the default wait period here. + */ +struct rmi_device_platform_data { + int reset_delay_ms; + + /* function handler pdata */ + struct rmi_f01_power_management power_management; +}; + +/** + * struct rmi_function_descriptor - RMI function base addresses + * + * @query_base_addr: The RMI Query base address + * @command_base_addr: The RMI Command base address + * @control_base_addr: The RMI Control base address + * @data_base_addr: The RMI Data base address + * @interrupt_source_count: The number of irqs this RMI function needs + * @function_number: The RMI function number + * + * This struct is used when iterating the Page Description Table. The addresses + * are 16-bit values to include the current page address. + * + */ +struct rmi_function_descriptor { + u16 query_base_addr; + u16 command_base_addr; + u16 control_base_addr; + u16 data_base_addr; + u8 interrupt_source_count; + u8 function_number; + u8 function_version; +}; + +struct rmi_device; + +/** + * struct rmi_transport_dev - represent an RMI transport device + * + * @dev: Pointer to the communication device, e.g. i2c or spi + * @rmi_dev: Pointer to the RMI device + * @proto_name: name of the transport protocol (SPI, i2c, etc) + * @ops: pointer to transport operations implementation + * + * The RMI transport device implements the glue between different communication + * buses such as I2C and SPI. + * + */ +struct rmi_transport_dev { + struct device *dev; + struct rmi_device *rmi_dev; + + const char *proto_name; + const struct rmi_transport_ops *ops; + + struct rmi_device_platform_data pdata; + + struct input_dev *input; + + void *attn_data; + int attn_size; +}; + +/** + * struct rmi_transport_ops - defines transport protocol operations. + * + * @write_block: Writing a block of data to the specified address + * @read_block: Read a block of data from the specified address. + */ +struct rmi_transport_ops { + int (*write_block)(struct rmi_transport_dev *xport, u16 addr, + const void *buf, size_t len); + int (*read_block)(struct rmi_transport_dev *xport, u16 addr, + void *buf, size_t len); + int (*reset)(struct rmi_transport_dev *xport, u16 reset_addr); +}; + +/** + * struct rmi_driver - driver for an RMI4 sensor on the RMI bus. + * + * @driver: Device driver model driver + * @reset_handler: Called when a reset is detected. + * @clear_irq_bits: Clear the specified bits in the current interrupt mask. 
+ * @set_irq_bits: Set the specified bits in the current interrupt mask. + * @store_productid: Callback to cache the product ID from function 01 + * @set_input_params: Callback to set up the input device parameters + * @data: Private data pointer + * + */ +struct rmi_driver { + struct device_driver driver; + + int (*reset_handler)(struct rmi_device *rmi_dev); + int (*clear_irq_bits)(struct rmi_device *rmi_dev, unsigned long *mask); + int (*set_irq_bits)(struct rmi_device *rmi_dev, unsigned long *mask); + int (*store_productid)(struct rmi_device *rmi_dev); + int (*set_input_params)(struct rmi_device *rmi_dev, + struct input_dev *input); + void *data; +}; + +/** + * struct rmi_device - represents an RMI4 sensor device on the RMI bus. + * + * @dev: The device created for the RMI bus + * @number: Unique number for the device on the bus. + * @driver: Pointer to associated driver + * @xport: Pointer to the transport interface + * + */ +struct rmi_device { + struct device dev; + int number; + + struct rmi_driver *driver; + struct rmi_transport_dev *xport; + +}; + +struct rmi_driver_data { + struct list_head function_list; + + struct rmi_device *rmi_dev; + + struct rmi_function *f01_container; + bool f01_bootloader_mode; + + u32 attn_count; + int num_of_irq_regs; + int irq_count; + unsigned long *irq_status; + unsigned long *fn_irq_bits; + unsigned long *current_irq_mask; + unsigned long *new_irq_mask; + struct mutex irq_mutex; + struct input_dev *input; + + u8 pdt_props; + u8 bsr; + + bool enabled; + + void *data; +}; + +int rmi_register_transport_device(struct rmi_transport_dev *xport); +void rmi_unregister_transport_device(struct rmi_transport_dev *xport); +int rmi_process_interrupt_requests(struct rmi_device *rmi_dev); + +int rmi_driver_suspend(struct rmi_device *rmi_dev); +int rmi_driver_resume(struct rmi_device *rmi_dev); +#endif diff --git a/include/uapi/linux/input.h b/include/uapi/linux/input.h index 2758687300b4..01113841190d 100644 --- a/include/uapi/linux/input.h +++ b/include/uapi/linux/input.h @@ -246,6 +246,7 @@ struct input_mask { #define BUS_GSC 0x1A #define BUS_ATARI 0x1B #define BUS_SPI 0x1C +#define BUS_RMI 0x1D /* * MT_TOOL types -- cgit v1.2.3 From ff8f83708b3e36c050dc3fd7e2f04ea7f1752599 Mon Sep 17 00:00:00 2001 From: Andrew Duggan Date: Thu, 10 Mar 2016 15:47:28 -0800 Subject: Input: synaptics-rmi4 - add support for 2D sensors and F11 RMI4 currently defines two functions for reporting data for 2D sensors (F11 and F12). This patch adds the common functionality which is shared by devices with 2D reporting, along with implementing functionality for F11. Signed-off-by: Andrew Duggan Signed-off-by: Christopher Heiny Tested-by: Benjamin Tissoires Tested-by: Linus Walleij Tested-by: Bjorn Andersson Signed-off-by: Dmitry Torokhov --- drivers/input/rmi4/Kconfig | 15 + drivers/input/rmi4/Makefile | 5 + drivers/input/rmi4/rmi_2d_sensor.c | 221 ++++ drivers/input/rmi4/rmi_2d_sensor.h | 84 +++ drivers/input/rmi4/rmi_bus.c | 3 + drivers/input/rmi4/rmi_driver.h | 2 +- drivers/input/rmi4/rmi_f11.c | 1312 ++++++++++++++++++++++++++++++++++++ include/linux/rmi.h | 83 +++ 8 files changed, 1724 insertions(+), 1 deletion(-) create mode 100644 drivers/input/rmi4/rmi_2d_sensor.c create mode 100644 drivers/input/rmi4/rmi_2d_sensor.h create mode 100644 drivers/input/rmi4/rmi_f11.c (limited to 'include/linux') diff --git a/drivers/input/rmi4/Kconfig b/drivers/input/rmi4/Kconfig index cc3f7c508c19..dc7de6bbb8e6 100644 --- a/drivers/input/rmi4/Kconfig +++ b/drivers/input/rmi4/Kconfig @@ -17,3 +17,18 @@ config RMI4_I2C bus. If unsure, say Y.
+ +config RMI4_2D_SENSOR + bool + depends on RMI4_CORE + +config RMI4_F11 + bool "RMI4 Function 11 (2D pointing)" + select RMI4_2D_SENSOR + depends on RMI4_CORE + help + Say Y here if you want to add support for RMI4 function 11. + + Function 11 provides 2D multifinger pointing for touchscreens and + touchpads. For sensors that support relative pointing, F11 also + provides mouse input. diff --git a/drivers/input/rmi4/Makefile b/drivers/input/rmi4/Makefile index 5539494aecd1..9e7424ae1c72 100644 --- a/drivers/input/rmi4/Makefile +++ b/drivers/input/rmi4/Makefile @@ -1,5 +1,10 @@ obj-$(CONFIG_RMI4_CORE) += rmi_core.o rmi_core-y := rmi_bus.o rmi_driver.o rmi_f01.o +rmi_core-$(CONFIG_RMI4_2D_SENSOR) += rmi_2d_sensor.o + +# Function drivers +rmi_core-$(CONFIG_RMI4_F11) += rmi_f11.o + # Transports obj-$(CONFIG_RMI4_I2C) += rmi_i2c.o diff --git a/drivers/input/rmi4/rmi_2d_sensor.c b/drivers/input/rmi4/rmi_2d_sensor.c new file mode 100644 index 000000000000..8f48647ca5b0 --- /dev/null +++ b/drivers/input/rmi4/rmi_2d_sensor.c @@ -0,0 +1,221 @@ +/* + * Copyright (c) 2011-2016 Synaptics Incorporated + * Copyright (c) 2011 Unixphere + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include "rmi_driver.h" +#include "rmi_2d_sensor.h" + +#define RMI_2D_REL_POS_MIN -128 +#define RMI_2D_REL_POS_MAX 127 + +/* maximum ABS_MT_POSITION displacement (in mm) */ +#define DMAX 10 + +void rmi_2d_sensor_abs_process(struct rmi_2d_sensor *sensor, + struct rmi_2d_sensor_abs_object *obj, + int slot) +{ + struct rmi_2d_axis_alignment *axis_align = &sensor->axis_align; + + /* we keep the previous values if the finger is released */ + if (obj->type == RMI_2D_OBJECT_NONE) + return; + + if (axis_align->swap_axes) + swap(obj->x, obj->y); + + if (axis_align->flip_x) + obj->x = sensor->max_x - obj->x; + + if (axis_align->flip_y) + obj->y = sensor->max_y - obj->y; + + /* + * Here checking if X offset or y offset are specified is + * redundant. We just add the offsets or clip the values. + * + * Note: offsets need to be applied before clipping occurs, + * or we could get funny values that are outside of + * clipping boundaries. 
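+ * + * For example, with offset_x = -10 and clip_x_low = 0, a raw x of 4 + * becomes -6 after the offset and is then clipped back up to 0, instead + * of escaping below the clipping boundary.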
+ */ + obj->x += axis_align->offset_x; + obj->y += axis_align->offset_y; + + obj->x = max(axis_align->clip_x_low, obj->x); + obj->y = max(axis_align->clip_y_low, obj->y); + + if (axis_align->clip_x_high) + obj->x = min(sensor->max_x, obj->x); + + if (axis_align->clip_y_high) + obj->y = min(sensor->max_y, obj->y); + + sensor->tracking_pos[slot].x = obj->x; + sensor->tracking_pos[slot].y = obj->y; +} +EXPORT_SYMBOL_GPL(rmi_2d_sensor_abs_process); + +void rmi_2d_sensor_abs_report(struct rmi_2d_sensor *sensor, + struct rmi_2d_sensor_abs_object *obj, + int slot) +{ + struct rmi_2d_axis_alignment *axis_align = &sensor->axis_align; + struct input_dev *input = sensor->input; + int wide, major, minor; + + if (sensor->kernel_tracking) + input_mt_slot(input, sensor->tracking_slots[slot]); + else + input_mt_slot(input, slot); + + input_mt_report_slot_state(input, obj->mt_tool, + obj->type != RMI_2D_OBJECT_NONE); + + if (obj->type != RMI_2D_OBJECT_NONE) { + obj->x = sensor->tracking_pos[slot].x; + obj->y = sensor->tracking_pos[slot].y; + + if (axis_align->swap_axes) + swap(obj->wx, obj->wy); + + wide = (obj->wx > obj->wy); + major = max(obj->wx, obj->wy); + minor = min(obj->wx, obj->wy); + + if (obj->type == RMI_2D_OBJECT_STYLUS) { + major = max(1, major); + minor = max(1, minor); + } + + input_event(sensor->input, EV_ABS, ABS_MT_POSITION_X, obj->x); + input_event(sensor->input, EV_ABS, ABS_MT_POSITION_Y, obj->y); + input_event(sensor->input, EV_ABS, ABS_MT_ORIENTATION, wide); + input_event(sensor->input, EV_ABS, ABS_MT_PRESSURE, obj->z); + input_event(sensor->input, EV_ABS, ABS_MT_TOUCH_MAJOR, major); + input_event(sensor->input, EV_ABS, ABS_MT_TOUCH_MINOR, minor); + + rmi_dbg(RMI_DEBUG_2D_SENSOR, &sensor->input->dev, + "%s: obj[%d]: type: 0x%02x X: %d Y: %d Z: %d WX: %d WY: %d\n", + __func__, slot, obj->type, obj->x, obj->y, obj->z, + obj->wx, obj->wy); + } +} +EXPORT_SYMBOL_GPL(rmi_2d_sensor_abs_report); + +void rmi_2d_sensor_rel_report(struct rmi_2d_sensor *sensor, int x, int y) +{ + struct rmi_2d_axis_alignment *axis_align = &sensor->axis_align; + + x = min(RMI_2D_REL_POS_MAX, max(RMI_2D_REL_POS_MIN, (int)x)); + y = min(RMI_2D_REL_POS_MAX, max(RMI_2D_REL_POS_MIN, (int)y)); + + if (axis_align->swap_axes) + swap(x, y); + + if (axis_align->flip_x) + x = min(RMI_2D_REL_POS_MAX, -x); + + if (axis_align->flip_y) + y = min(RMI_2D_REL_POS_MAX, -y); + + if (x || y) { + input_report_rel(sensor->input, REL_X, x); + input_report_rel(sensor->input, REL_Y, y); + } +} +EXPORT_SYMBOL_GPL(rmi_2d_sensor_rel_report); + +static void rmi_2d_sensor_set_input_params(struct rmi_2d_sensor *sensor) +{ + struct input_dev *input = sensor->input; + int res_x; + int res_y; + int input_flags = 0; + + if (sensor->report_abs) { + if (sensor->axis_align.swap_axes) + swap(sensor->max_x, sensor->max_y); + + sensor->min_x = sensor->axis_align.clip_x_low; + if (sensor->axis_align.clip_x_high) + sensor->max_x = min(sensor->max_x, + sensor->axis_align.clip_x_high); + + sensor->min_y = sensor->axis_align.clip_y_low; + if (sensor->axis_align.clip_y_high) + sensor->max_y = min(sensor->max_y, + sensor->axis_align.clip_y_high); + + set_bit(EV_ABS, input->evbit); + input_set_abs_params(input, ABS_MT_POSITION_X, 0, sensor->max_x, + 0, 0); + input_set_abs_params(input, ABS_MT_POSITION_Y, 0, sensor->max_y, + 0, 0); + + if (sensor->x_mm && sensor->y_mm) { + res_x = (sensor->max_x - sensor->min_x) / sensor->x_mm; + res_y = (sensor->max_y - sensor->min_y) / sensor->y_mm; + + input_abs_set_res(input, ABS_X, res_x); + input_abs_set_res(input, ABS_Y, 
res_y); + + input_abs_set_res(input, ABS_MT_POSITION_X, res_x); + input_abs_set_res(input, ABS_MT_POSITION_Y, res_y); + + if (!sensor->dmax) + sensor->dmax = DMAX * res_x; + } + + input_set_abs_params(input, ABS_MT_PRESSURE, 0, 0xff, 0, 0); + input_set_abs_params(input, ABS_MT_TOUCH_MAJOR, 0, 0x0f, 0, 0); + input_set_abs_params(input, ABS_MT_TOUCH_MINOR, 0, 0x0f, 0, 0); + input_set_abs_params(input, ABS_MT_ORIENTATION, 0, 1, 0, 0); + + if (sensor->sensor_type == rmi_sensor_touchpad) + input_flags = INPUT_MT_POINTER; + else + input_flags = INPUT_MT_DIRECT; + + if (sensor->kernel_tracking) + input_flags |= INPUT_MT_TRACK; + + input_mt_init_slots(input, sensor->nbr_fingers, input_flags); + } + + if (sensor->report_rel) { + set_bit(EV_REL, input->evbit); + set_bit(REL_X, input->relbit); + set_bit(REL_Y, input->relbit); + } + + if (sensor->topbuttonpad) + set_bit(INPUT_PROP_TOPBUTTONPAD, input->propbit); +} +EXPORT_SYMBOL_GPL(rmi_2d_sensor_set_input_params); + +int rmi_2d_sensor_configure_input(struct rmi_function *fn, + struct rmi_2d_sensor *sensor) +{ + struct rmi_device *rmi_dev = fn->rmi_dev; + struct rmi_driver_data *drv_data = dev_get_drvdata(&rmi_dev->dev); + + if (!drv_data->input) + return -ENODEV; + + sensor->input = drv_data->input; + rmi_2d_sensor_set_input_params(sensor); + + return 0; +} +EXPORT_SYMBOL_GPL(rmi_2d_sensor_configure_input); diff --git a/drivers/input/rmi4/rmi_2d_sensor.h b/drivers/input/rmi4/rmi_2d_sensor.h new file mode 100644 index 000000000000..6a715d392ec2 --- /dev/null +++ b/drivers/input/rmi4/rmi_2d_sensor.h @@ -0,0 +1,84 @@ +/* + * Copyright (c) 2011-2016 Synaptics Incorporated + * Copyright (c) 2011 Unixphere + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + */ + +#ifndef _RMI_2D_SENSOR_H +#define _RMI_2D_SENSOR_H + +enum rmi_2d_sensor_object_type { + RMI_2D_OBJECT_NONE, + RMI_2D_OBJECT_FINGER, + RMI_2D_OBJECT_STYLUS, + RMI_2D_OBJECT_PALM, + RMI_2D_OBJECT_UNCLASSIFIED, +}; + +struct rmi_2d_sensor_abs_object { + enum rmi_2d_sensor_object_type type; + int mt_tool; + u16 x; + u16 y; + u8 z; + u8 wx; + u8 wy; +}; + +/** + * @axis_align - controls parameters that are useful in system prototyping + * and bring up. + * @max_x - The maximum X coordinate that will be reported by this sensor. + * @max_y - The maximum Y coordinate that will be reported by this sensor. + * @nbr_fingers - How many fingers can this sensor report? + * @data_pkt - buffer for data reported by this sensor. + * @pkt_size - number of bytes in that buffer. + * @attn_size - Size of the HID attention report (only contains abs data). + * position when two fingers are on the device. When this is true, we + * assume we have one of those sensors and report events appropriately. + * @sensor_type - indicates whether we're touchscreen or touchpad. + * @input - input device for absolute pointing stream + * @input_phys - buffer for the absolute phys name for this sensor. 
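+ * @report_abs - nonzero when the sensor reports absolute positions. + * @report_rel - nonzero when the sensor reports relative motion. + * @x_mm - physical width of the sensor, in millimeters. + * @y_mm - physical height of the sensor, in millimeters.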
+ */ +struct rmi_2d_sensor { + struct rmi_2d_axis_alignment axis_align; + struct input_mt_pos *tracking_pos; + int *tracking_slots; + bool kernel_tracking; + struct rmi_2d_sensor_abs_object *objs; + int dmax; + u16 min_x; + u16 max_x; + u16 min_y; + u16 max_y; + u8 nbr_fingers; + u8 *data_pkt; + int pkt_size; + int attn_size; + bool topbuttonpad; + enum rmi_sensor_type sensor_type; + struct input_dev *input; + struct rmi_function *fn; + char input_phys[32]; + u8 report_abs; + u8 report_rel; + u8 x_mm; + u8 y_mm; +}; + +void rmi_2d_sensor_abs_process(struct rmi_2d_sensor *sensor, + struct rmi_2d_sensor_abs_object *obj, + int slot); + +void rmi_2d_sensor_abs_report(struct rmi_2d_sensor *sensor, + struct rmi_2d_sensor_abs_object *obj, + int slot); + +void rmi_2d_sensor_rel_report(struct rmi_2d_sensor *sensor, int x, int y); + +int rmi_2d_sensor_configure_input(struct rmi_function *fn, + struct rmi_2d_sensor *sensor); +#endif /* _RMI_2D_SENSOR_H */ diff --git a/drivers/input/rmi4/rmi_bus.c b/drivers/input/rmi4/rmi_bus.c index 434477ea0c4e..2e3878343961 100644 --- a/drivers/input/rmi4/rmi_bus.c +++ b/drivers/input/rmi4/rmi_bus.c @@ -306,6 +306,9 @@ struct bus_type rmi_bus_type = { static struct rmi_function_handler *fn_handlers[] = { &rmi_f01_handler, +#ifdef CONFIG_RMI4_F11 + &rmi_f11_handler, +#endif }; static void __rmi_unregister_function_handlers(int start_idx) diff --git a/drivers/input/rmi4/rmi_driver.h b/drivers/input/rmi4/rmi_driver.h index 3ab3f1a8078f..09d2aa7f11d4 100644 --- a/drivers/input/rmi4/rmi_driver.h +++ b/drivers/input/rmi4/rmi_driver.h @@ -99,5 +99,5 @@ void rmi_unregister_physical_driver(void); char *rmi_f01_get_product_ID(struct rmi_function *fn); extern struct rmi_function_handler rmi_f01_handler; - +extern struct rmi_function_handler rmi_f11_handler; #endif diff --git a/drivers/input/rmi4/rmi_f11.c b/drivers/input/rmi4/rmi_f11.c new file mode 100644 index 000000000000..948a360b6261 --- /dev/null +++ b/drivers/input/rmi4/rmi_f11.c @@ -0,0 +1,1312 @@ +/* + * Copyright (c) 2011-2015 Synaptics Incorporated + * Copyright (c) 2011 Unixphere + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "rmi_driver.h" +#include "rmi_2d_sensor.h" + +#define F11_MAX_NUM_OF_FINGERS 10 +#define F11_MAX_NUM_OF_TOUCH_SHAPES 16 + +#define FINGER_STATE_MASK 0x03 + +#define F11_CTRL_SENSOR_MAX_X_POS_OFFSET 6 +#define F11_CTRL_SENSOR_MAX_Y_POS_OFFSET 8 + +#define DEFAULT_XY_MAX 9999 +#define DEFAULT_MAX_ABS_MT_PRESSURE 255 +#define DEFAULT_MAX_ABS_MT_TOUCH 15 +#define DEFAULT_MAX_ABS_MT_ORIENTATION 1 +#define DEFAULT_MIN_ABS_MT_TRACKING_ID 1 +#define DEFAULT_MAX_ABS_MT_TRACKING_ID 10 + +/** A note about RMI4 F11 register structure. + * + * The properties for + * a given sensor are described by its query registers. The number of query + * registers and the layout of their contents are described by the F11 device + * queries as well as the sensor query information. + * + * Similarly, each sensor has control registers that govern its behavior. The + * size and layout of the control registers for a given sensor can be determined + * by parsing that sensors query registers. + * + * And in a likewise fashion, each sensor has data registers where it reports + * its touch data and other interesting stuff. 
The size and layout of a + * sensor's data registers must be determined by parsing its query registers. + * + * The short story is that we need to read and parse a lot of query + * registers in order to determine the attributes of a sensor. Then + * we need to use that data to compute the size of the control and data + * registers for the sensor. + * + * The end result is that we have a number of structs that aren't used to + * directly generate the input events, but their size, location and contents + * are critical to determining where the data we are interested in lives. + * + * At this time, the driver does not yet comprehend all possible F11 + * configuration options, but it should be sufficient to cover 99% of RMI4 F11 + * devices currently in the field. + */ + +/* maximum ABS_MT_POSITION displacement (in mm) */ +#define DMAX 10 + +/** + * @rezero - writing this to the F11 command register will cause the sensor to + * calibrate to the current capacitive state. + */ +#define RMI_F11_REZERO 0x01 + +#define RMI_F11_HAS_QUERY9 (1 << 3) +#define RMI_F11_HAS_QUERY11 (1 << 4) +#define RMI_F11_HAS_QUERY12 (1 << 5) +#define RMI_F11_HAS_QUERY27 (1 << 6) +#define RMI_F11_HAS_QUERY28 (1 << 7) + +/** Defs for Query 1 */ + +#define RMI_F11_NR_FINGERS_MASK 0x07 +#define RMI_F11_HAS_REL (1 << 3) +#define RMI_F11_HAS_ABS (1 << 4) +#define RMI_F11_HAS_GESTURES (1 << 5) +#define RMI_F11_HAS_SENSITIVITY_ADJ (1 << 6) +#define RMI_F11_CONFIGURABLE (1 << 7) + +/** Defs for Query 2, 3, and 4. */ +#define RMI_F11_NR_ELECTRODES_MASK 0x7F + +/** Defs for Query 5 */ + +#define RMI_F11_ABS_DATA_SIZE_MASK 0x03 +#define RMI_F11_HAS_ANCHORED_FINGER (1 << 2) +#define RMI_F11_HAS_ADJ_HYST (1 << 3) +#define RMI_F11_HAS_DRIBBLE (1 << 4) +#define RMI_F11_HAS_BENDING_CORRECTION (1 << 5) +#define RMI_F11_HAS_LARGE_OBJECT_SUPPRESSION (1 << 6) +#define RMI_F11_HAS_JITTER_FILTER (1 << 7) + +/** Defs for Query 7 */ +#define RMI_F11_HAS_SINGLE_TAP (1 << 0) +#define RMI_F11_HAS_TAP_AND_HOLD (1 << 1) +#define RMI_F11_HAS_DOUBLE_TAP (1 << 2) +#define RMI_F11_HAS_EARLY_TAP (1 << 3) +#define RMI_F11_HAS_FLICK (1 << 4) +#define RMI_F11_HAS_PRESS (1 << 5) +#define RMI_F11_HAS_PINCH (1 << 6) +#define RMI_F11_HAS_CHIRAL (1 << 7) + +/** Defs for Query 8 */ +#define RMI_F11_HAS_PALM_DET (1 << 0) +#define RMI_F11_HAS_ROTATE (1 << 1) +#define RMI_F11_HAS_TOUCH_SHAPES (1 << 2) +#define RMI_F11_HAS_SCROLL_ZONES (1 << 3) +#define RMI_F11_HAS_INDIVIDUAL_SCROLL_ZONES (1 << 4) +#define RMI_F11_HAS_MF_SCROLL (1 << 5) +#define RMI_F11_HAS_MF_EDGE_MOTION (1 << 6) +#define RMI_F11_HAS_MF_SCROLL_INERTIA (1 << 7) + +/** Defs for Query 9. */ +#define RMI_F11_HAS_PEN (1 << 0) +#define RMI_F11_HAS_PROXIMITY (1 << 1) +#define RMI_F11_HAS_PALM_DET_SENSITIVITY (1 << 2) +#define RMI_F11_HAS_SUPPRESS_ON_PALM_DETECT (1 << 3) +#define RMI_F11_HAS_TWO_PEN_THRESHOLDS (1 << 4) +#define RMI_F11_HAS_CONTACT_GEOMETRY (1 << 5) +#define RMI_F11_HAS_PEN_HOVER_DISCRIMINATION (1 << 6) +#define RMI_F11_HAS_PEN_FILTERS (1 << 7) + +/** Defs for Query 10. */ +#define RMI_F11_NR_TOUCH_SHAPES_MASK 0x1F + +/** Defs for Query 11 */ + +#define RMI_F11_HAS_Z_TUNING (1 << 0) +#define RMI_F11_HAS_ALGORITHM_SELECTION (1 << 1) +#define RMI_F11_HAS_W_TUNING (1 << 2) +#define RMI_F11_HAS_PITCH_INFO (1 << 3) +#define RMI_F11_HAS_FINGER_SIZE (1 << 4) +#define RMI_F11_HAS_SEGMENTATION_AGGRESSIVENESS (1 << 5) +#define RMI_F11_HAS_XY_CLIP (1 << 6) +#define RMI_F11_HAS_DRUMMING_FILTER (1 << 7) + +/** Defs for Query 12. */ + +#define RMI_F11_HAS_GAPLESS_FINGER (1 << 0) +#define RMI_F11_HAS_GAPLESS_FINGER_TUNING (1 << 1) +#define RMI_F11_HAS_8BIT_W (1 << 2) +#define RMI_F11_HAS_ADJUSTABLE_MAPPING (1 << 3) +#define RMI_F11_HAS_INFO2 (1 << 4) +#define RMI_F11_HAS_PHYSICAL_PROPS (1 << 5) +#define RMI_F11_HAS_FINGER_LIMIT (1 << 6) +#define RMI_F11_HAS_LINEAR_COEFF (1 << 7) + +/** Defs for Query 13. */ + +#define RMI_F11_JITTER_WINDOW_MASK 0x1F +#define RMI_F11_JITTER_FILTER_MASK 0x60 +#define RMI_F11_JITTER_FILTER_SHIFT 5 + +/** Defs for Query 14. */ +#define RMI_F11_LIGHT_CONTROL_MASK 0x03 +#define RMI_F11_IS_CLEAR (1 << 2) +#define RMI_F11_CLICKPAD_PROPS_MASK 0x18 +#define RMI_F11_CLICKPAD_PROPS_SHIFT 3 +#define RMI_F11_MOUSE_BUTTONS_MASK 0x60 +#define RMI_F11_MOUSE_BUTTONS_SHIFT 5 +#define RMI_F11_HAS_ADVANCED_GESTURES (1 << 7) + +#define RMI_F11_QUERY_SIZE 4 +#define RMI_F11_QUERY_GESTURE_SIZE 2 + +#define F11_LIGHT_CTL_NONE 0x00 +#define F11_LUXPAD 0x01 +#define F11_DUAL_MODE 0x02 + +#define F11_NOT_CLICKPAD 0x00 +#define F11_HINGED_CLICKPAD 0x01 +#define F11_UNIFORM_CLICKPAD 0x02 + +/** + * Query registers 1 through 4 are always present. + * + * @nr_fingers - describes the maximum number of fingers the 2-D sensor + * supports. + * @has_rel - the sensor supports relative motion reporting. + * @has_abs - the sensor supports absolute position reporting. + * @has_gestures - the sensor supports gesture reporting. + * @has_sensitivity_adjust - the sensor supports a global sensitivity + * adjustment. + * @configurable - the sensor supports various configuration options. + * @nr_x_electrodes - the maximum number of electrodes the 2-D sensor + * supports on the X axis. + * @nr_y_electrodes - the maximum number of electrodes the 2-D sensor + * supports on the Y axis. + * @max_electrodes - the total number of X and Y electrodes that may be + * configured. + * + * Query 5 is present if the has_abs bit is set. + * + * @abs_data_size - describes the format of data reported by the absolute + * data source. Only one format (the kind used here) is supported at this + * time. + * @has_anchored_finger - the sensor supports the high-precision second + * finger tracking provided by the manual tracking and motion sensitivity + * options. + * @has_adj_hyst - the difference between the finger release threshold and + * the touch threshold. + * @has_dribble - the sensor supports the generation of dribble interrupts, + * which may be enabled or disabled with the dribble control bit. + * @has_bending_correction - Bending related data registers 28 and 36, and + * control registers 52..57 are present. + * @has_large_object_suppression - control register 58 and data register 28 + * exist. + * @has_jitter_filter - query 13 and control 73..76 exist. + * + * Gesture information queries 7 and 8 are present if the has_gestures bit is + * set. + * + * @has_single_tap - a basic single-tap gesture is supported. + * @has_tap_n_hold - tap-and-hold gesture is supported. + * @has_double_tap - double-tap gesture is supported. + * @has_early_tap - early tap is supported and reported as soon as the finger + * lifts for any tap event that could be interpreted as either a single tap + * or as the first tap of a double-tap or tap-and-hold gesture. + * @has_flick - flick detection is supported. + * @has_press - press gesture reporting is supported. + * @has_pinch - pinch gesture detection is supported. + * @has_palm_det - the 2-D sensor notifies the host whenever a large conductive + * object such as a palm or a cheek touches the 2-D sensor. + * @has_rotate - rotation gesture detection is supported. + * @has_touch_shapes - TouchShapes are supported. A TouchShape is a fixed + * rectangular area on the sensor that behaves like a capacitive button. + * @has_scroll_zones - scrolling areas near the sensor edges are supported. + * @has_individual_scroll_zones - if 1, then 4 scroll zones are supported; + * if 0, then only two are supported. + * @has_mf_scroll - the multifinger_scrolling bit will be set when + * more than one finger is involved in a scrolling action. + * + * Convenience for checking bytes in the gesture info registers. This is done + * often enough that we put it here to declutter the conditionals. + * + * @query7_nonzero - true if any of the query 7 bits are set + * @query8_nonzero - true if any of the query 8 bits are set + * + * Query 9 is present if the has_query9 bit is set. + * + * @has_pen - detection of a stylus is supported and registers F11_2D_Ctrl20 + * and F11_2D_Ctrl21 exist. + * @has_proximity - detection of fingers near the sensor is supported and + * registers F11_2D_Ctrl22 through F11_2D_Ctrl26 exist. + * @has_palm_det_sensitivity - the sensor supports the palm detect sensitivity + * feature and register F11_2D_Ctrl27 exists. + * @has_two_pen_thresholds - if has_pen is also set, then F11_2D_Ctrl35 exists. + * @has_contact_geometry - the sensor supports the use of contact geometry to + * map absolute X and Y target positions and registers F11_2D_Data18 + * through F11_2D_Data27 exist. + * + * Touch shape info (query 10) is present if has_touch_shapes is set. + * + * @nr_touch_shapes - the total number of touch shapes supported. + * + * Query 11 is present if the has_query11 bit is set in query 0. + * + * @has_z_tuning - if set, the sensor supports Z tuning and registers + * F11_2D_Ctrl29 through F11_2D_Ctrl33 exist. + * @has_algorithm_selection - controls choice of noise suppression algorithm + * @has_w_tuning - the sensor supports Wx and Wy scaling and registers + * F11_2D_Ctrl36 through F11_2D_Ctrl39 exist. + * @has_pitch_info - the X and Y pitches of the sensor electrodes can be + * configured and registers F11_2D_Ctrl40 and F11_2D_Ctrl41 exist. + * @has_finger_size - the default finger width settings for the + * sensor can be configured and registers F11_2D_Ctrl42 through F11_2D_Ctrl44 + * exist. + * @has_segmentation_aggressiveness - the sensor’s ability to distinguish + * multiple objects close together can be configured and register F11_2D_Ctrl45 + * exists. + * @has_XY_clip - the inactive outside borders of the sensor can be + * configured and registers F11_2D_Ctrl46 through F11_2D_Ctrl49 exist. + * @has_drumming_filter - the sensor can be configured to distinguish + * between a fast flick and a quick drumming movement and registers + * F11_2D_Ctrl50 and F11_2D_Ctrl51 exist. + * + * Query 12 is present if the has_query12 bit is set. + * + * @has_gapless_finger - control registers relating to gapless finger are + * present. + * @has_gapless_finger_tuning - additional control and data registers relating + * to gapless finger are present. + * @has_8bit_w - larger W value reporting is supported. + * @has_adjustable_mapping - TBD + * @has_info2 - the general info query14 is present + * @has_physical_props - additional queries describing the physical properties + * of the sensor are present. + * @has_finger_limit - indicates that F11 Ctrl 80 exists. + * @has_linear_coeff_2 - indicates that F11 Ctrl 81 exists. + * + * Query 13 is present if Query 5's has_jitter_filter bit is set.
+ * @jitter_window_size - used by Design Studio 4. + * @jitter_filter_type - used by Design Studio 4. + * + * Query 14 is present if query 12's has_general_info2 flag is set. + * + * @light_control - Indicates what light/led control features are present, if + * any. + * @is_clear - if set, this is a clear sensor (indicating direct pointing + * application), otherwise it's opaque (indicating indirect pointing). + * @clickpad_props - specifies if this is a clickpad, and if so what sort of + * mechanism it uses + * @mouse_buttons - specifies the number of mouse buttons present (if any). + * @has_advanced_gestures - advanced driver gestures are supported. + */ +struct f11_2d_sensor_queries { + /* query1 */ + u8 nr_fingers; + bool has_rel; + bool has_abs; + bool has_gestures; + bool has_sensitivity_adjust; + bool configurable; + + /* query2 */ + u8 nr_x_electrodes; + + /* query3 */ + u8 nr_y_electrodes; + + /* query4 */ + u8 max_electrodes; + + /* query5 */ + u8 abs_data_size; + bool has_anchored_finger; + bool has_adj_hyst; + bool has_dribble; + bool has_bending_correction; + bool has_large_object_suppression; + bool has_jitter_filter; + + u8 f11_2d_query6; + + /* query 7 */ + bool has_single_tap; + bool has_tap_n_hold; + bool has_double_tap; + bool has_early_tap; + bool has_flick; + bool has_press; + bool has_pinch; + bool has_chiral; + + bool query7_nonzero; + + /* query 8 */ + bool has_palm_det; + bool has_rotate; + bool has_touch_shapes; + bool has_scroll_zones; + bool has_individual_scroll_zones; + bool has_mf_scroll; + bool has_mf_edge_motion; + bool has_mf_scroll_inertia; + + bool query8_nonzero; + + /* Query 9 */ + bool has_pen; + bool has_proximity; + bool has_palm_det_sensitivity; + bool has_suppress_on_palm_detect; + bool has_two_pen_thresholds; + bool has_contact_geometry; + bool has_pen_hover_discrimination; + bool has_pen_filters; + + /* Query 10 */ + u8 nr_touch_shapes; + + /* Query 11. */ + bool has_z_tuning; + bool has_algorithm_selection; + bool has_w_tuning; + bool has_pitch_info; + bool has_finger_size; + bool has_segmentation_aggressiveness; + bool has_XY_clip; + bool has_drumming_filter; + + /* Query 12 */ + bool has_gapless_finger; + bool has_gapless_finger_tuning; + bool has_8bit_w; + bool has_adjustable_mapping; + bool has_info2; + bool has_physical_props; + bool has_finger_limit; + bool has_linear_coeff_2; + + /* Query 13 */ + u8 jitter_window_size; + u8 jitter_filter_type; + + /* Query 14 */ + u8 light_control; + bool is_clear; + u8 clickpad_props; + u8 mouse_buttons; + bool has_advanced_gestures; + + /* Query 15 - 18 */ + u16 x_sensor_size_mm; + u16 y_sensor_size_mm; +}; + +/* Defs for Ctrl0. */ +#define RMI_F11_REPORT_MODE_MASK 0x07 +#define RMI_F11_ABS_POS_FILT (1 << 3) +#define RMI_F11_REL_POS_FILT (1 << 4) +#define RMI_F11_REL_BALLISTICS (1 << 5) +#define RMI_F11_DRIBBLE (1 << 6) +#define RMI_F11_REPORT_BEYOND_CLIP (1 << 7) + +/* Defs for Ctrl1. 
*/ +#define RMI_F11_PALM_DETECT_THRESH_MASK 0x0F +#define RMI_F11_MOTION_SENSITIVITY_MASK 0x30 +#define RMI_F11_MANUAL_TRACKING (1 << 6) +#define RMI_F11_MANUAL_TRACKED_FINGER (1 << 7) + +#define RMI_F11_DELTA_X_THRESHOLD 2 +#define RMI_F11_DELTA_Y_THRESHOLD 3 + +#define RMI_F11_CTRL_REG_COUNT 12 + +struct f11_2d_ctrl { + u8 ctrl0_11[RMI_F11_CTRL_REG_COUNT]; + u16 ctrl0_11_address; +}; + +#define RMI_F11_ABS_BYTES 5 +#define RMI_F11_REL_BYTES 2 + +/* Defs for Data 8 */ + +#define RMI_F11_SINGLE_TAP (1 << 0) +#define RMI_F11_TAP_AND_HOLD (1 << 1) +#define RMI_F11_DOUBLE_TAP (1 << 2) +#define RMI_F11_EARLY_TAP (1 << 3) +#define RMI_F11_FLICK (1 << 4) +#define RMI_F11_PRESS (1 << 5) +#define RMI_F11_PINCH (1 << 6) + +/* Defs for Data 9 */ + +#define RMI_F11_PALM_DETECT (1 << 0) +#define RMI_F11_ROTATE (1 << 1) +#define RMI_F11_SHAPE (1 << 2) +#define RMI_F11_SCROLLZONE (1 << 3) +#define RMI_F11_GESTURE_FINGER_COUNT_MASK 0x70 + +/** Handy pointers into our data buffer. + * + * @f_state - start of finger state registers. + * @abs_pos - start of absolute position registers (if present). + * @rel_pos - start of relative data registers (if present). + * @gest_1 - gesture flags (if present). + * @gest_2 - gesture flags & finger count (if present). + * @pinch - pinch motion register (if present). + * @flick - flick distance X & Y, flick time (if present). + * @rotate - rotate motion and finger separation. + * @multi_scroll - chiral deltas for X and Y (if present). + * @scroll_zones - scroll deltas for 4 regions (if present). + */ +struct f11_2d_data { + u8 *f_state; + u8 *abs_pos; + s8 *rel_pos; + u8 *gest_1; + u8 *gest_2; + s8 *pinch; + u8 *flick; + u8 *rotate; + u8 *shapes; + s8 *multi_scroll; + s8 *scroll_zones; +}; + +/** Data pertaining to F11 in general. For per-sensor data, see struct + * f11_2d_sensor. + * + * @dev_query - F11 device specific query registers. + * @dev_controls - F11 device specific control registers. + * @dev_controls_mutex - lock for the control registers. + * @rezero_wait_ms - if nonzero, upon resume we will wait this many + * milliseconds before rezeroing the sensor(s). This is useful in systems with + * poor electrical behavior on resume, where the initial calibration of the + * sensor(s) coming out of sleep state may be bogus. + * @sensors - per sensor data structures. 
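+ * @abs_mask - bitmap of the IRQ bits that signal absolute reports. + * @rel_mask - bitmap of the IRQ bits that signal relative reports.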
+ */ +struct f11_data { + bool has_query9; + bool has_query11; + bool has_query12; + bool has_query27; + bool has_query28; + bool has_acm; + struct f11_2d_ctrl dev_controls; + struct mutex dev_controls_mutex; + u16 rezero_wait_ms; + struct rmi_2d_sensor sensor; + struct f11_2d_sensor_queries sens_query; + struct f11_2d_data data; + struct rmi_2d_sensor_platform_data sensor_pdata; + unsigned long *abs_mask; + unsigned long *rel_mask; + unsigned long *result_bits; +}; + +enum f11_finger_state { + F11_NO_FINGER = 0x00, + F11_PRESENT = 0x01, + F11_INACCURATE = 0x02, + F11_RESERVED = 0x03 +}; + +static void rmi_f11_rel_pos_report(struct f11_data *f11, u8 n_finger) +{ + struct rmi_2d_sensor *sensor = &f11->sensor; + struct f11_2d_data *data = &f11->data; + s8 x, y; + + x = data->rel_pos[n_finger * 2]; + y = data->rel_pos[n_finger * 2 + 1]; + + rmi_2d_sensor_rel_report(sensor, x, y); +} + +static void rmi_f11_abs_pos_process(struct f11_data *f11, + struct rmi_2d_sensor *sensor, + struct rmi_2d_sensor_abs_object *obj, + enum f11_finger_state finger_state, + u8 n_finger) +{ + struct f11_2d_data *data = &f11->data; + u8 *pos_data = &data->abs_pos[n_finger * RMI_F11_ABS_BYTES]; + int tool_type = MT_TOOL_FINGER; + + switch (finger_state) { + case F11_PRESENT: + obj->type = RMI_2D_OBJECT_FINGER; + break; + default: + obj->type = RMI_2D_OBJECT_NONE; + } + + obj->mt_tool = tool_type; + obj->x = (pos_data[0] << 4) | (pos_data[2] & 0x0F); + obj->y = (pos_data[1] << 4) | (pos_data[2] >> 4); + obj->z = pos_data[4]; + obj->wx = pos_data[3] & 0x0f; + obj->wy = pos_data[3] >> 4; + + rmi_2d_sensor_abs_process(sensor, obj, n_finger); +} + +static inline u8 rmi_f11_parse_finger_state(const u8 *f_state, u8 n_finger) +{ + return (f_state[n_finger / 4] >> (2 * (n_finger % 4))) & + FINGER_STATE_MASK; +} + +static void rmi_f11_finger_handler(struct f11_data *f11, + struct rmi_2d_sensor *sensor, + unsigned long *irq_bits, int num_irq_regs) +{ + const u8 *f_state = f11->data.f_state; + u8 finger_state; + u8 i; + + int abs_bits = bitmap_and(f11->result_bits, irq_bits, f11->abs_mask, + num_irq_regs * 8); + int rel_bits = bitmap_and(f11->result_bits, irq_bits, f11->rel_mask, + num_irq_regs * 8); + + for (i = 0; i < sensor->nbr_fingers; i++) { + /* There can be up to 4 finger states per f_state register */ + finger_state = rmi_f11_parse_finger_state(f_state, i); + if (finger_state == F11_RESERVED) { + pr_err("Invalid finger state[%d]: 0x%02x\n", i, + finger_state); + continue; + } + + if (abs_bits) + rmi_f11_abs_pos_process(f11, sensor, &sensor->objs[i], + finger_state, i); + + if (rel_bits) + rmi_f11_rel_pos_report(f11, i); + } + + if (abs_bits) { + /* + * Absolute reporting is done in two passes so that the + * kernel MT tracking can take place first. + */ + if (sensor->kernel_tracking) + input_mt_assign_slots(sensor->input, + sensor->tracking_slots, + sensor->tracking_pos, + sensor->nbr_fingers, + sensor->dmax); + + for (i = 0; i < sensor->nbr_fingers; i++) { + finger_state = rmi_f11_parse_finger_state(f_state, i); + if (finger_state == F11_RESERVED) + /* no need to report the error twice */ + continue; + + rmi_2d_sensor_abs_report(sensor, &sensor->objs[i], i); + } + + input_mt_sync_frame(sensor->input); + } +}
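For illustration only (not part of the patch): F11 packs one 2-bit state per finger, four fingers to a byte. This stand-alone user-space C sketch decodes a hypothetical f_state byte exactly the way rmi_f11_parse_finger_state() above does.

#include <stdio.h>

#define FINGER_STATE_MASK 0x03

static unsigned int parse_finger_state(const unsigned char *f_state,
				       unsigned int n_finger)
{
	return (f_state[n_finger / 4] >> (2 * (n_finger % 4))) &
		FINGER_STATE_MASK;
}

int main(void)
{
	const unsigned char f_state[] = { 0x1b };	/* 00 01 10 11b */
	unsigned int i;

	for (i = 0; i < 4; i++)
		printf("finger %u: state %u\n", i,
		       parse_finger_state(f_state, i));
	return 0;	/* prints states 3, 2, 1, 0 */
}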
+ +static int f11_2d_construct_data(struct f11_data *f11) +{ + struct rmi_2d_sensor *sensor = &f11->sensor; + struct f11_2d_sensor_queries *query = &f11->sens_query; + struct f11_2d_data *data = &f11->data; + int i; + + sensor->nbr_fingers = (query->nr_fingers == 5 ? 10 : + query->nr_fingers + 1); + + sensor->pkt_size = DIV_ROUND_UP(sensor->nbr_fingers, 4); + + if (query->has_abs) { + sensor->pkt_size += (sensor->nbr_fingers * RMI_F11_ABS_BYTES); + sensor->attn_size = sensor->pkt_size; + } + + if (query->has_rel) + sensor->pkt_size += (sensor->nbr_fingers * RMI_F11_REL_BYTES); + + /* Check if F11_2D_Query7 is non-zero */ + if (query->query7_nonzero) + sensor->pkt_size += sizeof(u8); + + /* Check if F11_2D_Query7 or F11_2D_Query8 is non-zero */ + if (query->query7_nonzero || query->query8_nonzero) + sensor->pkt_size += sizeof(u8); + + if (query->has_pinch || query->has_flick || query->has_rotate) { + sensor->pkt_size += 3; + if (!query->has_flick) + sensor->pkt_size--; + if (!query->has_rotate) + sensor->pkt_size--; + } + + if (query->has_touch_shapes) + sensor->pkt_size += + DIV_ROUND_UP(query->nr_touch_shapes + 1, 8); + + sensor->data_pkt = devm_kzalloc(&sensor->fn->dev, sensor->pkt_size, + GFP_KERNEL); + if (!sensor->data_pkt) + return -ENOMEM; + + data->f_state = sensor->data_pkt; + i = DIV_ROUND_UP(sensor->nbr_fingers, 4); + + if (query->has_abs) { + data->abs_pos = &sensor->data_pkt[i]; + i += (sensor->nbr_fingers * RMI_F11_ABS_BYTES); + } + + if (query->has_rel) { + data->rel_pos = &sensor->data_pkt[i]; + i += (sensor->nbr_fingers * RMI_F11_REL_BYTES); + } + + if (query->query7_nonzero) { + data->gest_1 = &sensor->data_pkt[i]; + i++; + } + + if (query->query7_nonzero || query->query8_nonzero) { + data->gest_2 = &sensor->data_pkt[i]; + i++; + } + + if (query->has_pinch) { + data->pinch = &sensor->data_pkt[i]; + i++; + } + + if (query->has_flick) { + if (query->has_pinch) { + data->flick = data->pinch; + i += 2; + } else { + data->flick = &sensor->data_pkt[i]; + i += 3; + } + } + + if (query->has_rotate) { + if (query->has_flick) { + data->rotate = data->flick + 1; + } else { + data->rotate = &sensor->data_pkt[i]; + i += 2; + } + } + + if (query->has_touch_shapes) + data->shapes = &sensor->data_pkt[i]; + + return 0; +} + +static int f11_read_control_regs(struct rmi_function *fn, + struct f11_2d_ctrl *ctrl, u16 ctrl_base_addr) +{ + struct rmi_device *rmi_dev = fn->rmi_dev; + int error = 0; + + ctrl->ctrl0_11_address = ctrl_base_addr; + error = rmi_read_block(rmi_dev, ctrl_base_addr, ctrl->ctrl0_11, + RMI_F11_CTRL_REG_COUNT); + if (error < 0) { + dev_err(&fn->dev, "Failed to read ctrl0, code: %d.\n", error); + return error; + } + + return 0; +} + +static int f11_write_control_regs(struct rmi_function *fn, + struct f11_2d_sensor_queries *query, + struct f11_2d_ctrl *ctrl, + u16 ctrl_base_addr) +{ + struct rmi_device *rmi_dev = fn->rmi_dev; + int error; + + error = rmi_write_block(rmi_dev, ctrl_base_addr, ctrl->ctrl0_11, + RMI_F11_CTRL_REG_COUNT); + if (error < 0) + return error; + + return 0; +} + +static int rmi_f11_get_query_parameters(struct rmi_device *rmi_dev, + struct f11_data *f11, + struct f11_2d_sensor_queries *sensor_query, + u16 query_base_addr) +{ + int query_size; + int rc; + u8 query_buf[RMI_F11_QUERY_SIZE]; + bool has_query36 = false; + + rc = rmi_read_block(rmi_dev, query_base_addr, query_buf, + RMI_F11_QUERY_SIZE); + if (rc < 0) + return rc; + + sensor_query->nr_fingers = query_buf[0] & RMI_F11_NR_FINGERS_MASK; + sensor_query->has_rel = !!(query_buf[0] & RMI_F11_HAS_REL); + sensor_query->has_abs = !!(query_buf[0] & RMI_F11_HAS_ABS); + sensor_query->has_gestures = !!(query_buf[0] & RMI_F11_HAS_GESTURES); + sensor_query->has_sensitivity_adjust = + !!(query_buf[0] & RMI_F11_HAS_SENSITIVITY_ADJ); + sensor_query->configurable = !!(query_buf[0] & RMI_F11_CONFIGURABLE); +
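+ /* + * Worked example (hypothetical readback): query_buf[0] == 0x9a, i.e. + * 1001 1010b, decodes as nr_fingers = 2, has_rel = 1, has_abs = 1, + * has_gestures = 0, has_sensitivity_adjust = 0 and configurable = 1. + */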
+ sensor_query->nr_x_electrodes = + query_buf[1] & RMI_F11_NR_ELECTRODES_MASK; + sensor_query->nr_y_electrodes = + query_buf[2] & RMI_F11_NR_ELECTRODES_MASK; + sensor_query->max_electrodes = + query_buf[3] & RMI_F11_NR_ELECTRODES_MASK; + + query_size = RMI_F11_QUERY_SIZE; + + if (sensor_query->has_abs) { + rc = rmi_read(rmi_dev, query_base_addr + query_size, query_buf); + if (rc < 0) + return rc; + + sensor_query->abs_data_size = + query_buf[0] & RMI_F11_ABS_DATA_SIZE_MASK; + sensor_query->has_anchored_finger = + !!(query_buf[0] & RMI_F11_HAS_ANCHORED_FINGER); + sensor_query->has_adj_hyst = + !!(query_buf[0] & RMI_F11_HAS_ADJ_HYST); + sensor_query->has_dribble = + !!(query_buf[0] & RMI_F11_HAS_DRIBBLE); + sensor_query->has_bending_correction = + !!(query_buf[0] & RMI_F11_HAS_BENDING_CORRECTION); + sensor_query->has_large_object_suppression = + !!(query_buf[0] & RMI_F11_HAS_LARGE_OBJECT_SUPPRESSION); + sensor_query->has_jitter_filter = + !!(query_buf[0] & RMI_F11_HAS_JITTER_FILTER); + query_size++; + } + + if (sensor_query->has_rel) { + rc = rmi_read(rmi_dev, query_base_addr + query_size, + &sensor_query->f11_2d_query6); + if (rc < 0) + return rc; + query_size++; + } + + if (sensor_query->has_gestures) { + rc = rmi_read_block(rmi_dev, query_base_addr + query_size, + query_buf, RMI_F11_QUERY_GESTURE_SIZE); + if (rc < 0) + return rc; + + sensor_query->has_single_tap = + !!(query_buf[0] & RMI_F11_HAS_SINGLE_TAP); + sensor_query->has_tap_n_hold = + !!(query_buf[0] & RMI_F11_HAS_TAP_AND_HOLD); + sensor_query->has_double_tap = + !!(query_buf[0] & RMI_F11_HAS_DOUBLE_TAP); + sensor_query->has_early_tap = + !!(query_buf[0] & RMI_F11_HAS_EARLY_TAP); + sensor_query->has_flick = + !!(query_buf[0] & RMI_F11_HAS_FLICK); + sensor_query->has_press = + !!(query_buf[0] & RMI_F11_HAS_PRESS); + sensor_query->has_pinch = + !!(query_buf[0] & RMI_F11_HAS_PINCH); + sensor_query->has_chiral = + !!(query_buf[0] & RMI_F11_HAS_CHIRAL); + + /* query 8 */ + sensor_query->has_palm_det = + !!(query_buf[1] & RMI_F11_HAS_PALM_DET); + sensor_query->has_rotate = + !!(query_buf[1] & RMI_F11_HAS_ROTATE); + sensor_query->has_touch_shapes = + !!(query_buf[1] & RMI_F11_HAS_TOUCH_SHAPES); + sensor_query->has_scroll_zones = + !!(query_buf[1] & RMI_F11_HAS_SCROLL_ZONES); + sensor_query->has_individual_scroll_zones = + !!(query_buf[1] & RMI_F11_HAS_INDIVIDUAL_SCROLL_ZONES); + sensor_query->has_mf_scroll = + !!(query_buf[1] & RMI_F11_HAS_MF_SCROLL); + sensor_query->has_mf_edge_motion = + !!(query_buf[1] & RMI_F11_HAS_MF_EDGE_MOTION); + sensor_query->has_mf_scroll_inertia = + !!(query_buf[1] & RMI_F11_HAS_MF_SCROLL_INERTIA); + + sensor_query->query7_nonzero = !!(query_buf[0]); + sensor_query->query8_nonzero = !!(query_buf[1]); + + query_size += 2; + } + + if (f11->has_query9) { + rc = rmi_read(rmi_dev, query_base_addr + query_size, query_buf); + if (rc < 0) + return rc; + + sensor_query->has_pen = + !!(query_buf[0] & RMI_F11_HAS_PEN); + sensor_query->has_proximity = + !!(query_buf[0] & RMI_F11_HAS_PROXIMITY); + sensor_query->has_palm_det_sensitivity = + !!(query_buf[0] & RMI_F11_HAS_PALM_DET_SENSITIVITY); + sensor_query->has_suppress_on_palm_detect = + !!(query_buf[0] & RMI_F11_HAS_SUPPRESS_ON_PALM_DETECT); + sensor_query->has_two_pen_thresholds = + !!(query_buf[0] & RMI_F11_HAS_TWO_PEN_THRESHOLDS); + sensor_query->has_contact_geometry = + !!(query_buf[0] & RMI_F11_HAS_CONTACT_GEOMETRY); + sensor_query->has_pen_hover_discrimination = + !!(query_buf[0] & RMI_F11_HAS_PEN_HOVER_DISCRIMINATION); + sensor_query->has_pen_filters =
!!(query_buf[0] & RMI_F11_HAS_PEN_FILTERS); + + query_size++; + } + + if (sensor_query->has_touch_shapes) { + rc = rmi_read(rmi_dev, query_base_addr + query_size, query_buf); + if (rc < 0) + return rc; + + sensor_query->nr_touch_shapes = query_buf[0] & + RMI_F11_NR_TOUCH_SHAPES_MASK; + + query_size++; + } + + if (f11->has_query11) { + rc = rmi_read(rmi_dev, query_base_addr + query_size, query_buf); + if (rc < 0) + return rc; + + sensor_query->has_z_tuning = + !!(query_buf[0] & RMI_F11_HAS_Z_TUNING); + sensor_query->has_algorithm_selection = + !!(query_buf[0] & RMI_F11_HAS_ALGORITHM_SELECTION); + sensor_query->has_w_tuning = + !!(query_buf[0] & RMI_F11_HAS_W_TUNING); + sensor_query->has_pitch_info = + !!(query_buf[0] & RMI_F11_HAS_PITCH_INFO); + sensor_query->has_finger_size = + !!(query_buf[0] & RMI_F11_HAS_FINGER_SIZE); + sensor_query->has_segmentation_aggressiveness = + !!(query_buf[0] & + RMI_F11_HAS_SEGMENTATION_AGGRESSIVENESS); + sensor_query->has_XY_clip = + !!(query_buf[0] & RMI_F11_HAS_XY_CLIP); + sensor_query->has_drumming_filter = + !!(query_buf[0] & RMI_F11_HAS_DRUMMING_FILTER); + + query_size++; + } + + if (f11->has_query12) { + rc = rmi_read(rmi_dev, query_base_addr + query_size, query_buf); + if (rc < 0) + return rc; + + sensor_query->has_gapless_finger = + !!(query_buf[0] & RMI_F11_HAS_GAPLESS_FINGER); + sensor_query->has_gapless_finger_tuning = + !!(query_buf[0] & RMI_F11_HAS_GAPLESS_FINGER_TUNING); + sensor_query->has_8bit_w = + !!(query_buf[0] & RMI_F11_HAS_8BIT_W); + sensor_query->has_adjustable_mapping = + !!(query_buf[0] & RMI_F11_HAS_ADJUSTABLE_MAPPING); + sensor_query->has_info2 = + !!(query_buf[0] & RMI_F11_HAS_INFO2); + sensor_query->has_physical_props = + !!(query_buf[0] & RMI_F11_HAS_PHYSICAL_PROPS); + sensor_query->has_finger_limit = + !!(query_buf[0] & RMI_F11_HAS_FINGER_LIMIT); + sensor_query->has_linear_coeff_2 = + !!(query_buf[0] & RMI_F11_HAS_LINEAR_COEFF); + + query_size++; + } + + if (sensor_query->has_jitter_filter) { + rc = rmi_read(rmi_dev, query_base_addr + query_size, query_buf); + if (rc < 0) + return rc; + + sensor_query->jitter_window_size = query_buf[0] & + RMI_F11_JITTER_WINDOW_MASK; + sensor_query->jitter_filter_type = (query_buf[0] & + RMI_F11_JITTER_FILTER_MASK) >> + RMI_F11_JITTER_FILTER_SHIFT; + + query_size++; + } + + if (sensor_query->has_info2) { + rc = rmi_read(rmi_dev, query_base_addr + query_size, query_buf); + if (rc < 0) + return rc; + + sensor_query->light_control = + query_buf[0] & RMI_F11_LIGHT_CONTROL_MASK; + sensor_query->is_clear = + !!(query_buf[0] & RMI_F11_IS_CLEAR); + sensor_query->clickpad_props = + (query_buf[0] & RMI_F11_CLICKPAD_PROPS_MASK) >> + RMI_F11_CLICKPAD_PROPS_SHIFT; + sensor_query->mouse_buttons = + (query_buf[0] & RMI_F11_MOUSE_BUTTONS_MASK) >> + RMI_F11_MOUSE_BUTTONS_SHIFT; + sensor_query->has_advanced_gestures = + !!(query_buf[0] & RMI_F11_HAS_ADVANCED_GESTURES); + + query_size++; + } + + if (sensor_query->has_physical_props) { + rc = rmi_read_block(rmi_dev, query_base_addr + + query_size, query_buf, 4); + if (rc < 0) + return rc; + + sensor_query->x_sensor_size_mm = + (query_buf[0] | (query_buf[1] << 8)) / 10; + sensor_query->y_sensor_size_mm = + (query_buf[2] | (query_buf[3] << 8)) / 10; + + /* + * query 15 - 18 contain the size of the sensor + * and query 19 - 26 contain bezel dimensions + */ + query_size += 12; + } + + if (f11->has_query27) + ++query_size; + + if (f11->has_query28) { + rc = rmi_read(rmi_dev, query_base_addr + query_size, + query_buf); + if (rc < 0) + return rc; + + has_query36 = 
!!(query_buf[0] & BIT(6)); + } + + if (has_query36) { + query_size += 2; + rc = rmi_read(rmi_dev, query_base_addr + query_size, + query_buf); + if (rc < 0) + return rc; + + if (!!(query_buf[0] & BIT(5))) + f11->has_acm = true; + } + + return query_size; +} + +static int rmi_f11_initialize(struct rmi_function *fn) +{ + struct rmi_device *rmi_dev = fn->rmi_dev; + struct f11_data *f11; + struct f11_2d_ctrl *ctrl; + u8 query_offset; + u16 query_base_addr; + u16 control_base_addr; + u16 max_x_pos, max_y_pos; + int rc; + const struct rmi_device_platform_data *pdata = + rmi_get_platform_data(rmi_dev); + struct rmi_driver_data *drvdata = dev_get_drvdata(&rmi_dev->dev); + struct rmi_2d_sensor *sensor; + u8 buf; + int mask_size; + + rmi_dbg(RMI_DEBUG_FN, &fn->dev, "Initializing F11 values.\n"); + + mask_size = BITS_TO_LONGS(drvdata->irq_count) * sizeof(unsigned long); + + /* + ** init instance data, fill in values and create any sysfs files + */ + f11 = devm_kzalloc(&fn->dev, sizeof(struct f11_data) + mask_size * 3, + GFP_KERNEL); + if (!f11) + return -ENOMEM; + + if (pdata->sensor_pdata) + f11->sensor_pdata = *pdata->sensor_pdata; + + f11->rezero_wait_ms = f11->sensor_pdata.rezero_wait; + + f11->abs_mask = (unsigned long *)((char *)f11 + + sizeof(struct f11_data)); + f11->rel_mask = (unsigned long *)((char *)f11 + + sizeof(struct f11_data) + mask_size); + f11->result_bits = (unsigned long *)((char *)f11 + + sizeof(struct f11_data) + mask_size * 2); + + set_bit(fn->irq_pos, f11->abs_mask); + set_bit(fn->irq_pos + 1, f11->rel_mask); + + query_base_addr = fn->fd.query_base_addr; + control_base_addr = fn->fd.control_base_addr; + + rc = rmi_read(rmi_dev, query_base_addr, &buf); + if (rc < 0) + return rc; + + f11->has_query9 = !!(buf & RMI_F11_HAS_QUERY9); + f11->has_query11 = !!(buf & RMI_F11_HAS_QUERY11); + f11->has_query12 = !!(buf & RMI_F11_HAS_QUERY12); + f11->has_query27 = !!(buf & RMI_F11_HAS_QUERY27); + f11->has_query28 = !!(buf & RMI_F11_HAS_QUERY28); + + query_offset = (query_base_addr + 1); + sensor = &f11->sensor; + sensor->fn = fn; + + rc = rmi_f11_get_query_parameters(rmi_dev, f11, + &f11->sens_query, query_offset); + if (rc < 0) + return rc; + query_offset += rc; + + rc = f11_read_control_regs(fn, &f11->dev_controls, + control_base_addr); + if (rc < 0) { + dev_err(&fn->dev, + "Failed to read F11 control params.\n"); + return rc; + } + + if (f11->sens_query.has_info2) { + if (f11->sens_query.is_clear) + f11->sensor.sensor_type = rmi_sensor_touchscreen; + else + f11->sensor.sensor_type = rmi_sensor_touchpad; + } + + sensor->report_abs = f11->sens_query.has_abs; + + sensor->axis_align = + f11->sensor_pdata.axis_align; + + sensor->topbuttonpad = f11->sensor_pdata.topbuttonpad; + sensor->kernel_tracking = f11->sensor_pdata.kernel_tracking; + sensor->dmax = f11->sensor_pdata.dmax; + + if (f11->sens_query.has_physical_props) { + sensor->x_mm = f11->sens_query.x_sensor_size_mm; + sensor->y_mm = f11->sens_query.y_sensor_size_mm; + } else { + sensor->x_mm = f11->sensor_pdata.x_mm; + sensor->y_mm = f11->sensor_pdata.y_mm; + } + + if (sensor->sensor_type == rmi_sensor_default) + sensor->sensor_type = + f11->sensor_pdata.sensor_type; + + sensor->report_abs = sensor->report_abs + && !(f11->sensor_pdata.disable_report_mask + & RMI_F11_DISABLE_ABS_REPORT); + + if (!sensor->report_abs) + /* + * If device doesn't have abs or if it has been disables + * fallback to reporting rel data. 
+ */ + sensor->report_rel = f11->sens_query.has_rel; + + rc = rmi_read_block(rmi_dev, + control_base_addr + F11_CTRL_SENSOR_MAX_X_POS_OFFSET, + (u8 *)&max_x_pos, sizeof(max_x_pos)); + if (rc < 0) + return rc; + + rc = rmi_read_block(rmi_dev, + control_base_addr + F11_CTRL_SENSOR_MAX_Y_POS_OFFSET, + (u8 *)&max_y_pos, sizeof(max_y_pos)); + if (rc < 0) + return rc; + + sensor->max_x = max_x_pos; + sensor->max_y = max_y_pos; + + rc = f11_2d_construct_data(f11); + if (rc < 0) + return rc; + + if (f11->has_acm) + f11->sensor.attn_size += f11->sensor.nbr_fingers * 2; + + /* allocate the in-kernel tracking buffers */ + sensor->tracking_pos = devm_kzalloc(&fn->dev, + sizeof(struct input_mt_pos) * sensor->nbr_fingers, + GFP_KERNEL); + sensor->tracking_slots = devm_kzalloc(&fn->dev, + sizeof(int) * sensor->nbr_fingers, GFP_KERNEL); + sensor->objs = devm_kzalloc(&fn->dev, + sizeof(struct rmi_2d_sensor_abs_object) + * sensor->nbr_fingers, GFP_KERNEL); + if (!sensor->tracking_pos || !sensor->tracking_slots || !sensor->objs) + return -ENOMEM; + + ctrl = &f11->dev_controls; + if (sensor->axis_align.delta_x_threshold) + ctrl->ctrl0_11[RMI_F11_DELTA_X_THRESHOLD] = + sensor->axis_align.delta_x_threshold; + + if (sensor->axis_align.delta_y_threshold) + ctrl->ctrl0_11[RMI_F11_DELTA_Y_THRESHOLD] = + sensor->axis_align.delta_y_threshold; + + if (f11->sens_query.has_dribble) + ctrl->ctrl0_11[0] = ctrl->ctrl0_11[0] & ~BIT(6); + + if (f11->sens_query.has_palm_det) + ctrl->ctrl0_11[11] = ctrl->ctrl0_11[11] & ~BIT(0); + + rc = f11_write_control_regs(fn, &f11->sens_query, + &f11->dev_controls, fn->fd.query_base_addr); + if (rc) + dev_warn(&fn->dev, "Failed to write control registers\n"); + + mutex_init(&f11->dev_controls_mutex); + + dev_set_drvdata(&fn->dev, f11); + + return 0; +} + +static int rmi_f11_config(struct rmi_function *fn) +{ + struct f11_data *f11 = dev_get_drvdata(&fn->dev); + struct rmi_driver *drv = fn->rmi_dev->driver; + struct rmi_2d_sensor *sensor = &f11->sensor; + int rc; + + if (!sensor->report_abs) + drv->clear_irq_bits(fn->rmi_dev, f11->abs_mask); + else + drv->set_irq_bits(fn->rmi_dev, f11->abs_mask); + + if (!sensor->report_rel) + drv->clear_irq_bits(fn->rmi_dev, f11->rel_mask); + else + drv->set_irq_bits(fn->rmi_dev, f11->rel_mask); + + rc = f11_write_control_regs(fn, &f11->sens_query, + &f11->dev_controls, fn->fd.query_base_addr); + if (rc < 0) + return rc; + + return 0; +} + +static int rmi_f11_attention(struct rmi_function *fn, unsigned long *irq_bits) +{ + struct rmi_device *rmi_dev = fn->rmi_dev; + struct rmi_driver_data *drvdata = dev_get_drvdata(&rmi_dev->dev); + struct f11_data *f11 = dev_get_drvdata(&fn->dev); + u16 data_base_addr = fn->fd.data_base_addr; + u16 data_base_addr_offset = 0; + int error; + + if (rmi_dev->xport->attn_data) { + memcpy(f11->sensor.data_pkt, rmi_dev->xport->attn_data, + f11->sensor.attn_size); + rmi_dev->xport->attn_data += f11->sensor.attn_size; + rmi_dev->xport->attn_size -= f11->sensor.attn_size; + } else { + error = rmi_read_block(rmi_dev, + data_base_addr + data_base_addr_offset, + f11->sensor.data_pkt, + f11->sensor.pkt_size); + if (error < 0) + return error; + } + + rmi_f11_finger_handler(f11, &f11->sensor, irq_bits, + drvdata->num_of_irq_regs); + data_base_addr_offset += f11->sensor.pkt_size; + + return 0; +} + +static int rmi_f11_resume(struct rmi_function *fn) +{ + struct f11_data *f11 = dev_get_drvdata(&fn->dev); + int error; + + rmi_dbg(RMI_DEBUG_FN, &fn->dev, "Resuming...\n"); + if (!f11->rezero_wait_ms) + return 0; + + 
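+	/*
+	 * Wait before issuing the rezero command below; rezero_wait_ms
+	 * comes from the per-sensor platform data, and a zero value
+	 * (checked above) skips the rezero entirely.
+	 */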
mdelay(f11->rezero_wait_ms); + + error = rmi_write(fn->rmi_dev, fn->fd.command_base_addr, + RMI_F11_REZERO); + if (error) { + dev_err(&fn->dev, + "%s: failed to issue rezero command, error = %d.", + __func__, error); + return error; + } + + return 0; +} + +static int rmi_f11_probe(struct rmi_function *fn) +{ + int error; + struct f11_data *f11; + + error = rmi_f11_initialize(fn); + if (error) + return error; + + f11 = dev_get_drvdata(&fn->dev); + error = rmi_2d_sensor_configure_input(fn, &f11->sensor); + if (error) + return error; + + return 0; +} + +struct rmi_function_handler rmi_f11_handler = { + .driver = { + .name = "rmi4_f11", + }, + .func = 0x11, + .probe = rmi_f11_probe, + .config = rmi_f11_config, + .attention = rmi_f11_attention, + .resume = rmi_f11_resume, +}; diff --git a/include/linux/rmi.h b/include/linux/rmi.h index c748fa302067..b527064bac47 100644 --- a/include/linux/rmi.h +++ b/include/linux/rmi.h @@ -19,6 +19,88 @@ #define NAME_BUFFER_SIZE 256 +/** + * struct rmi_2d_axis_alignment - target axis alignment + * @swap_axes: set to true to swap the x- and y-axis + * @flip_x: set to true to flip the direction of the x-axis + * @flip_y: set to true to flip the direction of the y-axis + * @clip_x_low - reported X coordinates below this setting will be clipped to + * the specified value + * @clip_x_high - reported X coordinates above this setting will be clipped to + * the specified value + * @clip_y_low - reported Y coordinates below this setting will be clipped to + * the specified value + * @clip_y_high - reported Y coordinates above this setting will be clipped to + * the specified value + * @offset_x - this value will be added to all reported X coordinates + * @offset_y - this value will be added to all reported Y coordinates + * @delta_x_threshold - if non-zero, this value is written to the sensor's + * delta X threshold control register + * @delta_y_threshold - if non-zero, this value is written to the sensor's + * delta Y threshold control register + */ +struct rmi_2d_axis_alignment { + bool swap_axes; + bool flip_x; + bool flip_y; + u16 clip_x_low; + u16 clip_y_low; + u16 clip_x_high; + u16 clip_y_high; + u16 offset_x; + u16 offset_y; + u8 delta_x_threshold; + u8 delta_y_threshold; +}; + +/** This is used to override any hints an F11 2D sensor might have provided + * as to what type of sensor it is. + * + * @rmi_sensor_default - do not override, determine from F11_2D_QUERY14 if + * available. + * @rmi_sensor_touchscreen - treat the sensor as a touchscreen (direct + * pointing). + * @rmi_sensor_touchpad - treat the sensor as a touchpad (indirect + * pointing). + */ +enum rmi_sensor_type { + rmi_sensor_default = 0, + rmi_sensor_touchscreen, + rmi_sensor_touchpad +}; + +#define RMI_F11_DISABLE_ABS_REPORT BIT(0) + +/** + * struct rmi_2d_sensor_platform_data - overrides defaults for a 2D sensor. + * @axis_align - provides axis alignment overrides (see above). + * @sensor_type - Forces the driver to treat the sensor as an indirect + * pointing device (touchpad) rather than a direct pointing device + * (touchscreen). This is useful when the F11_2D_QUERY14 register is not + * available. + * @disable_report_mask - Force data to not be reported even if it is supported + * by the firmware. + * @topbuttonpad - Used with the "5 button touchpads" found on the Lenovo 40 + * series. + * @kernel_tracking - most modern RMI F11 firmwares implement Multifinger + * Type B protocol. However, there are some corner cases where the user + * triggers some jumps by tapping with two fingers on the touchpad. + * Use this setting and dmax to filter out these jumps.
+ * Also, when using an old sensor with MF Type A behavior, set to true to + * report an actual MT protocol B. + * @dmax - the maximum distance (in sensor units) the kernel tracking allows + * two distinct fingers to be considered the same. + */ +struct rmi_2d_sensor_platform_data { + struct rmi_2d_axis_alignment axis_align; + enum rmi_sensor_type sensor_type; + int x_mm; + int y_mm; + int disable_report_mask; + u16 rezero_wait; + bool topbuttonpad; + bool kernel_tracking; + int dmax; +}; + /** * struct rmi_f01_power - override default power management settings. * @@ -63,6 +145,7 @@ struct rmi_device_platform_data { int reset_delay_ms; /* function handler pdata */ + struct rmi_2d_sensor_platform_data *sensor_pdata; struct rmi_f01_power_management power_management; }; -- cgit v1.2.3 From 562b42d3ee305472e1b2ea31574c59925e95fd7e Mon Sep 17 00:00:00 2001 From: Andrew Duggan Date: Thu, 10 Mar 2016 15:56:58 -0800 Subject: Input: synaptics-rmi4 - add support for F30 RMI4 F30 supports input from clickpad buttons and controls LEDs located on the touchpad PCB. This patch adds support for the clickpad buttons and defers LED support to the future. Signed-off-by: Andrew Duggan Signed-off-by: Benjamin Tissoires Tested-by: Benjamin Tissoires Tested-by: Linus Walleij Tested-by: Bjorn Andersson Signed-off-by: Dmitry Torokhov --- drivers/input/rmi4/Kconfig | 9 + drivers/input/rmi4/Makefile | 1 + drivers/input/rmi4/rmi_bus.c | 3 + drivers/input/rmi4/rmi_driver.h | 1 + drivers/input/rmi4/rmi_f30.c | 407 ++++++++++++++++++++++++++++++++++++++++ include/linux/rmi.h | 16 ++ 6 files changed, 437 insertions(+) create mode 100644 drivers/input/rmi4/rmi_f30.c (limited to 'include/linux') diff --git a/drivers/input/rmi4/Kconfig b/drivers/input/rmi4/Kconfig index aabb8542a55d..284faec30a7a 100644 --- a/drivers/input/rmi4/Kconfig +++ b/drivers/input/rmi4/Kconfig @@ -43,3 +43,12 @@ config RMI4_F12 Function 12 provides 2D multifinger pointing for touchscreens and touchpads. For sensors that support relative pointing, F12 also provides mouse input. + +config RMI4_F30 + bool "RMI4 Function 30 (GPIO LED)" + depends on RMI4_CORE + help + Say Y here if you want to add support for RMI4 function 30. + + Function 30 provides GPIO and LED support for RMI4 devices. This + includes support for buttons on TouchPads and ClickPads.
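For illustration only (not part of the patch): a minimal sketch of how a board file might populate the 2D sensor platform data introduced in the rmi.h hunk above. The board file, the values and the variable names here are hypothetical; only the structures and field names come from the patch.

	/* Hypothetical board-file fragment (illustrative values). */
	static struct rmi_2d_sensor_platform_data example_sensor_pdata = {
		.axis_align = {
			.flip_y = true,		/* panel is mounted upside down */
		},
		.sensor_type = rmi_sensor_touchpad,	/* no F11_2D_QUERY14 on this part */
		.rezero_wait = 200,		/* ms to settle before rezero on resume */
		.kernel_tracking = true,	/* in-kernel MT type B finger tracking */
		.dmax = 10,			/* sensor units, see @dmax above */
	};

	static struct rmi_device_platform_data example_pdata = {
		.sensor_pdata = &example_sensor_pdata,
	};

The transport driver copies this platform data into its transport device, and rmi_f11_initialize() then snapshots sensor_pdata into the F11 instance, so the overrides take effect before the input device is configured.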
diff --git a/drivers/input/rmi4/Makefile b/drivers/input/rmi4/Makefile index d01e703304c3..ad7156d8252c 100644 --- a/drivers/input/rmi4/Makefile +++ b/drivers/input/rmi4/Makefile @@ -6,6 +6,7 @@ rmi_core-$(CONFIG_RMI4_2D_SENSOR) += rmi_2d_sensor.o # Function drivers rmi_core-$(CONFIG_RMI4_F11) += rmi_f11.o rmi_core-$(CONFIG_RMI4_F12) += rmi_f12.o +rmi_core-$(CONFIG_RMI4_F30) += rmi_f30.o # Transports obj-$(CONFIG_RMI4_I2C) += rmi_i2c.o diff --git a/drivers/input/rmi4/rmi_bus.c b/drivers/input/rmi4/rmi_bus.c index a3d766118b61..b368b0515c5a 100644 --- a/drivers/input/rmi4/rmi_bus.c +++ b/drivers/input/rmi4/rmi_bus.c @@ -312,6 +312,9 @@ static struct rmi_function_handler *fn_handlers[] = { #ifdef CONFIG_RMI4_F12 &rmi_f12_handler, #endif +#ifdef CONFIG_RMI4_F30 + &rmi_f30_handler, +#endif }; static void __rmi_unregister_function_handlers(int start_idx) diff --git a/drivers/input/rmi4/rmi_driver.h b/drivers/input/rmi4/rmi_driver.h index ed9127daf5a2..6e140fa3cce1 100644 --- a/drivers/input/rmi4/rmi_driver.h +++ b/drivers/input/rmi4/rmi_driver.h @@ -101,4 +101,5 @@ char *rmi_f01_get_product_ID(struct rmi_function *fn); extern struct rmi_function_handler rmi_f01_handler; extern struct rmi_function_handler rmi_f11_handler; extern struct rmi_function_handler rmi_f12_handler; +extern struct rmi_function_handler rmi_f30_handler; #endif diff --git a/drivers/input/rmi4/rmi_f30.c b/drivers/input/rmi4/rmi_f30.c new file mode 100644 index 000000000000..760aff1bc420 --- /dev/null +++ b/drivers/input/rmi4/rmi_f30.c @@ -0,0 +1,407 @@ +/* + * Copyright (c) 2012-2016 Synaptics Incorporated + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. 
+ */ + +#include +#include +#include +#include +#include "rmi_driver.h" + +#define RMI_F30_QUERY_SIZE 2 + +/* Defs for Query 0 */ +#define RMI_F30_EXTENDED_PATTERNS 0x01 +#define RMI_F30_HAS_MAPPABLE_BUTTONS (1 << 1) +#define RMI_F30_HAS_LED (1 << 2) +#define RMI_F30_HAS_GPIO (1 << 3) +#define RMI_F30_HAS_HAPTIC (1 << 4) +#define RMI_F30_HAS_GPIO_DRV_CTL (1 << 5) +#define RMI_F30_HAS_MECH_MOUSE_BTNS (1 << 6) + +/* Defs for Query 1 */ +#define RMI_F30_GPIO_LED_COUNT 0x1F + +/* Defs for Control Registers */ +#define RMI_F30_CTRL_1_GPIO_DEBOUNCE 0x01 +#define RMI_F30_CTRL_1_HALT (1 << 4) +#define RMI_F30_CTRL_1_HALTED (1 << 5) +#define RMI_F30_CTRL_10_NUM_MECH_MOUSE_BTNS 0x03 + +struct rmi_f30_ctrl_data { + int address; + int length; + u8 *regs; +}; + +#define RMI_F30_CTRL_MAX_REGS 32 +#define RMI_F30_CTRL_MAX_BYTES ((RMI_F30_CTRL_MAX_REGS + 7) >> 3) +#define RMI_F30_CTRL_MAX_REG_BLOCKS 11 + +#define RMI_F30_CTRL_REGS_MAX_SIZE (RMI_F30_CTRL_MAX_BYTES \ + + 1 \ + + RMI_F30_CTRL_MAX_BYTES \ + + RMI_F30_CTRL_MAX_BYTES \ + + RMI_F30_CTRL_MAX_BYTES \ + + 6 \ + + RMI_F30_CTRL_MAX_REGS \ + + RMI_F30_CTRL_MAX_REGS \ + + RMI_F30_CTRL_MAX_BYTES \ + + 1 \ + + 1) + +struct f30_data { + /* Query Data */ + bool has_extended_pattern; + bool has_mappable_buttons; + bool has_led; + bool has_gpio; + bool has_haptic; + bool has_gpio_driver_control; + bool has_mech_mouse_btns; + u8 gpioled_count; + + u8 register_count; + + /* Control Register Data */ + struct rmi_f30_ctrl_data ctrl[RMI_F30_CTRL_MAX_REG_BLOCKS]; + u8 ctrl_regs[RMI_F30_CTRL_REGS_MAX_SIZE]; + u32 ctrl_regs_size; + + u8 data_regs[RMI_F30_CTRL_MAX_BYTES]; + u16 *gpioled_key_map; + + struct input_dev *input; +}; + +static int rmi_f30_read_control_parameters(struct rmi_function *fn, + struct f30_data *f30) +{ + struct rmi_device *rmi_dev = fn->rmi_dev; + int error = 0; + + error = rmi_read_block(rmi_dev, fn->fd.control_base_addr, + f30->ctrl_regs, f30->ctrl_regs_size); + if (error) { + dev_err(&rmi_dev->dev, "%s : Could not read control registers at 0x%x error (%d)\n", + __func__, fn->fd.control_base_addr, error); + return error; + } + + return 0; +} + +static int rmi_f30_attention(struct rmi_function *fn, unsigned long *irq_bits) +{ + struct f30_data *f30 = dev_get_drvdata(&fn->dev); + struct rmi_device *rmi_dev = fn->rmi_dev; + int retval; + int gpiled = 0; + int value = 0; + int i; + int reg_num; + + if (!f30->input) + return 0; + + /* Read the gpi led data. 
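+	 * If the transport captured the interrupt payload (attn_data),
+	 * consume the GPIO/LED state from it; otherwise read the F30
+	 * data registers directly from the device.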
*/ + if (rmi_dev->xport->attn_data) { + memcpy(f30->data_regs, rmi_dev->xport->attn_data, + f30->register_count); + rmi_dev->xport->attn_data += f30->register_count; + rmi_dev->xport->attn_size -= f30->register_count; + } else { + retval = rmi_read_block(rmi_dev, fn->fd.data_base_addr, + f30->data_regs, f30->register_count); + + if (retval) { + dev_err(&fn->dev, "%s: Failed to read F30 data registers.\n", + __func__); + return retval; + } + } + + for (reg_num = 0; reg_num < f30->register_count; ++reg_num) { + for (i = 0; gpiled < f30->gpioled_count && i < 8; ++i, + ++gpiled) { + if (f30->gpioled_key_map[gpiled] != 0) { + /* buttons have pull up resistors */ + value = (((f30->data_regs[reg_num] >> i) & 0x01) + == 0); + + rmi_dbg(RMI_DEBUG_FN, &fn->dev, + "%s: call input report key (0x%04x) value (0x%02x)", + __func__, + f30->gpioled_key_map[gpiled], value); + input_report_key(f30->input, + f30->gpioled_key_map[gpiled], + value); + } + + } + } + + return 0; +} + +static int rmi_f30_register_device(struct rmi_function *fn) +{ + int i; + struct rmi_device *rmi_dev = fn->rmi_dev; + struct rmi_driver_data *drv_data = dev_get_drvdata(&rmi_dev->dev); + struct f30_data *f30 = dev_get_drvdata(&fn->dev); + struct input_dev *input_dev; + int button_count = 0; + + input_dev = drv_data->input; + if (!input_dev) { + dev_info(&fn->dev, "F30: no input device found, ignoring.\n"); + return -EINVAL; + } + + f30->input = input_dev; + + set_bit(EV_KEY, input_dev->evbit); + + input_dev->keycode = f30->gpioled_key_map; + input_dev->keycodesize = sizeof(u16); + input_dev->keycodemax = f30->gpioled_count; + + for (i = 0; i < f30->gpioled_count; i++) { + if (f30->gpioled_key_map[i] != 0) { + input_set_capability(input_dev, EV_KEY, + f30->gpioled_key_map[i]); + button_count++; + } + } + + if (button_count == 1) + __set_bit(INPUT_PROP_BUTTONPAD, input_dev->propbit); + return 0; +} + +static int rmi_f30_config(struct rmi_function *fn) +{ + struct f30_data *f30 = dev_get_drvdata(&fn->dev); + struct rmi_driver *drv = fn->rmi_dev->driver; + const struct rmi_device_platform_data *pdata = + rmi_get_platform_data(fn->rmi_dev); + int error; + + if (pdata->f30_data && pdata->f30_data->disable) { + drv->clear_irq_bits(fn->rmi_dev, fn->irq_mask); + } else { + /* Write Control Register values back to device */ + error = rmi_write_block(fn->rmi_dev, fn->fd.control_base_addr, + f30->ctrl_regs, f30->ctrl_regs_size); + if (error) { + dev_err(&fn->rmi_dev->dev, + "%s : Could not write control registers at 0x%x error (%d)\n", + __func__, fn->fd.control_base_addr, error); + return error; + } + + drv->set_irq_bits(fn->rmi_dev, fn->irq_mask); + } + return 0; +} + +static inline void rmi_f30_set_ctrl_data(struct rmi_f30_ctrl_data *ctrl, + int *ctrl_addr, int len, u8 **reg) +{ + ctrl->address = *ctrl_addr; + ctrl->length = len; + ctrl->regs = *reg; + *ctrl_addr += len; + *reg += len; +} + +static inline bool rmi_f30_is_valid_button(int button, + struct rmi_f30_ctrl_data *ctrl) +{ + int byte_position = button >> 3; + int bit_position = button & 0x07; + + /* + * ctrl2 -> dir == 0 -> input mode + * ctrl3 -> data == 1 -> actual button + */ + return !(ctrl[2].regs[byte_position] & BIT(bit_position)) && + (ctrl[3].regs[byte_position] & BIT(bit_position)); +} + +static inline int rmi_f30_initialize(struct rmi_function *fn) +{ + struct f30_data *f30; + struct rmi_device *rmi_dev = fn->rmi_dev; + const struct rmi_device_platform_data *pdata; + int retval = 0; + int control_address; + int i; + int button; + u8 buf[RMI_F30_QUERY_SIZE]; + u8 
*ctrl_reg; + u8 *map_memory; + + f30 = devm_kzalloc(&fn->dev, sizeof(struct f30_data), + GFP_KERNEL); + if (!f30) + return -ENOMEM; + + dev_set_drvdata(&fn->dev, f30); + + retval = rmi_read_block(fn->rmi_dev, fn->fd.query_base_addr, buf, + RMI_F30_QUERY_SIZE); + + if (retval) { + dev_err(&fn->dev, "Failed to read query register.\n"); + return retval; + } + + f30->has_extended_pattern = buf[0] & RMI_F30_EXTENDED_PATTERNS; + f30->has_mappable_buttons = buf[0] & RMI_F30_HAS_MAPPABLE_BUTTONS; + f30->has_led = buf[0] & RMI_F30_HAS_LED; + f30->has_gpio = buf[0] & RMI_F30_HAS_GPIO; + f30->has_haptic = buf[0] & RMI_F30_HAS_HAPTIC; + f30->has_gpio_driver_control = buf[0] & RMI_F30_HAS_GPIO_DRV_CTL; + f30->has_mech_mouse_btns = buf[0] & RMI_F30_HAS_MECH_MOUSE_BTNS; + f30->gpioled_count = buf[1] & RMI_F30_GPIO_LED_COUNT; + + f30->register_count = (f30->gpioled_count + 7) >> 3; + + control_address = fn->fd.control_base_addr; + ctrl_reg = f30->ctrl_regs; + + if (f30->has_gpio && f30->has_led) + rmi_f30_set_ctrl_data(&f30->ctrl[0], &control_address, + f30->register_count, &ctrl_reg); + + rmi_f30_set_ctrl_data(&f30->ctrl[1], &control_address, sizeof(u8), + &ctrl_reg); + + if (f30->has_gpio) { + rmi_f30_set_ctrl_data(&f30->ctrl[2], &control_address, + f30->register_count, &ctrl_reg); + + rmi_f30_set_ctrl_data(&f30->ctrl[3], &control_address, + f30->register_count, &ctrl_reg); + } + + if (f30->has_led) { + int ctrl5_len; + + rmi_f30_set_ctrl_data(&f30->ctrl[4], &control_address, + f30->register_count, &ctrl_reg); + + if (f30->has_extended_pattern) + ctrl5_len = 6; + else + ctrl5_len = 2; + + rmi_f30_set_ctrl_data(&f30->ctrl[5], &control_address, + ctrl5_len, &ctrl_reg); + } + + if (f30->has_led || f30->has_gpio_driver_control) { + /* control 6 uses a byte per gpio/led */ + rmi_f30_set_ctrl_data(&f30->ctrl[6], &control_address, + f30->gpioled_count, &ctrl_reg); + } + + if (f30->has_mappable_buttons) { + /* control 7 uses a byte per gpio/led */ + rmi_f30_set_ctrl_data(&f30->ctrl[7], &control_address, + f30->gpioled_count, &ctrl_reg); + } + + if (f30->has_haptic) { + rmi_f30_set_ctrl_data(&f30->ctrl[8], &control_address, + f30->register_count, &ctrl_reg); + + rmi_f30_set_ctrl_data(&f30->ctrl[9], &control_address, + sizeof(u8), &ctrl_reg); + } + + if (f30->has_mech_mouse_btns) + rmi_f30_set_ctrl_data(&f30->ctrl[10], &control_address, + sizeof(u8), &ctrl_reg); + + f30->ctrl_regs_size = ctrl_reg - f30->ctrl_regs + ?: RMI_F30_CTRL_REGS_MAX_SIZE; + + retval = rmi_f30_read_control_parameters(fn, f30); + if (retval < 0) { + dev_err(&fn->dev, + "Failed to initialize F30 control params.\n"); + return retval; + } + + map_memory = devm_kzalloc(&fn->dev, + (f30->gpioled_count * (sizeof(u16))), + GFP_KERNEL); + if (!map_memory) { + dev_err(&fn->dev, "Failed to allocate gpioled map memory.\n"); + return -ENOMEM; + } + + f30->gpioled_key_map = (u16 *)map_memory; + + pdata = rmi_get_platform_data(rmi_dev); + if (pdata && f30->has_gpio) { + button = BTN_LEFT; + for (i = 0; i < f30->gpioled_count; i++) { + if (rmi_f30_is_valid_button(i, f30->ctrl)) { + f30->gpioled_key_map[i] = button++; + + /* + * buttonpad might be given by + * f30->has_mech_mouse_btns, but I am + * not sure, so use only the pdata info + */ + if (pdata->f30_data && + pdata->f30_data->buttonpad) + break; + } + } + } + + return 0; +} + +static int rmi_f30_probe(struct rmi_function *fn) +{ + int rc; + const struct rmi_device_platform_data *pdata = + rmi_get_platform_data(fn->rmi_dev); + + if (pdata->f30_data && pdata->f30_data->disable) + return 0; + + rc = 
rmi_f30_initialize(fn); + if (rc < 0) + goto error_exit; + + rc = rmi_f30_register_device(fn); + if (rc < 0) + goto error_exit; + + return 0; + +error_exit: + return rc; + +} + +struct rmi_function_handler rmi_f30_handler = { + .driver = { + .name = "rmi4_f30", + }, + .func = 0x30, + .probe = rmi_f30_probe, + .config = rmi_f30_config, + .attention = rmi_f30_attention, +}; diff --git a/include/linux/rmi.h b/include/linux/rmi.h index b527064bac47..ac89d1e731dc 100644 --- a/include/linux/rmi.h +++ b/include/linux/rmi.h @@ -101,6 +101,21 @@ struct rmi_2d_sensor_platform_data { int dmax; }; +/** + * struct rmi_f30_data - overrides defaults for a single F30 GPIO/LED chip. + * @buttonpad - the touchpad is a buttonpad, so enable only the first actual + * button that is found. + * @trackstick_buttons - Set when the function 30 is handling the physical + * buttons of the trackstick (as a PS/2 passthrough device). + * @disable - the touchpad incorrectly reports F30 and it should be ignored. + * This is a special case which is due to misconfigured firmware. + */ +struct rmi_f30_data { + bool buttonpad; + bool trackstick_buttons; + bool disable; +}; + /** * struct rmi_f01_power - override default power management settings. * @@ -147,6 +162,7 @@ struct rmi_device_platform_data { /* function handler pdata */ struct rmi_2d_sensor_platform_data *sensor_pdata; struct rmi_f01_power_management power_management; + struct rmi_f30_data *f30_data; }; -- cgit v1.2.3 From 8d99758dee31ff4a72bfc35d3a7a51fe66b7bb91 Mon Sep 17 00:00:00 2001 From: Andrew Duggan Date: Thu, 10 Mar 2016 15:58:12 -0800 Subject: Input: synaptics-rmi4 - add SPI transport driver Add the transport driver for devices using RMI4 over SPI. Signed-off-by: Andrew Duggan Tested-by: Benjamin Tissoires Tested-by: Linus Walleij Tested-by: Bjorn Andersson Signed-off-by: Dmitry Torokhov --- drivers/input/rmi4/Kconfig | 9 + drivers/input/rmi4/Makefile | 1 + drivers/input/rmi4/rmi_spi.c | 547 +++++++++++++++++++++++++++++++++++++++++++ include/linux/rmi.h | 51 ++++ 4 files changed, 608 insertions(+) create mode 100644 drivers/input/rmi4/rmi_spi.c (limited to 'include/linux') diff --git a/drivers/input/rmi4/Kconfig b/drivers/input/rmi4/Kconfig index 284faec30a7a..f73df2495fed 100644 --- a/drivers/input/rmi4/Kconfig +++ b/drivers/input/rmi4/Kconfig @@ -18,6 +18,15 @@ config RMI4_I2C If unsure, say Y. +config RMI4_SPI + tristate "RMI4 SPI Support" + depends on RMI4_CORE && SPI + help + Say Y here if you want to support RMI4 devices connected to a SPI + bus. + + If unsure, say N. + config RMI4_2D_SENSOR bool depends on RMI4_CORE diff --git a/drivers/input/rmi4/Makefile b/drivers/input/rmi4/Makefile index ad7156d8252c..95c00a783992 100644 --- a/drivers/input/rmi4/Makefile +++ b/drivers/input/rmi4/Makefile @@ -10,3 +10,4 @@ rmi_core-$(CONFIG_RMI4_F30) += rmi_f30.o # Transports obj-$(CONFIG_RMI4_I2C) += rmi_i2c.o +obj-$(CONFIG_RMI4_SPI) += rmi_spi.o diff --git a/drivers/input/rmi4/rmi_spi.c b/drivers/input/rmi4/rmi_spi.c new file mode 100644 index 000000000000..4319c634553f --- /dev/null +++ b/drivers/input/rmi4/rmi_spi.c @@ -0,0 +1,547 @@ +/* + * Copyright (c) 2011-2016 Synaptics Incorporated + * Copyright (c) 2011 Unixphere + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation.
+ */ + +#include +#include +#include +#include +#include +#include +#include "rmi_driver.h" + +#define RMI_SPI_DEFAULT_XFER_BUF_SIZE 64 + +#define RMI_PAGE_SELECT_REGISTER 0x00FF +#define RMI_SPI_PAGE(addr) (((addr) >> 8) & 0x80) +#define RMI_SPI_XFER_SIZE_LIMIT 255 + +#define BUFFER_SIZE_INCREMENT 32 + +enum rmi_spi_op { + RMI_SPI_WRITE = 0, + RMI_SPI_READ, + RMI_SPI_V2_READ_UNIFIED, + RMI_SPI_V2_READ_SPLIT, + RMI_SPI_V2_WRITE, +}; + +struct rmi_spi_cmd { + enum rmi_spi_op op; + u16 addr; +}; + +struct rmi_spi_xport { + struct rmi_transport_dev xport; + struct spi_device *spi; + + struct mutex page_mutex; + int page; + + int irq; + + u8 *rx_buf; + u8 *tx_buf; + int xfer_buf_size; + + struct spi_transfer *rx_xfers; + struct spi_transfer *tx_xfers; + int rx_xfer_count; + int tx_xfer_count; +}; + +static int rmi_spi_manage_pools(struct rmi_spi_xport *rmi_spi, int len) +{ + struct spi_device *spi = rmi_spi->spi; + int buf_size = rmi_spi->xfer_buf_size + ? rmi_spi->xfer_buf_size : RMI_SPI_DEFAULT_XFER_BUF_SIZE; + struct spi_transfer *xfer_buf; + void *buf; + void *tmp; + + while (buf_size < len) + buf_size *= 2; + + if (buf_size > RMI_SPI_XFER_SIZE_LIMIT) + buf_size = RMI_SPI_XFER_SIZE_LIMIT; + + tmp = rmi_spi->rx_buf; + buf = devm_kzalloc(&spi->dev, buf_size * 2, + GFP_KERNEL | GFP_DMA); + if (!buf) + return -ENOMEM; + + rmi_spi->rx_buf = buf; + rmi_spi->tx_buf = &rmi_spi->rx_buf[buf_size]; + rmi_spi->xfer_buf_size = buf_size; + + if (tmp) + devm_kfree(&spi->dev, tmp); + + if (rmi_spi->xport.pdata.spi_data.read_delay_us) + rmi_spi->rx_xfer_count = buf_size; + else + rmi_spi->rx_xfer_count = 1; + + if (rmi_spi->xport.pdata.spi_data.write_delay_us) + rmi_spi->tx_xfer_count = buf_size; + else + rmi_spi->tx_xfer_count = 1; + + /* + * Allocate a pool of spi_transfer buffers for devices which need + * per byte delays. + */ + tmp = rmi_spi->rx_xfers; + xfer_buf = devm_kzalloc(&spi->dev, + (rmi_spi->rx_xfer_count + rmi_spi->tx_xfer_count) + * sizeof(struct spi_transfer), GFP_KERNEL); + if (!xfer_buf) + return -ENOMEM; + + rmi_spi->rx_xfers = xfer_buf; + rmi_spi->tx_xfers = &xfer_buf[rmi_spi->rx_xfer_count]; + + if (tmp) + devm_kfree(&spi->dev, tmp); + + return 0; +} + +static int rmi_spi_xfer(struct rmi_spi_xport *rmi_spi, + const struct rmi_spi_cmd *cmd, const u8 *tx_buf, + int tx_len, u8 *rx_buf, int rx_len) +{ + struct spi_device *spi = rmi_spi->spi; + struct rmi_device_platform_data_spi *spi_data = + &rmi_spi->xport.pdata.spi_data; + struct spi_message msg; + struct spi_transfer *xfer; + int ret = 0; + int len; + int cmd_len = 0; + int total_tx_len; + int i; + u16 addr = cmd->addr; + + spi_message_init(&msg); + + switch (cmd->op) { + case RMI_SPI_WRITE: + case RMI_SPI_READ: + cmd_len += 2; + break; + case RMI_SPI_V2_READ_UNIFIED: + case RMI_SPI_V2_READ_SPLIT: + case RMI_SPI_V2_WRITE: + cmd_len += 4; + break; + } + + total_tx_len = cmd_len + tx_len; + len = max(total_tx_len, rx_len); + + if (len > RMI_SPI_XFER_SIZE_LIMIT) + return -EINVAL; + + if (rmi_spi->xfer_buf_size < len) + rmi_spi_manage_pools(rmi_spi, len); + + if (addr == 0) + /* + * SPI needs an address. Use 0x7FF if we want to keep + * reading from the last position of the register pointer. 
+ */ + addr = 0x7FF; + + switch (cmd->op) { + case RMI_SPI_WRITE: + rmi_spi->tx_buf[0] = (addr >> 8); + rmi_spi->tx_buf[1] = addr & 0xFF; + break; + case RMI_SPI_READ: + rmi_spi->tx_buf[0] = (addr >> 8) | 0x80; + rmi_spi->tx_buf[1] = addr & 0xFF; + break; + case RMI_SPI_V2_READ_UNIFIED: + break; + case RMI_SPI_V2_READ_SPLIT: + break; + case RMI_SPI_V2_WRITE: + rmi_spi->tx_buf[0] = 0x40; + rmi_spi->tx_buf[1] = (addr >> 8) & 0xFF; + rmi_spi->tx_buf[2] = addr & 0xFF; + rmi_spi->tx_buf[3] = tx_len; + break; + } + + if (tx_buf) + memcpy(&rmi_spi->tx_buf[cmd_len], tx_buf, tx_len); + + if (rmi_spi->tx_xfer_count > 1) { + for (i = 0; i < total_tx_len; i++) { + xfer = &rmi_spi->tx_xfers[i]; + memset(xfer, 0, sizeof(struct spi_transfer)); + xfer->tx_buf = &rmi_spi->tx_buf[i]; + xfer->len = 1; + xfer->delay_usecs = spi_data->write_delay_us; + spi_message_add_tail(xfer, &msg); + } + } else { + xfer = rmi_spi->tx_xfers; + memset(xfer, 0, sizeof(struct spi_transfer)); + xfer->tx_buf = rmi_spi->tx_buf; + xfer->len = total_tx_len; + spi_message_add_tail(xfer, &msg); + } + + rmi_dbg(RMI_DEBUG_XPORT, &spi->dev, "%s: cmd: %s tx_buf len: %d tx_buf: %*ph\n", + __func__, cmd->op == RMI_SPI_WRITE ? "WRITE" : "READ", + total_tx_len, total_tx_len, rmi_spi->tx_buf); + + if (rx_buf) { + if (rmi_spi->rx_xfer_count > 1) { + for (i = 0; i < rx_len; i++) { + xfer = &rmi_spi->rx_xfers[i]; + memset(xfer, 0, sizeof(struct spi_transfer)); + xfer->rx_buf = &rmi_spi->rx_buf[i]; + xfer->len = 1; + xfer->delay_usecs = spi_data->read_delay_us; + spi_message_add_tail(xfer, &msg); + } + } else { + xfer = rmi_spi->rx_xfers; + memset(xfer, 0, sizeof(struct spi_transfer)); + xfer->rx_buf = rmi_spi->rx_buf; + xfer->len = rx_len; + spi_message_add_tail(xfer, &msg); + } + } + + ret = spi_sync(spi, &msg); + if (ret < 0) { + dev_err(&spi->dev, "spi xfer failed: %d\n", ret); + return ret; + } + + if (rx_buf) { + memcpy(rx_buf, rmi_spi->rx_buf, rx_len); + rmi_dbg(RMI_DEBUG_XPORT, &spi->dev, "%s: (%d) %*ph\n", + __func__, rx_len, rx_len, rx_buf); + } + + return 0; +} + +/* + * rmi_set_page - Set RMI page + * @xport: The pointer to the rmi_transport_dev struct + * @page: The new page address. + * + * RMI devices have 16-bit addressing, but some of the transport + * implementations (like SMBus) only have 8-bit addressing. So RMI implements + * a page address at 0xff of every page so we can reliable page addresses + * every 256 registers. + * + * The page_mutex lock must be held when this function is entered. + * + * Returns zero on success, non-zero on failure. 
+ */ +static int rmi_set_page(struct rmi_spi_xport *rmi_spi, u8 page) +{ + struct rmi_spi_cmd cmd; + int ret; + + cmd.op = RMI_SPI_WRITE; + cmd.addr = RMI_PAGE_SELECT_REGISTER; + + ret = rmi_spi_xfer(rmi_spi, &cmd, &page, 1, NULL, 0); + + if (!ret) + rmi_spi->page = page; + + return ret; +} + +static int rmi_spi_write_block(struct rmi_transport_dev *xport, u16 addr, + const void *buf, size_t len) +{ + struct rmi_spi_xport *rmi_spi = + container_of(xport, struct rmi_spi_xport, xport); + struct rmi_spi_cmd cmd; + int ret; + + mutex_lock(&rmi_spi->page_mutex); + + if (RMI_SPI_PAGE(addr) != rmi_spi->page) { + ret = rmi_set_page(rmi_spi, RMI_SPI_PAGE(addr)); + if (ret) + goto exit; + } + + cmd.op = RMI_SPI_WRITE; + cmd.addr = addr; + + ret = rmi_spi_xfer(rmi_spi, &cmd, buf, len, NULL, 0); + +exit: + mutex_unlock(&rmi_spi->page_mutex); + return ret; +} + +static int rmi_spi_read_block(struct rmi_transport_dev *xport, u16 addr, + void *buf, size_t len) +{ + struct rmi_spi_xport *rmi_spi = + container_of(xport, struct rmi_spi_xport, xport); + struct rmi_spi_cmd cmd; + int ret; + + mutex_lock(&rmi_spi->page_mutex); + + if (RMI_SPI_PAGE(addr) != rmi_spi->page) { + ret = rmi_set_page(rmi_spi, RMI_SPI_PAGE(addr)); + if (ret) + goto exit; + } + + cmd.op = RMI_SPI_READ; + cmd.addr = addr; + + ret = rmi_spi_xfer(rmi_spi, &cmd, NULL, 0, buf, len); + +exit: + mutex_unlock(&rmi_spi->page_mutex); + return ret; +} + +static const struct rmi_transport_ops rmi_spi_ops = { + .write_block = rmi_spi_write_block, + .read_block = rmi_spi_read_block, +}; + +static irqreturn_t rmi_spi_irq(int irq, void *dev_id) +{ + struct rmi_spi_xport *rmi_spi = dev_id; + struct rmi_device *rmi_dev = rmi_spi->xport.rmi_dev; + int ret; + + ret = rmi_process_interrupt_requests(rmi_dev); + if (ret) + rmi_dbg(RMI_DEBUG_XPORT, &rmi_dev->dev, + "Failed to process interrupt request: %d\n", ret); + + return IRQ_HANDLED; +} + +static int rmi_spi_init_irq(struct spi_device *spi) +{ + struct rmi_spi_xport *rmi_spi = spi_get_drvdata(spi); + int irq_flags = irqd_get_trigger_type(irq_get_irq_data(rmi_spi->irq)); + int ret; + + if (!irq_flags) + irq_flags = IRQF_TRIGGER_LOW; + + ret = devm_request_threaded_irq(&spi->dev, rmi_spi->irq, NULL, + rmi_spi_irq, irq_flags | IRQF_ONESHOT, + dev_name(&spi->dev), rmi_spi); + if (ret < 0) { + dev_warn(&spi->dev, "Failed to register interrupt %d\n", + rmi_spi->irq); + return ret; + } + + return 0; +} + +static int rmi_spi_probe(struct spi_device *spi) +{ + struct rmi_spi_xport *rmi_spi; + struct rmi_device_platform_data *pdata; + struct rmi_device_platform_data *spi_pdata = spi->dev.platform_data; + int retval; + + if (spi->master->flags & SPI_MASTER_HALF_DUPLEX) + return -EINVAL; + + rmi_spi = devm_kzalloc(&spi->dev, sizeof(struct rmi_spi_xport), + GFP_KERNEL); + if (!rmi_spi) + return -ENOMEM; + + pdata = &rmi_spi->xport.pdata; + + if (spi_pdata) + *pdata = *spi_pdata; + + if (pdata->spi_data.bits_per_word) + spi->bits_per_word = pdata->spi_data.bits_per_word; + + if (pdata->spi_data.mode) + spi->mode = pdata->spi_data.mode; + + retval = spi_setup(spi); + if (retval < 0) { + dev_err(&spi->dev, "spi_setup failed!\n"); + return retval; + } + + if (spi->irq > 0) + rmi_spi->irq = spi->irq; + + rmi_spi->spi = spi; + mutex_init(&rmi_spi->page_mutex); + + rmi_spi->xport.dev = &spi->dev; + rmi_spi->xport.proto_name = "spi"; + rmi_spi->xport.ops = &rmi_spi_ops; + + spi_set_drvdata(spi, rmi_spi); + + retval = rmi_spi_manage_pools(rmi_spi, RMI_SPI_DEFAULT_XFER_BUF_SIZE); + if (retval) + return retval; + + /* + * 
Setting the page to zero will (a) make sure the PSR is in a + * known state, and (b) make sure we can talk to the device. + */ + retval = rmi_set_page(rmi_spi, 0); + if (retval) { + dev_err(&spi->dev, "Failed to set page select to 0.\n"); + return retval; + } + + retval = rmi_register_transport_device(&rmi_spi->xport); + if (retval) { + dev_err(&spi->dev, "failed to register transport.\n"); + return retval; + } + + retval = rmi_spi_init_irq(spi); + if (retval < 0) + return retval; + + dev_info(&spi->dev, "registered RMI SPI driver\n"); + return 0; +} + +static int rmi_spi_remove(struct spi_device *spi) +{ + struct rmi_spi_xport *rmi_spi = spi_get_drvdata(spi); + + rmi_unregister_transport_device(&rmi_spi->xport); + + return 0; +} + +#ifdef CONFIG_PM_SLEEP +static int rmi_spi_suspend(struct device *dev) +{ + struct spi_device *spi = to_spi_device(dev); + struct rmi_spi_xport *rmi_spi = spi_get_drvdata(spi); + int ret; + + ret = rmi_driver_suspend(rmi_spi->xport.rmi_dev); + if (ret) + dev_warn(dev, "Failed to suspend device: %d\n", ret); + + disable_irq(rmi_spi->irq); + if (device_may_wakeup(&spi->dev)) { + ret = enable_irq_wake(rmi_spi->irq); + if (ret) + dev_warn(dev, "Failed to enable irq for wake: %d\n", + ret); + } + return ret; +} + +static int rmi_spi_resume(struct device *dev) +{ + struct spi_device *spi = to_spi_device(dev); + struct rmi_spi_xport *rmi_spi = spi_get_drvdata(spi); + int ret; + + enable_irq(rmi_spi->irq); + if (device_may_wakeup(&spi->dev)) { + ret = disable_irq_wake(rmi_spi->irq); + if (ret) + dev_warn(dev, "Failed to disable irq for wake: %d\n", + ret); + } + + ret = rmi_driver_resume(rmi_spi->xport.rmi_dev); + if (ret) + dev_warn(dev, "Failed to resume device: %d\n", ret); + + return ret; +} +#endif + +#ifdef CONFIG_PM +static int rmi_spi_runtime_suspend(struct device *dev) +{ + struct spi_device *spi = to_spi_device(dev); + struct rmi_spi_xport *rmi_spi = spi_get_drvdata(spi); + int ret; + + ret = rmi_driver_suspend(rmi_spi->xport.rmi_dev); + if (ret) + dev_warn(dev, "Failed to suspend device: %d\n", ret); + + disable_irq(rmi_spi->irq); + + return 0; +} + +static int rmi_spi_runtime_resume(struct device *dev) +{ + struct spi_device *spi = to_spi_device(dev); + struct rmi_spi_xport *rmi_spi = spi_get_drvdata(spi); + int ret; + + enable_irq(rmi_spi->irq); + + ret = rmi_driver_resume(rmi_spi->xport.rmi_dev); + if (ret) + dev_warn(dev, "Failed to resume device: %d\n", ret); + + return 0; +} +#endif + +static const struct dev_pm_ops rmi_spi_pm = { + SET_SYSTEM_SLEEP_PM_OPS(rmi_spi_suspend, rmi_spi_resume) + SET_RUNTIME_PM_OPS(rmi_spi_runtime_suspend, rmi_spi_runtime_resume, + NULL) +}; + +static const struct spi_device_id rmi_id[] = { + { "rmi4_spi", 0 }, + { } +}; +MODULE_DEVICE_TABLE(spi, rmi_id); + +static struct spi_driver rmi_spi_driver = { + .driver = { + .name = "rmi4_spi", + .pm = &rmi_spi_pm, + }, + .id_table = rmi_id, + .probe = rmi_spi_probe, + .remove = rmi_spi_remove, +}; + +module_spi_driver(rmi_spi_driver); + +MODULE_AUTHOR("Christopher Heiny "); +MODULE_AUTHOR("Andrew Duggan "); +MODULE_DESCRIPTION("RMI SPI driver"); +MODULE_LICENSE("GPL"); +MODULE_VERSION(RMI_DRIVER_VERSION); diff --git a/include/linux/rmi.h b/include/linux/rmi.h index ac89d1e731dc..e0aca1476001 100644 --- a/include/linux/rmi.h +++ b/include/linux/rmi.h @@ -149,6 +149,55 @@ struct rmi_f01_power_management { u8 doze_interval; }; +/** + * struct rmi_device_platform_data_spi - provides parameters used in SPI + * communications.
All Synaptics SPI products support a standard SPI + * interface; some also support what is called SPI V2 mode, depending on + * firmware and/or ASIC limitations. In V2 mode, the touch sensor can + * support shorter delays during certain operations, and these are specified + * separately from the standard mode delays. + * + * @block_delay - for standard SPI transactions consisting of both a read and + * write operation, the delay (in microseconds) between the read and write + * operations. + * @split_read_block_delay_us - for V2 SPI transactions consisting of both a + * read and write operation, the delay (in microseconds) between the read and + * write operations. + * @read_delay_us - the delay between each byte of a read operation in normal + * SPI mode. + * @write_delay_us - the delay between each byte of a write operation in normal + * SPI mode. + * @split_read_byte_delay_us - the delay between each byte of a read operation + * in V2 mode. + * @pre_delay_us - the delay before the start of a SPI transaction. This is + * typically useful in conjunction with custom chip select assertions (see + * below). + * @post_delay_us - the delay after the completion of an SPI transaction. This + * is typically useful in conjunction with custom chip select assertions (see + * below). + * @cs_assert - For systems where the SPI subsystem does not control the CS/SSB + * line, or where such control is broken, you can provide a custom routine to + * handle a GPIO as CS/SSB. This routine will be called at the beginning and + * end of each SPI transaction. The RMI SPI implementation will wait + * pre_delay_us after this routine returns before starting the SPI transfer; + * and post_delay_us after completion of the SPI transfer(s) before calling it + * with assert==FALSE. + */ +struct rmi_device_platform_data_spi { + u32 block_delay_us; + u32 split_read_block_delay_us; + u32 read_delay_us; + u32 write_delay_us; + u32 split_read_byte_delay_us; + u32 pre_delay_us; + u32 post_delay_us; + u8 bits_per_word; + u16 mode; + + void *cs_assert_data; + int (*cs_assert)(const void *cs_assert_data, const bool assert); +}; + /** * struct rmi_device_platform_data - system specific configuration info. * @@ -159,6 +208,8 @@ struct rmi_f01_power_management { struct rmi_device_platform_data { int reset_delay_ms; + struct rmi_device_platform_data_spi spi_data; + /* function handler pdata */ struct rmi_2d_sensor_platform_data *sensor_pdata; struct rmi_f01_power_management power_management; -- cgit v1.2.3 From c76d9ae415f1a80489a76ea41620c3f17383b7fb Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Thu, 10 Mar 2016 15:01:13 -0300 Subject: spi: Fix htmldocs build error due struct spi_replaced_transfers The kernel-doc has to be just before the structure definition but the one for struct spi_replaced_transfers was before a structure declaration and that confuses kernel-doc which complains with the following build error: .//include/linux/spi/spi.h:933: error: Cannot parse struct or union! 
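(For illustration, with a hypothetical struct foo: kernel-doc binds a /** comment to the first definition that follows it, so a bare forward declaration must not sit between the comment and the definition.

	/* Confuses kernel-doc: a declaration follows the comment. */
	/**
	 * struct foo - example structure
	 * @bar: example member
	 */
	struct foo;
	struct foo { int bar; };

	/* Parses cleanly: declare first, document just before the definition. */
	struct foo;
	/**
	 * struct foo - example structure
	 * @bar: example member
	 */
	struct foo { int bar; };

This is exactly the reordering the hunk below applies to spi_replaced_transfers and its release callback typedef.)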
Signed-off-by: Javier Martinez Canillas Signed-off-by: Mark Brown --- include/linux/spi/spi.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index 3c02b4d06268..1fc33db4347f 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -898,6 +898,10 @@ spi_max_transfer_size(struct spi_device *spi) /* SPI transfer replacement methods which make use of spi_res */ +struct spi_replaced_transfers; +typedef void (*spi_replaced_release_t)(struct spi_master *master, + struct spi_message *msg, + struct spi_replaced_transfers *res); /** * struct spi_replaced_transfers - structure describing the spi_transfer * replacements that have occurred @@ -917,10 +921,6 @@ spi_max_transfer_size(struct spi_device *spi) * if some extra allocation is requested, so alignment will be the same * as for spi_transfers */ -struct spi_replaced_transfers; -typedef void (*spi_replaced_release_t)(struct spi_master *master, - struct spi_message *msg, - struct spi_replaced_transfers *res); struct spi_replaced_transfers { spi_replaced_release_t release; void *extradata; -- cgit v1.2.3 From c7019c4d739e79d7baaa13c86dcaaedec8113d70 Mon Sep 17 00:00:00 2001 From: Gary R Hook Date: Tue, 1 Mar 2016 13:49:15 -0600 Subject: crypto: ccp - CCP versioning support Future hardware may introduce new algorithms wherein the driver will need to manage resources for different versions of the cryptographic coprocessor. This precursor patch determines the version of the available device, and marks and registers algorithms accordingly. A structure is added which manages the version-specific data. Signed-off-by: Gary R Hook Acked-by: Tom Lendacky Signed-off-by: Herbert Xu --- drivers/crypto/ccp/ccp-crypto-aes.c | 12 +++++++++++- drivers/crypto/ccp/ccp-crypto-sha.c | 9 ++++++++- drivers/crypto/ccp/ccp-dev.c | 27 +++++++++++++++++++++++++ drivers/crypto/ccp/ccp-dev.h | 8 ++++++++ drivers/crypto/ccp/ccp-pci.c | 8 +++++++- drivers/crypto/ccp/ccp-platform.c | 39 +++++++++++++++++++++++++++++++++++-- include/linux/ccp.h | 17 ++++++++++++++++ 7 files changed, 115 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/drivers/crypto/ccp/ccp-crypto-aes.c b/drivers/crypto/ccp/ccp-crypto-aes.c index 7984f910884d..89291c15015c 100644 --- a/drivers/crypto/ccp/ccp-crypto-aes.c +++ b/drivers/crypto/ccp/ccp-crypto-aes.c @@ -1,7 +1,7 @@ /* * AMD Cryptographic Coprocessor (CCP) AES crypto API support * - * Copyright (C) 2013 Advanced Micro Devices, Inc. + * Copyright (C) 2013,2016 Advanced Micro Devices, Inc. 
* * Author: Tom Lendacky * @@ -259,6 +259,7 @@ static struct crypto_alg ccp_aes_rfc3686_defaults = { struct ccp_aes_def { enum ccp_aes_mode mode; + unsigned int version; const char *name; const char *driver_name; unsigned int blocksize; @@ -269,6 +270,7 @@ struct ccp_aes_def { static struct ccp_aes_def aes_algs[] = { { .mode = CCP_AES_MODE_ECB, + .version = CCP_VERSION(3, 0), .name = "ecb(aes)", .driver_name = "ecb-aes-ccp", .blocksize = AES_BLOCK_SIZE, @@ -277,6 +279,7 @@ static struct ccp_aes_def aes_algs[] = { }, { .mode = CCP_AES_MODE_CBC, + .version = CCP_VERSION(3, 0), .name = "cbc(aes)", .driver_name = "cbc-aes-ccp", .blocksize = AES_BLOCK_SIZE, @@ -285,6 +288,7 @@ static struct ccp_aes_def aes_algs[] = { }, { .mode = CCP_AES_MODE_CFB, + .version = CCP_VERSION(3, 0), .name = "cfb(aes)", .driver_name = "cfb-aes-ccp", .blocksize = AES_BLOCK_SIZE, @@ -293,6 +297,7 @@ static struct ccp_aes_def aes_algs[] = { }, { .mode = CCP_AES_MODE_OFB, + .version = CCP_VERSION(3, 0), .name = "ofb(aes)", .driver_name = "ofb-aes-ccp", .blocksize = 1, @@ -301,6 +306,7 @@ static struct ccp_aes_def aes_algs[] = { }, { .mode = CCP_AES_MODE_CTR, + .version = CCP_VERSION(3, 0), .name = "ctr(aes)", .driver_name = "ctr-aes-ccp", .blocksize = 1, @@ -309,6 +315,7 @@ static struct ccp_aes_def aes_algs[] = { }, { .mode = CCP_AES_MODE_CTR, + .version = CCP_VERSION(3, 0), .name = "rfc3686(ctr(aes))", .driver_name = "rfc3686-ctr-aes-ccp", .blocksize = 1, @@ -357,8 +364,11 @@ static int ccp_register_aes_alg(struct list_head *head, int ccp_register_aes_algs(struct list_head *head) { int i, ret; + unsigned int ccpversion = ccp_version(); for (i = 0; i < ARRAY_SIZE(aes_algs); i++) { + if (aes_algs[i].version > ccpversion) + continue; ret = ccp_register_aes_alg(head, &aes_algs[i]); if (ret) return ret; diff --git a/drivers/crypto/ccp/ccp-crypto-sha.c b/drivers/crypto/ccp/ccp-crypto-sha.c index 8ef06fad8b14..b5ad72897dc2 100644 --- a/drivers/crypto/ccp/ccp-crypto-sha.c +++ b/drivers/crypto/ccp/ccp-crypto-sha.c @@ -1,7 +1,7 @@ /* * AMD Cryptographic Coprocessor (CCP) SHA crypto API support * - * Copyright (C) 2013 Advanced Micro Devices, Inc. + * Copyright (C) 2013,2016 Advanced Micro Devices, Inc. 
* * Author: Tom Lendacky * @@ -341,6 +341,7 @@ static void ccp_hmac_sha_cra_exit(struct crypto_tfm *tfm) } struct ccp_sha_def { + unsigned int version; const char *name; const char *drv_name; enum ccp_sha_type type; @@ -350,6 +351,7 @@ struct ccp_sha_def { static struct ccp_sha_def sha_algs[] = { { + .version = CCP_VERSION(3, 0), .name = "sha1", .drv_name = "sha1-ccp", .type = CCP_SHA_TYPE_1, @@ -357,6 +359,7 @@ static struct ccp_sha_def sha_algs[] = { .block_size = SHA1_BLOCK_SIZE, }, { + .version = CCP_VERSION(3, 0), .name = "sha224", .drv_name = "sha224-ccp", .type = CCP_SHA_TYPE_224, @@ -364,6 +367,7 @@ static struct ccp_sha_def sha_algs[] = { .block_size = SHA224_BLOCK_SIZE, }, { + .version = CCP_VERSION(3, 0), .name = "sha256", .drv_name = "sha256-ccp", .type = CCP_SHA_TYPE_256, @@ -480,8 +484,11 @@ static int ccp_register_sha_alg(struct list_head *head, int ccp_register_sha_algs(struct list_head *head) { int i, ret; + unsigned int ccpversion = ccp_version(); for (i = 0; i < ARRAY_SIZE(sha_algs); i++) { + if (sha_algs[i].version > ccpversion) + continue; ret = ccp_register_sha_alg(head, &sha_algs[i]); if (ret) return ret; diff --git a/drivers/crypto/ccp/ccp-dev.c b/drivers/crypto/ccp/ccp-dev.c index dd71e673a109..5348512da643 100644 --- a/drivers/crypto/ccp/ccp-dev.c +++ b/drivers/crypto/ccp/ccp-dev.c @@ -149,6 +149,29 @@ int ccp_present(void) } EXPORT_SYMBOL_GPL(ccp_present); +/** + * ccp_version - get the version of the CCP device + * + * Returns the version from the first unit on the list; + * otherwise a zero if no CCP device is present + */ +unsigned int ccp_version(void) +{ + struct ccp_device *dp; + unsigned long flags; + int ret = 0; + + read_lock_irqsave(&ccp_unit_lock, flags); + if (!list_empty(&ccp_units)) { + dp = list_first_entry(&ccp_units, struct ccp_device, entry); + ret = dp->vdata->version; + } + read_unlock_irqrestore(&ccp_unit_lock, flags); + + return ret; +} +EXPORT_SYMBOL_GPL(ccp_version); + /** * ccp_enqueue_cmd - queue an operation for processing by the CCP * @@ -664,6 +687,10 @@ bool ccp_queues_suspended(struct ccp_device *ccp) } #endif +struct ccp_vdata ccpv3 = { + .version = CCP_VERSION(3, 0), +}; + static int __init ccp_mod_init(void) { #ifdef CONFIG_X86 diff --git a/drivers/crypto/ccp/ccp-dev.h b/drivers/crypto/ccp/ccp-dev.h index 974dc055e0ab..90a8cc8c7d46 100644 --- a/drivers/crypto/ccp/ccp-dev.h +++ b/drivers/crypto/ccp/ccp-dev.h @@ -141,6 +141,13 @@ #define CCP_ECC_RESULT_OFFSET 60 #define CCP_ECC_RESULT_SUCCESS 0x0001 +/* Structure to hold CCP version-specific values */ +struct ccp_vdata { + unsigned int version; +}; + +extern struct ccp_vdata ccpv3; + struct ccp_device; struct ccp_cmd; @@ -187,6 +194,7 @@ struct ccp_cmd_queue { struct ccp_device { struct list_head entry; + struct ccp_vdata *vdata; unsigned int ord; char name[MAX_CCP_NAME_LEN]; char rngname[MAX_CCP_NAME_LEN]; diff --git a/drivers/crypto/ccp/ccp-pci.c b/drivers/crypto/ccp/ccp-pci.c index 668e5154beb3..d1a36af44012 100644 --- a/drivers/crypto/ccp/ccp-pci.c +++ b/drivers/crypto/ccp/ccp-pci.c @@ -180,6 +180,12 @@ static int ccp_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) goto e_err; ccp->dev_specific = ccp_pci; + ccp->vdata = (struct ccp_vdata *)id->driver_data; + if (!ccp->vdata || !ccp->vdata->version) { + ret = -ENODEV; + dev_err(dev, "missing driver data\n"); + goto e_err; + } ccp->get_irq = ccp_get_irqs; ccp->free_irq = ccp_free_irqs; @@ -313,7 +319,7 @@ static int ccp_pci_resume(struct pci_dev *pdev) #endif static const struct pci_device_id ccp_pci_table[] = { - 
{ PCI_VDEVICE(AMD, 0x1537), }, + { PCI_VDEVICE(AMD, 0x1537), (kernel_ulong_t)&ccpv3 }, /* Last entry must be zero */ { 0, } }; diff --git a/drivers/crypto/ccp/ccp-platform.c b/drivers/crypto/ccp/ccp-platform.c index 4331318b57b2..6e1cf228c7c0 100644 --- a/drivers/crypto/ccp/ccp-platform.c +++ b/drivers/crypto/ccp/ccp-platform.c @@ -32,6 +32,33 @@ struct ccp_platform { int coherent; }; +static const struct acpi_device_id ccp_acpi_match[]; +static const struct of_device_id ccp_of_match[]; + +static struct ccp_vdata *ccp_get_of_version(struct platform_device *pdev) +{ +#ifdef CONFIG_OF + const struct of_device_id *match; + + match = of_match_node(ccp_of_match, pdev->dev.of_node); + if (match && match->data) + return (struct ccp_vdata *)match->data; +#endif + return 0; +} + +static struct ccp_vdata *ccp_get_acpi_version(struct platform_device *pdev) +{ +#ifdef CONFIG_ACPI + const struct acpi_device_id *match; + + match = acpi_match_device(ccp_acpi_match, &pdev->dev); + if (match && match->driver_data) + return (struct ccp_vdata *)match->driver_data; +#endif + return 0; +} + static int ccp_get_irq(struct ccp_device *ccp) { struct device *dev = ccp->dev; @@ -106,6 +133,13 @@ static int ccp_platform_probe(struct platform_device *pdev) goto e_err; ccp->dev_specific = ccp_platform; + ccp->vdata = pdev->dev.of_node ? ccp_get_of_version(pdev) + : ccp_get_acpi_version(pdev); + if (!ccp->vdata || !ccp->vdata->version) { + ret = -ENODEV; + dev_err(dev, "missing driver data\n"); + goto e_err; + } ccp->get_irq = ccp_get_irqs; ccp->free_irq = ccp_free_irqs; @@ -214,7 +248,7 @@ static int ccp_platform_resume(struct platform_device *pdev) #ifdef CONFIG_ACPI static const struct acpi_device_id ccp_acpi_match[] = { - { "AMDI0C00", 0 }, + { "AMDI0C00", (kernel_ulong_t)&ccpv3 }, { }, }; MODULE_DEVICE_TABLE(acpi, ccp_acpi_match); @@ -222,7 +256,8 @@ MODULE_DEVICE_TABLE(acpi, ccp_acpi_match); #ifdef CONFIG_OF static const struct of_device_id ccp_of_match[] = { - { .compatible = "amd,ccp-seattle-v1a" }, + { .compatible = "amd,ccp-seattle-v1a", + .data = (const void *)&ccpv3 }, { }, }; MODULE_DEVICE_TABLE(of, ccp_of_match); diff --git a/include/linux/ccp.h b/include/linux/ccp.h index 7f437036baa4..915af3095b39 100644 --- a/include/linux/ccp.h +++ b/include/linux/ccp.h @@ -33,6 +33,18 @@ struct ccp_cmd; */ int ccp_present(void); +#define CCP_VSIZE 16 +#define CCP_VMASK ((unsigned int)((1 << CCP_VSIZE) - 1)) +#define CCP_VERSION(v, r) ((unsigned int)((v << CCP_VSIZE) \ + | (r & CCP_VMASK))) + +/** + * ccp_version - get the version of the CCP + * + * Returns a positive version number, or zero if no CCP + */ +unsigned int ccp_version(void); + /** * ccp_enqueue_cmd - queue an operation for processing by the CCP * @@ -65,6 +77,11 @@ static inline int ccp_present(void) return -ENODEV; } +static inline unsigned int ccp_version(void) +{ + return 0; +} + static inline int ccp_enqueue_cmd(struct ccp_cmd *cmd) { return -ENODEV; -- cgit v1.2.3 From 4c656c13b254d598e83e586b7b4d36a2043dad85 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Tue, 8 Mar 2016 12:59:35 -0800 Subject: bridge: allow zero ageing time This fixes a regression in the bridge ageing time caused by: commit c62987bbd8a1 ("bridge: push bridge setting ageing_time down to switchdev") There are users of Linux bridge which use the feature that if ageing time is set to 0 it causes entries to never expire. 
See: https://www.linuxfoundation.org/collaborate/workgroups/networking/bridge For a pure software bridge, it is unnecessary for the code to have arbitrary restrictions on what values are allowable. Signed-off-by: Stephen Hemminger Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- include/linux/if_bridge.h | 4 ---- net/bridge/br_stp.c | 11 ++++++++--- 2 files changed, 8 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h index a338a688ee4a..dcb89e3515db 100644 --- a/include/linux/if_bridge.h +++ b/include/linux/if_bridge.h @@ -46,10 +46,6 @@ struct br_ip_list { #define BR_LEARNING_SYNC BIT(9) #define BR_PROXYARP_WIFI BIT(10) -/* values as per ieee8021QBridgeFdbAgingTime */ -#define BR_MIN_AGEING_TIME (10 * HZ) -#define BR_MAX_AGEING_TIME (1000000 * HZ) - #define BR_DEFAULT_AGEING_TIME (300 * HZ) extern void brioctl_set(int (*ioctl_hook)(struct net *, unsigned int, void __user *)); diff --git a/net/bridge/br_stp.c b/net/bridge/br_stp.c index c22816a0b1b1..e23449094188 100644 --- a/net/bridge/br_stp.c +++ b/net/bridge/br_stp.c @@ -562,6 +562,14 @@ int br_set_max_age(struct net_bridge *br, unsigned long val) } +/* Set time interval that dynamic forwarding entries live + * For pure software bridge, allow values outside the 802.1 + * standard specification for special cases: + * 0 - entry never ages (all permanent) + * 1 - entry disappears (no persistence) + * + * Offloaded switch entries may be more restrictive + */ int br_set_ageing_time(struct net_bridge *br, u32 ageing_time) { struct switchdev_attr attr = { @@ -573,9 +581,6 @@ int br_set_ageing_time(struct net_bridge *br, u32 ageing_time) unsigned long t = clock_t_to_jiffies(ageing_time); int err; - if (t < BR_MIN_AGEING_TIME || t > BR_MAX_AGEING_TIME) - return -ERANGE; - err = switchdev_port_attr_set(br->dev, &attr); if (err) return err; -- cgit v1.2.3 From d9c8bea179a6906a74ea42a2a162c4d1c6d9a16b Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Wed, 2 Mar 2016 21:46:50 -0600 Subject: PCI: Remove unused IORESOURCE_ROM_COPY and IORESOURCE_ROM_BIOS_COPY The IORESOURCE_ROM_COPY and IORESOURCE_ROM_BIOS_COPY bits are unused. Remove them and code that depends on them.
Signed-off-by: Bjorn Helgaas --- drivers/pci/remove.c | 1 - drivers/pci/rom.c | 31 +------------------------------ include/linux/ioport.h | 2 -- 3 files changed, 1 insertion(+), 33 deletions(-) (limited to 'include/linux') diff --git a/drivers/pci/remove.c b/drivers/pci/remove.c index 8a280e9c2ad1..6b66329a6bff 100644 --- a/drivers/pci/remove.c +++ b/drivers/pci/remove.c @@ -7,7 +7,6 @@ static void pci_free_resources(struct pci_dev *dev) { int i; - pci_cleanup_rom(dev); for (i = 0; i < PCI_NUM_RESOURCES; i++) { struct resource *res = dev->resource + i; if (res->parent) diff --git a/drivers/pci/rom.c b/drivers/pci/rom.c index 2a07f344616e..06663d391b39 100644 --- a/drivers/pci/rom.c +++ b/drivers/pci/rom.c @@ -128,12 +128,6 @@ void __iomem *pci_map_rom(struct pci_dev *pdev, size_t *size) loff_t start; void __iomem *rom; - if (res->flags & (IORESOURCE_ROM_COPY | IORESOURCE_ROM_BIOS_COPY)) { - *size = pci_resource_len(pdev, PCI_ROM_RESOURCE); - return (void __iomem *)(unsigned long) - pci_resource_start(pdev, PCI_ROM_RESOURCE); - } - /* assign the ROM an address if it doesn't have one */ if (res->parent == NULL && pci_assign_resource(pdev, PCI_ROM_RESOURCE)) return NULL; @@ -150,8 +144,7 @@ void __iomem *pci_map_rom(struct pci_dev *pdev, size_t *size) rom = ioremap(start, *size); if (!rom) { /* restore enable if ioremap fails */ - if (!(res->flags & (IORESOURCE_ROM_ENABLE | - IORESOURCE_ROM_COPY))) + if (!(res->flags & IORESOURCE_ROM_ENABLE)) pci_disable_rom(pdev); return NULL; } @@ -177,9 +170,6 @@ void pci_unmap_rom(struct pci_dev *pdev, void __iomem *rom) { struct resource *res = &pdev->resource[PCI_ROM_RESOURCE]; - if (res->flags & (IORESOURCE_ROM_COPY | IORESOURCE_ROM_BIOS_COPY)) - return; - iounmap(rom); /* Disable again before continuing */ @@ -188,25 +178,6 @@ void pci_unmap_rom(struct pci_dev *pdev, void __iomem *rom) } EXPORT_SYMBOL(pci_unmap_rom); -/** - * pci_cleanup_rom - free the ROM copy created by pci_map_rom_copy - * @pdev: pointer to pci device struct - * - * Free the copied ROM if we allocated one. - */ -void pci_cleanup_rom(struct pci_dev *pdev) -{ - struct resource *res = &pdev->resource[PCI_ROM_RESOURCE]; - - if (res->flags & IORESOURCE_ROM_COPY) { - kfree((void *)(unsigned long)res->start); - res->flags |= IORESOURCE_UNSET; - res->flags &= ~IORESOURCE_ROM_COPY; - res->start = 0; - res->end = 0; - } -} - /** * pci_platform_rom - provides a pointer to any ROM image provided by the * platform diff --git a/include/linux/ioport.h b/include/linux/ioport.h index 2cf16673f17a..29a6deb9f054 100644 --- a/include/linux/ioport.h +++ b/include/linux/ioport.h @@ -99,8 +99,6 @@ struct resource { /* PCI ROM control bits (IORESOURCE_BITS) */ #define IORESOURCE_ROM_ENABLE (1<<0) /* ROM is enabled, same as PCI_ROM_ADDRESS_ENABLE */ #define IORESOURCE_ROM_SHADOW (1<<1) /* Use RAM image, not ROM BAR */ -#define IORESOURCE_ROM_COPY (1<<2) /* ROM is alloc'd copy, resource field overlaid */ -#define IORESOURCE_ROM_BIOS_COPY (1<<3) /* ROM is BIOS copy, resource field overlaid */ /* PCI control bits. Shares IORESOURCE_BITS with above PCI ROM. */ #define IORESOURCE_PCI_FIXED (1<<4) /* Do not move resource */ -- cgit v1.2.3 From 470c3822d2ab7fadcbb1ac317ef27b31caac370e Mon Sep 17 00:00:00 2001 From: LABBE Corentin Date: Thu, 10 Mar 2016 13:58:58 +0100 Subject: phy: remove documentation of removed members of phy_device structure Commit e5a03bfd873c ("phy: Add an mdio_device structure") removed the addr, bus and dev members of the phy_device structure.
This patch removes the documentation for those members. Signed-off-by: LABBE Corentin Reviewed-by: Andrew Lunn Acked-by: Florian Fainelli Signed-off-by: David S. Miller --- include/linux/phy.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/phy.h b/include/linux/phy.h index d6f3641e7933..2abd7918f64f 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -327,8 +327,6 @@ struct phy_c45_device_ids { /* phy_device: An instance of a PHY * * drv: Pointer to the driver for this PHY instance - * bus: Pointer to the bus this PHY is on - * dev: driver model device structure for this PHY * phy_id: UID for this device found during discovery * c45_ids: 802.3-c45 Device Identifiers if is_c45. * is_c45: Set to true if this phy uses clause 45 addressing. @@ -338,7 +336,6 @@ struct phy_c45_device_ids { * suspended: Set to true if this phy has been suspended successfully. * state: state of the PHY for management purposes * dev_flags: Device-specific flags used by the PHY driver. - * addr: Bus address of PHY * link_timeout: The number of timer firings to wait before the * giving up on the current attempt at acquiring a link * irq: IRQ number of the PHY's interrupt (-1 if none) -- cgit v1.2.3 From 3c17578473b9be5a6e7680a45ea97e1d56e13249 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Fri, 11 Mar 2016 18:07:32 +0100 Subject: net: add MACsec netdevice priv_flags and helper Signed-off-by: Sabrina Dubroca Reviewed-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- include/linux/netdevice.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 41df0b450757..be693b34662f 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1328,6 +1328,7 @@ struct net_device_ops { * @IFF_RXFH_CONFIGURED: device has had Rx Flow indirection table configured * @IFF_PHONY_HEADROOM: the headroom value is controlled by an external * entity (i.e. the master device for bridged veth) + * @IFF_MACSEC: device is a MACsec device */ enum netdev_priv_flags { IFF_802_1Q_VLAN = 1<<0, @@ -1357,6 +1358,7 @@ enum netdev_priv_flags { IFF_TEAM = 1<<24, IFF_RXFH_CONFIGURED = 1<<25, IFF_PHONY_HEADROOM = 1<<26, + IFF_MACSEC = 1<<27, }; #define IFF_802_1Q_VLAN IFF_802_1Q_VLAN @@ -1385,6 +1387,7 @@ enum netdev_priv_flags { #define IFF_L3MDEV_SLAVE IFF_L3MDEV_SLAVE #define IFF_TEAM IFF_TEAM #define IFF_RXFH_CONFIGURED IFF_RXFH_CONFIGURED +#define IFF_MACSEC IFF_MACSEC /** * struct net_device - The DEVICE structure. @@ -4045,6 +4048,11 @@ static inline void skb_gso_error_unwind(struct sk_buff *skb, __be16 protocol, skb->mac_len = mac_len; } +static inline bool netif_is_macsec(const struct net_device *dev) +{ + return dev->priv_flags & IFF_MACSEC; +} + static inline bool netif_is_macvlan(const struct net_device *dev) { return dev->priv_flags & IFF_MACVLAN; -- cgit v1.2.3 From 949a852e46dda07caaa0ff02e181f55d24e3ebf8 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 6 Mar 2016 14:20:52 -0500 Subject: namei: teach lookup_slow() to skip revalidate ... and make mountpoint_last() use it. That makes all candidates for lookup with parent locked shared go through lookup_slow().
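For orientation, the caller-side pattern this enables, excerpted from the mountpoint_last() hunk below:

	/* No cached dentry: let lookup_slow() allocate and look up the
	 * last component, skipping ->d_revalidate() for this caller */
	dentry = lookup_slow(&nd->last, dir, nd->flags | LOOKUP_NO_REVAL);
	if (IS_ERR(dentry))
		return PTR_ERR(dentry);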
Signed-off-by: Al Viro --- fs/namei.c | 58 +++++++++++++++++++++++++++++++-------------------- include/linux/namei.h | 1 + 2 files changed, 36 insertions(+), 23 deletions(-) (limited to 'include/linux') diff --git a/fs/namei.c b/fs/namei.c index cb70a817d439..dbb8ec1a2006 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1605,7 +1605,29 @@ static struct dentry *lookup_slow(const struct qstr *name, { struct dentry *dentry; inode_lock(dir->d_inode); - dentry = __lookup_hash(name, dir, flags); + dentry = d_lookup(dir, name); + if (unlikely(dentry)) { + if ((dentry->d_flags & DCACHE_OP_REVALIDATE) && + !(flags & LOOKUP_NO_REVAL)) { + int error = d_revalidate(dentry, flags); + if (unlikely(error <= 0)) { + if (!error) + d_invalidate(dentry); + dput(dentry); + dentry = ERR_PTR(error); + } + } + if (dentry) { + inode_unlock(dir->d_inode); + return dentry; + } + } + dentry = d_alloc(dir, name); + if (unlikely(!dentry)) { + inode_unlock(dir->d_inode); + return ERR_PTR(-ENOMEM); + } + dentry = lookup_real(dir->d_inode, dentry, flags); inode_unlock(dir->d_inode); return dentry; } @@ -2425,31 +2447,21 @@ mountpoint_last(struct nameidata *nd, struct path *path) if (error) return error; dentry = dget(nd->path.dentry); - goto done; - } - - inode_lock(dir->d_inode); - dentry = d_lookup(dir, &nd->last); - if (!dentry) { - /* - * No cached dentry. Mounted dentries are pinned in the cache, - * so that means that this dentry is probably a symlink or the - * path doesn't actually point to a mounted dentry. - */ - dentry = d_alloc(dir, &nd->last); + } else { + dentry = d_lookup(dir, &nd->last); if (!dentry) { - inode_unlock(dir->d_inode); - return -ENOMEM; - } - dentry = lookup_real(dir->d_inode, dentry, nd->flags); - if (IS_ERR(dentry)) { - inode_unlock(dir->d_inode); - return PTR_ERR(dentry); + /* + * No cached dentry. Mounted dentries are pinned in the + * cache, so that means that this dentry is probably + * a symlink or the path doesn't actually point + * to a mounted dentry. + */ + dentry = lookup_slow(&nd->last, dir, + nd->flags | LOOKUP_NO_REVAL); + if (IS_ERR(dentry)) + return PTR_ERR(dentry); } } - inode_unlock(dir->d_inode); - -done: if (d_is_negative(dentry)) { dput(dentry); return -ENOENT; diff --git a/include/linux/namei.h b/include/linux/namei.h index d0f25d81b46a..77d01700daf7 100644 --- a/include/linux/namei.h +++ b/include/linux/namei.h @@ -31,6 +31,7 @@ enum {LAST_NORM, LAST_ROOT, LAST_DOT, LAST_DOTDOT, LAST_BIND}; #define LOOKUP_PARENT 0x0010 #define LOOKUP_REVAL 0x0020 #define LOOKUP_RCU 0x0040 +#define LOOKUP_NO_REVAL 0x0080 /* * Intent data -- cgit v1.2.3 From 9d95afd5971918b1aa8db1960ba24532c2d6ec89 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 1 Mar 2016 15:35:06 -0500 Subject: kill dentry_unhash() the last user is gone Signed-off-by: Al Viro --- fs/namei.c | 25 ------------------------- include/linux/fs.h | 5 ----- 2 files changed, 30 deletions(-) (limited to 'include/linux') diff --git a/fs/namei.c b/fs/namei.c index dbb8ec1a2006..794f81dce766 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -3685,31 +3685,6 @@ SYSCALL_DEFINE2(mkdir, const char __user *, pathname, umode_t, mode) return sys_mkdirat(AT_FDCWD, pathname, mode); } -/* - * The dentry_unhash() helper will try to drop the dentry early: we - * should have a usage count of 1 if we're the only user of this - * dentry, and if that is true (possibly after pruning the dcache), - * then we drop the dentry now. 
- * - * A low-level filesystem can, if it choses, legally - * do a - * - * if (!d_unhashed(dentry)) - * return -EBUSY; - * - * if it cannot handle the case of removing a directory - * that is still in use by something else.. - */ -void dentry_unhash(struct dentry *dentry) -{ - shrink_dcache_parent(dentry); - spin_lock(&dentry->d_lock); - if (dentry->d_lockref.count == 1) - __d_drop(dentry); - spin_unlock(&dentry->d_lock); -} -EXPORT_SYMBOL(dentry_unhash); - int vfs_rmdir(struct inode *dir, struct dentry *dentry) { int error = may_delete(dir, dentry, 1); diff --git a/include/linux/fs.h b/include/linux/fs.h index ae681002100a..c577923cd400 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1539,11 +1539,6 @@ extern int vfs_unlink(struct inode *, struct dentry *, struct inode **); extern int vfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *, struct inode **, unsigned int); extern int vfs_whiteout(struct inode *, struct dentry *); -/* - * VFS dentry helper functions. - */ -extern void dentry_unhash(struct dentry *dentry); - /* * VFS file helper functions. */ -- cgit v1.2.3 From 668d0cd56ef7bc71be6dd8c081007221e09d9a86 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 8 Mar 2016 12:44:17 -0500 Subject: replace d_add_unique() with saner primitive new primitive: d_exact_alias(dentry, inode). If there is an unhashed dentry with the same name/parent and given inode, rehash, grab and return it. Otherwise, return NULL. The only caller of d_add_unique() switched to d_exact_alias() + d_splice_alias(). Signed-off-by: Al Viro --- fs/dcache.c | 125 ++++++++++++++++++++----------------------------- fs/nfs/nfs4proc.c | 13 ++--- include/linux/dcache.h | 18 +------ 3 files changed, 58 insertions(+), 98 deletions(-) (limited to 'include/linux') diff --git a/fs/dcache.c b/fs/dcache.c index 2398f9f94337..4d20bf5c609b 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1781,81 +1781,6 @@ void d_instantiate(struct dentry *entry, struct inode * inode) } EXPORT_SYMBOL(d_instantiate); -/** - * d_instantiate_unique - instantiate a non-aliased dentry - * @entry: dentry to instantiate - * @inode: inode to attach to this dentry - * - * Fill in inode information in the entry. On success, it returns NULL. - * If an unhashed alias of "entry" already exists, then we return the - * aliased dentry instead and drop one reference to inode. - * - * Note that in order to avoid conflicts with rename() etc, the caller - * had better be holding the parent directory semaphore. - * - * This also assumes that the inode count has been incremented - * (or otherwise set) by the caller to indicate that it is now - * in use by the dcache. - */ -static struct dentry *__d_instantiate_unique(struct dentry *entry, - struct inode *inode) -{ - struct dentry *alias; - int len = entry->d_name.len; - const char *name = entry->d_name.name; - unsigned int hash = entry->d_name.hash; - - if (!inode) { - __d_instantiate(entry, NULL); - return NULL; - } - - hlist_for_each_entry(alias, &inode->i_dentry, d_u.d_alias) { - /* - * Don't need alias->d_lock here, because aliases with - * d_parent == entry->d_parent are not subject to name or - * parent changes, because the parent inode i_mutex is held. 
- */ - if (alias->d_name.hash != hash) - continue; - if (alias->d_parent != entry->d_parent) - continue; - if (alias->d_name.len != len) - continue; - if (dentry_cmp(alias, name, len)) - continue; - __dget(alias); - return alias; - } - - __d_instantiate(entry, inode); - return NULL; -} - -struct dentry *d_instantiate_unique(struct dentry *entry, struct inode *inode) -{ - struct dentry *result; - - BUG_ON(!hlist_unhashed(&entry->d_u.d_alias)); - - if (inode) - spin_lock(&inode->i_lock); - result = __d_instantiate_unique(entry, inode); - if (inode) - spin_unlock(&inode->i_lock); - - if (!result) { - security_d_instantiate(entry, inode); - return NULL; - } - - BUG_ON(!d_unhashed(result)); - iput(inode); - return result; -} - -EXPORT_SYMBOL(d_instantiate_unique); - /** * d_instantiate_no_diralias - instantiate a non-aliased dentry * @entry: dentry to complete @@ -2436,6 +2361,56 @@ void d_rehash(struct dentry * entry) } EXPORT_SYMBOL(d_rehash); +/** + * d_exact_alias - find and hash an exact unhashed alias + * @entry: dentry to add + * @inode: The inode to go with this dentry + * + * If an unhashed dentry with the same name/parent and desired + * inode already exists, hash and return it. Otherwise, return + * NULL. + * + * Parent directory should be locked. + */ +struct dentry *d_exact_alias(struct dentry *entry, struct inode *inode) +{ + struct dentry *alias; + int len = entry->d_name.len; + const char *name = entry->d_name.name; + unsigned int hash = entry->d_name.hash; + + spin_lock(&inode->i_lock); + hlist_for_each_entry(alias, &inode->i_dentry, d_u.d_alias) { + /* + * Don't need alias->d_lock here, because aliases with + * d_parent == entry->d_parent are not subject to name or + * parent changes, because the parent inode i_mutex is held. + */ + if (alias->d_name.hash != hash) + continue; + if (alias->d_parent != entry->d_parent) + continue; + if (alias->d_name.len != len) + continue; + if (dentry_cmp(alias, name, len)) + continue; + spin_lock(&alias->d_lock); + if (!d_unhashed(alias)) { + spin_unlock(&alias->d_lock); + alias = NULL; + } else { + __dget_dlock(alias); + _d_rehash(alias); + spin_unlock(&alias->d_lock); + } + spin_unlock(&inode->i_lock); + return alias; + } + spin_unlock(&inode->i_lock); + return NULL; +} +EXPORT_SYMBOL(d_exact_alias); + /** * dentry_update_name_case - update case insensitive dentry with a new name * @dentry: dentry to be updated diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 4bfc33ad0563..400a70b3be7b 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2461,14 +2461,15 @@ static int _nfs4_open_and_get_state(struct nfs4_opendata *opendata, dentry = opendata->dentry; if (d_really_is_negative(dentry)) { - /* FIXME: Is this d_drop() ever needed? 
*/ + struct dentry *alias; d_drop(dentry); - dentry = d_add_unique(dentry, igrab(state->inode)); - if (dentry == NULL) { - dentry = opendata->dentry; - } else if (dentry != ctx->dentry) { + alias = d_exact_alias(dentry, state->inode); + if (!alias) + alias = d_splice_alias(igrab(state->inode), dentry); + /* d_splice_alias() can't fail here - it's a non-directory */ + if (alias) { dput(ctx->dentry); - ctx->dentry = dget(dentry); + ctx->dentry = dentry = alias; } nfs_set_verifier(dentry, nfs_save_change_attribute(d_inode(opendata->dir))); diff --git a/include/linux/dcache.h b/include/linux/dcache.h index c4b5f4b3f8f8..bda4ec53886b 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -246,6 +246,7 @@ extern struct dentry * d_alloc(struct dentry *, const struct qstr *); extern struct dentry * d_alloc_pseudo(struct super_block *, const struct qstr *); extern struct dentry * d_splice_alias(struct inode *, struct dentry *); extern struct dentry * d_add_ci(struct dentry *, struct inode *, struct qstr *); +extern struct dentry * d_exact_alias(struct dentry *, struct inode *); extern struct dentry *d_find_any_alias(struct inode *inode); extern struct dentry * d_obtain_alias(struct inode *); extern struct dentry * d_obtain_root(struct inode *); @@ -288,23 +289,6 @@ static inline void d_add(struct dentry *entry, struct inode *inode) d_rehash(entry); } -/** - * d_add_unique - add dentry to hash queues without aliasing - * @entry: dentry to add - * @inode: The inode to attach to this dentry - * - * This adds the entry to the hash queues and initializes @inode. - * The entry was actually filled in earlier during d_alloc(). - */ -static inline struct dentry *d_add_unique(struct dentry *entry, struct inode *inode) -{ - struct dentry *res; - - res = d_instantiate_unique(entry, inode); - d_rehash(res != NULL ? res : entry); - return res; -} - extern void dentry_update_name_case(struct dentry *, struct qstr *); /* used for rename() and baskets */ -- cgit v1.2.3 From 34d0d19dc0929ccc326448737f05a8fae3d47b8a Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 8 Mar 2016 21:01:03 -0500 Subject: uninline d_add() Signed-off-by: Al Viro --- fs/dcache.c | 16 ++++++++++++++++ include/linux/dcache.h | 15 +-------------- 2 files changed, 17 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/fs/dcache.c b/fs/dcache.c index 4d20bf5c609b..12280df07837 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -2361,6 +2361,22 @@ void d_rehash(struct dentry * entry) } EXPORT_SYMBOL(d_rehash); +/** + * d_add - add dentry to hash queues + * @entry: dentry to add + * @inode: The inode to attach to this dentry + * + * This adds the entry to the hash queues and initializes @inode. + * The entry was actually filled in earlier during d_alloc(). + */ + +void d_add(struct dentry *entry, struct inode *inode) +{ + d_instantiate(entry, inode); + d_rehash(entry); +} +EXPORT_SYMBOL(d_add); + /** * d_exact_alias - find and hash an exact unhashed alias * @entry: dentry to add diff --git a/include/linux/dcache.h b/include/linux/dcache.h index bda4ec53886b..1c51d2d84a32 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -273,21 +273,8 @@ extern int have_submounts(struct dentry *); * This adds the entry to the hash queues. */ extern void d_rehash(struct dentry *); - -/** - * d_add - add dentry to hash queues - * @entry: dentry to add - * @inode: The inode to attach to this dentry - * - * This adds the entry to the hash queues and initializes @inode. 
- * The entry was actually filled in earlier during d_alloc(). - */ -static inline void d_add(struct dentry *entry, struct inode *inode) -{ - d_instantiate(entry, inode); - d_rehash(entry); -} +extern void d_add(struct dentry *, struct inode *); extern void dentry_update_name_case(struct dentry *, struct qstr *); -- cgit v1.2.3 From 27f203f655a2e1dab66598a5b19afb637c587f0b Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 9 Mar 2016 17:58:49 -0500 Subject: untangle fsnotify_d_instantiate() a bit First of all, don't bother calling it if inode is NULL - that makes inode argument unused. Moreover, do it *before* dropping ->d_lock, not right after that (and don't bother grabbing ->d_lock in it, of course). Signed-off-by: Al Viro --- fs/dcache.c | 3 ++- include/linux/fsnotify.h | 9 --------- include/linux/fsnotify_backend.h | 9 ++------- 3 files changed, 4 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/fs/dcache.c b/fs/dcache.c index 12280df07837..244fd2487fe9 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1750,8 +1750,9 @@ static void __d_instantiate(struct dentry *dentry, struct inode *inode) raw_write_seqcount_begin(&dentry->d_seq); __d_set_inode_and_type(dentry, inode, add_flags); raw_write_seqcount_end(&dentry->d_seq); + if (inode) + __fsnotify_d_instantiate(dentry); spin_unlock(&dentry->d_lock); - fsnotify_d_instantiate(dentry, inode); } /** diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h index 7ee1774edee5..0141f257d67b 100644 --- a/include/linux/fsnotify.h +++ b/include/linux/fsnotify.h @@ -16,15 +16,6 @@ #include #include -/* - * fsnotify_d_instantiate - instantiate a dentry for inode - */ -static inline void fsnotify_d_instantiate(struct dentry *dentry, - struct inode *inode) -{ - __fsnotify_d_instantiate(dentry, inode); -} - /* Notify this dentry's parent about a child's events. */ static inline int fsnotify_parent(struct path *path, struct dentry *dentry, __u32 mask) { diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index 6b7e89f45aa4..827b249259f7 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -293,14 +293,9 @@ static inline void __fsnotify_update_dcache_flags(struct dentry *dentry) /* * fsnotify_d_instantiate - instantiate a dentry for inode */ -static inline void __fsnotify_d_instantiate(struct dentry *dentry, struct inode *inode) +static inline void __fsnotify_d_instantiate(struct dentry *dentry) { - if (!inode) - return; - - spin_lock(&dentry->d_lock); __fsnotify_update_dcache_flags(dentry); - spin_unlock(&dentry->d_lock); } /* called from fsnotify listeners, such as fanotify or dnotify */ @@ -399,7 +394,7 @@ static inline void __fsnotify_vfsmount_delete(struct vfsmount *mnt) static inline void __fsnotify_update_dcache_flags(struct dentry *dentry) {} -static inline void __fsnotify_d_instantiate(struct dentry *dentry, struct inode *inode) +static inline void __fsnotify_d_instantiate(struct dentry *dentry) {} static inline u32 fsnotify_get_cookie(void) -- cgit v1.2.3 From 70b2563b35b3edcf67819b99e560f3f0f6ce4a9f Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Thu, 21 Jan 2016 23:11:25 +0100 Subject: leds: core: fix misleading comment after workqueue removal from drivers Now that workqueues have been removed from individual drivers and were replaced with a core-internal workqueue we shouldn't encourage people to add new workqueues to drivers. 
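As a sketch of the pattern the updated comment points to: a driver whose set-brightness path sleeps implements brightness_set_blocking() and lets the LED core's internal workqueue defer the call, instead of creating its own. The foo_* names and the register address are assumptions:

#include <linux/leds.h>
#include <linux/regmap.h>

struct foo_led {
	struct led_classdev cdev;
	struct regmap *regmap;
};

/* May sleep: invoked from the LED core's workqueue, not a driver-private one */
static int foo_brightness_set_blocking(struct led_classdev *cdev,
				       enum led_brightness value)
{
	struct foo_led *led = container_of(cdev, struct foo_led, cdev);

	return regmap_write(led->regmap, 0x01 /* hypothetical reg */, value);
}

/* at probe time: led->cdev.brightness_set_blocking = foo_brightness_set_blocking; */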
Signed-off-by: Heiner Kallweit Signed-off-by: Jacek Anaszewski --- include/linux/leds.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/leds.h b/include/linux/leds.h index bc1476fda96e..e3470e732ede 100644 --- a/include/linux/leds.h +++ b/include/linux/leds.h @@ -49,8 +49,10 @@ struct led_classdev { #define LED_SYSFS_DISABLE (1 << 22) #define LED_DEV_CAP_FLASH (1 << 23) - /* Set LED brightness level */ - /* Must not sleep, use a workqueue if needed */ + /* Set LED brightness level + * Must not sleep. Use brightness_set_blocking for drivers + * that can sleep while setting brightness. + */ void (*brightness_set)(struct led_classdev *led_cdev, enum led_brightness brightness); /* -- cgit v1.2.3 From d84d80f38f0ff4eb4becf1a3569c8e7b2c463b61 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Fri, 22 Jan 2016 21:43:48 +0100 Subject: leds: core: avoid error message when a USB LED device is unplugged When a USB LED device is unplugged the remove call chain calls led_classdev_unregister which tries to switch the LED off. As the device has been removed already this results in a ENODEV error message in dmesg. Avoid this error message by ignoring ENODEV in calls from led_classdev_unregister if the LED device is flagged as pluggable. Therefore a new flag LED_HW_PLUGGABLE was introduced which should be set by all LED drivers handling pluggable LED devices (mainly USB LED devices). Signed-off-by: Heiner Kallweit Signed-off-by: Jacek Anaszewski --- drivers/leds/led-class.c | 2 ++ drivers/leds/led-core.c | 5 ++++- include/linux/leds.h | 2 ++ 3 files changed, 8 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/leds/led-class.c b/drivers/leds/led-class.c index 14139c337312..aa84e5b37593 100644 --- a/drivers/leds/led-class.c +++ b/drivers/leds/led-class.c @@ -245,6 +245,8 @@ void led_classdev_unregister(struct led_classdev *led_cdev) up_write(&led_cdev->trigger_lock); #endif + led_cdev->flags |= LED_UNREGISTERING; + /* Stop blinking */ led_stop_software_blink(led_cdev); diff --git a/drivers/leds/led-core.c b/drivers/leds/led-core.c index 19e1e60dfaa3..ad684b6d0b72 100644 --- a/drivers/leds/led-core.c +++ b/drivers/leds/led-core.c @@ -98,7 +98,10 @@ static void set_brightness_delayed(struct work_struct *ws) led_cdev->delayed_set_value); else ret = -ENOTSUPP; - if (ret < 0) + if (ret < 0 && + /* LED HW might have been unplugged, therefore don't warn */ + !(ret == -ENODEV && (led_cdev->flags & LED_UNREGISTERING) && + (led_cdev->flags & LED_HW_PLUGGABLE))) dev_err(led_cdev->dev, "Setting an LED's brightness failed (%d)\n", ret); } diff --git a/include/linux/leds.h b/include/linux/leds.h index e3470e732ede..f203a8f89d30 100644 --- a/include/linux/leds.h +++ b/include/linux/leds.h @@ -39,6 +39,7 @@ struct led_classdev { /* Lower 16 bits reflect status */ #define LED_SUSPENDED (1 << 0) +#define LED_UNREGISTERING (1 << 1) /* Upper 16 bits reflect control information */ #define LED_CORE_SUSPENDRESUME (1 << 16) #define LED_BLINK_ONESHOT (1 << 17) @@ -48,6 +49,7 @@ struct led_classdev { #define LED_BLINK_DISABLE (1 << 21) #define LED_SYSFS_DISABLE (1 << 22) #define LED_DEV_CAP_FLASH (1 << 23) +#define LED_HW_PLUGGABLE (1 << 24) /* Set LED brightness level * Must not sleep. 
Use brightness_set_blocking for drivers -- cgit v1.2.3 From c988cabe2534b6efbf0cfab6d69e18855dc3409e Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Thu, 25 Feb 2016 09:56:04 -0800 Subject: iscsi_ibft: Add prefix-len attr and display netmask The iBFT table only specifies a prefix length, not a netmask. And the netmask is pretty much pointless for IPv6. So introduce a new attribute 'prefix-len'. Some older user-space code might rely on the netmask attribute being present, so we should always display it. Changes from v1: - Combined two patches into one Changes from v2: - Cleaned up/corrected wording for patch description v3: [Put Hannes back as author] Signed-off-by: Hannes Reinecke Signed-off-by: Lee Duncan Reviewed-by: Mike Christie Signed-off-by: Konrad Rzeszutek Wilk --- drivers/firmware/iscsi_ibft.c | 4 ++++ drivers/scsi/iscsi_boot_sysfs.c | 5 +++++ include/linux/iscsi_boot_sysfs.h | 1 + 3 files changed, 10 insertions(+) (limited to 'include/linux') diff --git a/drivers/firmware/iscsi_ibft.c b/drivers/firmware/iscsi_ibft.c index 72791232e46b..81037e5fe301 100644 --- a/drivers/firmware/iscsi_ibft.c +++ b/drivers/firmware/iscsi_ibft.c @@ -319,6 +319,9 @@ static ssize_t ibft_attr_show_nic(void *data, int type, char *buf) val = cpu_to_be32(~((1 << (32-nic->subnet_mask_prefix))-1)); str += sprintf(str, "%pI4", &val); break; + case ISCSI_BOOT_ETH_PREFIX_LEN: + str += sprintf(str, "%d\n", nic->subnet_mask_prefix); + break; case ISCSI_BOOT_ETH_ORIGIN: str += sprintf(str, "%d\n", nic->origin); break; @@ -460,6 +463,7 @@ static umode_t ibft_check_nic_for(void *data, int type) if (address_not_null(nic->ip_addr)) rc = S_IRUGO; break; + case ISCSI_BOOT_ETH_PREFIX_LEN: case ISCSI_BOOT_ETH_SUBNET_MASK: if (nic->subnet_mask_prefix) rc = S_IRUGO; diff --git a/drivers/scsi/iscsi_boot_sysfs.c b/drivers/scsi/iscsi_boot_sysfs.c index 680bf6f0ce76..8f0ea97cf31f 100644 --- a/drivers/scsi/iscsi_boot_sysfs.c +++ b/drivers/scsi/iscsi_boot_sysfs.c @@ -166,6 +166,7 @@ static struct attribute_group iscsi_boot_target_attr_group = { iscsi_boot_rd_attr(eth_index, index, ISCSI_BOOT_ETH_INDEX); iscsi_boot_rd_attr(eth_flags, flags, ISCSI_BOOT_ETH_FLAGS); iscsi_boot_rd_attr(eth_ip, ip-addr, ISCSI_BOOT_ETH_IP_ADDR); +iscsi_boot_rd_attr(eth_prefix, prefix-len, ISCSI_BOOT_ETH_PREFIX_LEN); iscsi_boot_rd_attr(eth_subnet, subnet-mask, ISCSI_BOOT_ETH_SUBNET_MASK); iscsi_boot_rd_attr(eth_origin, origin, ISCSI_BOOT_ETH_ORIGIN); iscsi_boot_rd_attr(eth_gateway, gateway, ISCSI_BOOT_ETH_GATEWAY); @@ -181,6 +182,7 @@ static struct attribute *ethernet_attrs[] = { &iscsi_boot_attr_eth_index.attr, &iscsi_boot_attr_eth_flags.attr, &iscsi_boot_attr_eth_ip.attr, + &iscsi_boot_attr_eth_prefix.attr, &iscsi_boot_attr_eth_subnet.attr, &iscsi_boot_attr_eth_origin.attr, &iscsi_boot_attr_eth_gateway.attr, @@ -208,6 +210,9 @@ static umode_t iscsi_boot_eth_attr_is_visible(struct kobject *kobj, else if (attr == &iscsi_boot_attr_eth_ip.attr) return boot_kobj->is_visible(boot_kobj->data, ISCSI_BOOT_ETH_IP_ADDR); + else if (attr == &iscsi_boot_attr_eth_prefix.attr) + return boot_kobj->is_visible(boot_kobj->data, + ISCSI_BOOT_ETH_PREFIX_LEN); else if (attr == &iscsi_boot_attr_eth_subnet.attr) return boot_kobj->is_visible(boot_kobj->data, ISCSI_BOOT_ETH_SUBNET_MASK); diff --git a/include/linux/iscsi_boot_sysfs.h b/include/linux/iscsi_boot_sysfs.h index 2a8b1659bf35..548d55395488 100644 --- a/include/linux/iscsi_boot_sysfs.h +++ b/include/linux/iscsi_boot_sysfs.h @@ -23,6 +23,7 @@ enum iscsi_boot_eth_properties_enum { ISCSI_BOOT_ETH_INDEX, 
ISCSI_BOOT_ETH_FLAGS, ISCSI_BOOT_ETH_IP_ADDR, + ISCSI_BOOT_ETH_PREFIX_LEN, ISCSI_BOOT_ETH_SUBNET_MASK, ISCSI_BOOT_ETH_ORIGIN, ISCSI_BOOT_ETH_GATEWAY, -- cgit v1.2.3 From f3937549a975fadec9b517b059616f08f9cb7653 Mon Sep 17 00:00:00 2001 From: Laxman Dewangan Date: Tue, 9 Feb 2016 22:56:34 +0530 Subject: rtc: max77686: move initialisation of rtc regmap, irq chip locally To make the RTC block of the MAX77686/MAX77802 an independent driver, move the registration of the i2c device, the regmap for register access and the irq_chip for interrupt support into the RTC driver, and remove the same initialisation from the MFD driver. This change allows the driver to be reused for other Maxim Semiconductor PMICs that keep the same RTC IP. Examples are the MAX77620 and MAX20024 PMICs, which use the same RTC IP and whose drivers will therefore use this driver only for RTC support. Suggested-by: Krzysztof Kozlowski Signed-off-by: Laxman Dewangan Tested-by: Javier Martinez Canillas Reviewed-by: Javier Martinez Canillas Acked-by: Lee Jones Tested-by: Krzysztof Kozlowski Reviewed-by: Krzysztof Kozlowski Signed-off-by: Alexandre Belloni --- drivers/mfd/max77686.c | 85 +------------------- drivers/rtc/rtc-max77686.c | 148 ++++++++++++++++++++++++++++++----- include/linux/mfd/max77686-private.h | 3 - 3 files changed, 130 insertions(+), 106 deletions(-) (limited to 'include/linux') diff --git a/drivers/mfd/max77686.c b/drivers/mfd/max77686.c index bda6fd7b093b..98ecd136a21b 100644 --- a/drivers/mfd/max77686.c +++ b/drivers/mfd/max77686.c @@ -35,8 +35,6 @@ #include #include -#define I2C_ADDR_RTC (0x0C >> 1) - static const struct mfd_cell max77686_devs[] = { { .name = "max77686-pmic", }, { .name = "max77686-rtc", }, @@ -116,11 +114,6 @@ static const struct regmap_config max77686_regmap_config = { .reg_bits = 8, .val_bits = 8, }; -static const struct regmap_config max77686_rtc_regmap_config = { - .reg_bits = 8, - .val_bits = 8, -}; - static const struct regmap_config max77802_regmap_config = { .reg_bits = 8, .val_bits = 8, @@ -156,25 +149,6 @@ static const struct regmap_irq_chip max77686_irq_chip = { .num_irqs = ARRAY_SIZE(max77686_irqs), }; -static const struct regmap_irq max77686_rtc_irqs[] = { - /* RTC interrupts */ - { .reg_offset = 0, .mask = MAX77686_RTCINT_RTC60S_MSK, }, - { .reg_offset = 0, .mask = MAX77686_RTCINT_RTCA1_MSK, }, - { .reg_offset = 0, .mask = MAX77686_RTCINT_RTCA2_MSK, }, - { .reg_offset = 0, .mask = MAX77686_RTCINT_SMPL_MSK, }, - { .reg_offset = 0, .mask = MAX77686_RTCINT_RTC1S_MSK, }, - { .reg_offset = 0, .mask = MAX77686_RTCINT_WTSR_MSK, }, -}; - -static const struct regmap_irq_chip max77686_rtc_irq_chip = { - .name = "max77686-rtc", - .status_base = MAX77686_RTC_INT, - .mask_base = MAX77686_RTC_INTM, - .num_regs = 1, - .irqs = max77686_rtc_irqs, - .num_irqs = ARRAY_SIZE(max77686_rtc_irqs), -}; - static const struct regmap_irq_chip max77802_irq_chip = { .name = "max77802-pmic", .status_base = MAX77802_REG_INT1, @@ -184,15 +158,6 @@ static const struct regmap_irq_chip max77802_irq_chip = { .num_irqs = ARRAY_SIZE(max77686_irqs), }; -static const struct regmap_irq_chip max77802_rtc_irq_chip = { - .name = "max77802-rtc", - .status_base = MAX77802_RTC_INT, - .mask_base = MAX77802_RTC_INTM, - .num_regs = 1, - .irqs = max77686_rtc_irqs, /* same masks as 77686 */ - .num_irqs = ARRAY_SIZE(max77686_rtc_irqs), -}; - static const struct of_device_id max77686_pmic_dt_match[] = { { .compatible = "maxim,max77686", @@ -214,8 +179,6 @@ static int max77686_i2c_probe(struct i2c_client *i2c, int ret = 0; const
struct regmap_config *config; const struct regmap_irq_chip *irq_chip; - const struct regmap_irq_chip *rtc_irq_chip; - struct regmap **rtc_regmap; const struct mfd_cell *cells; int n_devs; @@ -242,15 +205,11 @@ static int max77686_i2c_probe(struct i2c_client *i2c, if (max77686->type == TYPE_MAX77686) { config = &max77686_regmap_config; irq_chip = &max77686_irq_chip; - rtc_irq_chip = &max77686_rtc_irq_chip; - rtc_regmap = &max77686->rtc_regmap; cells = max77686_devs; n_devs = ARRAY_SIZE(max77686_devs); } else { config = &max77802_regmap_config; irq_chip = &max77802_irq_chip; - rtc_irq_chip = &max77802_rtc_irq_chip; - rtc_regmap = &max77686->regmap; cells = max77802_devs; n_devs = ARRAY_SIZE(max77802_devs); } @@ -270,59 +229,25 @@ static int max77686_i2c_probe(struct i2c_client *i2c, return -ENODEV; } - if (max77686->type == TYPE_MAX77686) { - max77686->rtc = i2c_new_dummy(i2c->adapter, I2C_ADDR_RTC); - if (!max77686->rtc) { - dev_err(max77686->dev, - "Failed to allocate I2C device for RTC\n"); - return -ENODEV; - } - - max77686->rtc_regmap = - devm_regmap_init_i2c(max77686->rtc, - &max77686_rtc_regmap_config); - if (IS_ERR(max77686->rtc_regmap)) { - ret = PTR_ERR(max77686->rtc_regmap); - dev_err(max77686->dev, - "failed to allocate RTC regmap: %d\n", - ret); - goto err_unregister_i2c; - } - } - ret = regmap_add_irq_chip(max77686->regmap, max77686->irq, IRQF_TRIGGER_FALLING | IRQF_ONESHOT | IRQF_SHARED, 0, irq_chip, &max77686->irq_data); - if (ret) { + if (ret < 0) { dev_err(&i2c->dev, "failed to add PMIC irq chip: %d\n", ret); - goto err_unregister_i2c; - } - - ret = regmap_add_irq_chip(*rtc_regmap, max77686->irq, - IRQF_TRIGGER_FALLING | IRQF_ONESHOT | - IRQF_SHARED, 0, rtc_irq_chip, - &max77686->rtc_irq_data); - if (ret) { - dev_err(&i2c->dev, "failed to add RTC irq chip: %d\n", ret); - goto err_del_irqc; + return ret; } ret = mfd_add_devices(max77686->dev, -1, cells, n_devs, NULL, 0, NULL); if (ret < 0) { dev_err(&i2c->dev, "failed to add MFD devices: %d\n", ret); - goto err_del_rtc_irqc; + goto err_del_irqc; } return 0; -err_del_rtc_irqc: - regmap_del_irq_chip(max77686->irq, max77686->rtc_irq_data); err_del_irqc: regmap_del_irq_chip(max77686->irq, max77686->irq_data); -err_unregister_i2c: - if (max77686->type == TYPE_MAX77686) - i2c_unregister_device(max77686->rtc); return ret; } @@ -333,12 +258,8 @@ static int max77686_i2c_remove(struct i2c_client *i2c) mfd_remove_devices(max77686->dev); - regmap_del_irq_chip(max77686->irq, max77686->rtc_irq_data); regmap_del_irq_chip(max77686->irq, max77686->irq_data); - if (max77686->type == TYPE_MAX77686) - i2c_unregister_device(max77686->rtc); - return 0; } diff --git a/drivers/rtc/rtc-max77686.c b/drivers/rtc/rtc-max77686.c index 8fe1092c4795..5e924f3cde90 100644 --- a/drivers/rtc/rtc-max77686.c +++ b/drivers/rtc/rtc-max77686.c @@ -12,6 +12,7 @@ * */ +#include #include #include #include @@ -22,6 +23,9 @@ #include #include +#define MAX77686_I2C_ADDR_RTC (0x0C >> 1) +#define MAX77686_INVALID_I2C_ADDR (-1) + /* RTC Control Register */ #define BCD_EN_SHIFT 0 #define BCD_EN_MASK BIT(BCD_EN_SHIFT) @@ -68,8 +72,10 @@ struct max77686_rtc_driver_data { const unsigned int *map; /* Has a separate alarm enable register? */ bool alarm_enable_reg; - /* Has a separate I2C regmap for the RTC? 
*/ - bool separate_i2c_addr; + /* I2C address for RTC block */ + int rtc_i2c_addr; + /* RTC IRQ CHIP for regmap */ + const struct regmap_irq_chip *rtc_irq_chip; }; struct max77686_rtc_info { @@ -82,7 +88,9 @@ struct max77686_rtc_info { struct regmap *rtc_regmap; const struct max77686_rtc_driver_data *drv_data; + struct regmap_irq_chip_data *rtc_irq_data; + int rtc_irq; int virq; int rtc_24hr_mode; }; @@ -153,12 +161,32 @@ static const unsigned int max77686_map[REG_RTC_END] = { [REG_RTC_AE1] = REG_RTC_NONE, }; +static const struct regmap_irq max77686_rtc_irqs[] = { + /* RTC interrupts */ + { .reg_offset = 0, .mask = MAX77686_RTCINT_RTC60S_MSK, }, + { .reg_offset = 0, .mask = MAX77686_RTCINT_RTCA1_MSK, }, + { .reg_offset = 0, .mask = MAX77686_RTCINT_RTCA2_MSK, }, + { .reg_offset = 0, .mask = MAX77686_RTCINT_SMPL_MSK, }, + { .reg_offset = 0, .mask = MAX77686_RTCINT_RTC1S_MSK, }, + { .reg_offset = 0, .mask = MAX77686_RTCINT_WTSR_MSK, }, +}; + +static const struct regmap_irq_chip max77686_rtc_irq_chip = { + .name = "max77686-rtc", + .status_base = MAX77686_RTC_INT, + .mask_base = MAX77686_RTC_INTM, + .num_regs = 1, + .irqs = max77686_rtc_irqs, + .num_irqs = ARRAY_SIZE(max77686_rtc_irqs), +}; + static const struct max77686_rtc_driver_data max77686_drv_data = { .delay = 16000, .mask = 0x7f, .map = max77686_map, .alarm_enable_reg = false, - .separate_i2c_addr = true, + .rtc_i2c_addr = MAX77686_I2C_ADDR_RTC, + .rtc_irq_chip = &max77686_rtc_irq_chip, }; static const unsigned int max77802_map[REG_RTC_END] = { @@ -190,12 +218,22 @@ static const unsigned int max77802_map[REG_RTC_END] = { [REG_RTC_AE1] = MAX77802_RTC_AE1, }; +static const struct regmap_irq_chip max77802_rtc_irq_chip = { + .name = "max77802-rtc", + .status_base = MAX77802_RTC_INT, + .mask_base = MAX77802_RTC_INTM, + .num_regs = 1, + .irqs = max77686_rtc_irqs, /* same masks as 77686 */ + .num_irqs = ARRAY_SIZE(max77686_rtc_irqs), +}; + static const struct max77686_rtc_driver_data max77802_drv_data = { .delay = 200, .mask = 0xff, .map = max77802_map, .alarm_enable_reg = true, - .separate_i2c_addr = false, + .rtc_i2c_addr = MAX77686_INVALID_I2C_ADDR, + .rtc_irq_chip = &max77802_rtc_irq_chip, }; static void max77686_rtc_data_to_tm(u8 *data, struct rtc_time *tm, @@ -599,9 +637,65 @@ static int max77686_rtc_init_reg(struct max77686_rtc_info *info) return ret; } +static const struct regmap_config max77686_rtc_regmap_config = { + .reg_bits = 8, + .val_bits = 8, +}; + +static int max77686_init_rtc_regmap(struct max77686_rtc_info *info) +{ + struct device *parent = info->dev->parent; + struct i2c_client *parent_i2c = to_i2c_client(parent); + int ret; + + info->rtc_irq = parent_i2c->irq; + + info->regmap = dev_get_regmap(parent, NULL); + if (!info->regmap) { + dev_err(info->dev, "Failed to get rtc regmap\n"); + return -ENODEV; + } + + if (info->drv_data->rtc_i2c_addr == MAX77686_INVALID_I2C_ADDR) { + info->rtc_regmap = info->regmap; + goto add_rtc_irq; + } + + info->rtc = i2c_new_dummy(parent_i2c->adapter, + info->drv_data->rtc_i2c_addr); + if (!info->rtc) { + dev_err(info->dev, "Failed to allocate I2C device for RTC\n"); + return -ENODEV; + } + + info->rtc_regmap = devm_regmap_init_i2c(info->rtc, + &max77686_rtc_regmap_config); + if (IS_ERR(info->rtc_regmap)) { + ret = PTR_ERR(info->rtc_regmap); + dev_err(info->dev, "Failed to allocate RTC regmap: %d\n", ret); + goto err_unregister_i2c; + } + +add_rtc_irq: + ret = regmap_add_irq_chip(info->rtc_regmap, info->rtc_irq, + IRQF_TRIGGER_FALLING | IRQF_ONESHOT | + IRQF_SHARED, 0, 
info->drv_data->rtc_irq_chip, + &info->rtc_irq_data); + if (ret < 0) { + dev_err(info->dev, "Failed to add RTC irq chip: %d\n", ret); + goto err_unregister_i2c; + } + + return 0; + +err_unregister_i2c: + if (info->rtc) + i2c_unregister_device(info->rtc); + return ret; +} + static int max77686_rtc_probe(struct platform_device *pdev) { - struct max77686_dev *max77686 = dev_get_drvdata(pdev->dev.parent); struct max77686_rtc_info *info; const struct platform_device_id *id = platform_get_device_id(pdev); int ret; @@ -613,18 +707,16 @@ static int max77686_rtc_probe(struct platform_device *pdev) mutex_init(&info->lock); info->dev = &pdev->dev; - info->rtc = max77686->rtc; info->drv_data = (const struct max77686_rtc_driver_data *) id->driver_data; - info->regmap = max77686->regmap; - info->rtc_regmap = (info->drv_data->separate_i2c_addr) ? - max77686->rtc_regmap : info->regmap; + ret = max77686_init_rtc_regmap(info); + if (ret < 0) + return ret; platform_set_drvdata(pdev, info); ret = max77686_rtc_init_reg(info); - if (ret < 0) { dev_err(&pdev->dev, "Failed to initialize RTC reg:%d\n", ret); goto err_rtc; @@ -643,30 +735,43 @@ static int max77686_rtc_probe(struct platform_device *pdev) goto err_rtc; } - if (!max77686->rtc_irq_data) { - ret = -EINVAL; - dev_err(&pdev->dev, "No RTC regmap IRQ chip\n"); - goto err_rtc; - } - - info->virq = regmap_irq_get_virq(max77686->rtc_irq_data, + info->virq = regmap_irq_get_virq(info->rtc_irq_data, MAX77686_RTCIRQ_RTCA1); if (info->virq <= 0) { ret = -ENXIO; goto err_rtc; } - ret = devm_request_threaded_irq(&pdev->dev, info->virq, NULL, - max77686_rtc_alarm_irq, 0, - "rtc-alarm1", info); - if (ret < 0) + ret = request_threaded_irq(info->virq, NULL, max77686_rtc_alarm_irq, 0, + "rtc-alarm1", info); + if (ret < 0) { dev_err(&pdev->dev, "Failed to request alarm IRQ: %d: %d\n", info->virq, ret); + goto err_rtc; + } + + return 0; err_rtc: + regmap_del_irq_chip(info->rtc_irq, info->rtc_irq_data); + if (info->rtc) + i2c_unregister_device(info->rtc); + return ret; } +static int max77686_rtc_remove(struct platform_device *pdev) +{ + struct max77686_rtc_info *info = platform_get_drvdata(pdev); + + free_irq(info->virq, info); + regmap_del_irq_chip(info->rtc_irq, info->rtc_irq_data); + if (info->rtc) + i2c_unregister_device(info->rtc); + + return 0; +} + #ifdef CONFIG_PM_SLEEP static int max77686_rtc_suspend(struct device *dev) { @@ -707,6 +812,7 @@ static struct platform_driver max77686_rtc_driver = { .pm = &max77686_rtc_pm_ops, }, .probe = max77686_rtc_probe, + .remove = max77686_rtc_remove, .id_table = rtc_id, }; diff --git a/include/linux/mfd/max77686-private.h b/include/linux/mfd/max77686-private.h index f5043490d67c..643dae777b43 100644 --- a/include/linux/mfd/max77686-private.h +++ b/include/linux/mfd/max77686-private.h @@ -437,14 +437,11 @@ enum max77686_irq { struct max77686_dev { struct device *dev; struct i2c_client *i2c; /* 0xcc / PMIC, Battery Control, and FLASH */ - struct i2c_client *rtc; /* slave addr 0x0c */ unsigned long type; struct regmap *regmap; /* regmap for mfd */ - struct regmap *rtc_regmap; /* regmap for rtc */ struct regmap_irq_chip_data *irq_data; - struct regmap_irq_chip_data *rtc_irq_data; int irq; struct mutex irqlock; -- cgit v1.2.3 From b3967067c273359c7462f4be9a887c3d1a515f49 Mon Sep 17 00:00:00 2001 From: Joshua Clayton Date: Fri, 5 Feb 2016 12:41:11 -0800 Subject: rtc: Add functions to set and read rtc offset A number of rtc devices, such as the NXP pcf2123 include a facility to adjust the clock in order to compensate for temperature or a 
crystal, capacitor, etc, that results in the rtc clock not running at exactly 32.768 kHz. Data sheets I have seen refer to this as a clock offset, and measure it in parts per million, however they often reference ppm to 2 digits of precision, which makes integer ppm less than ideal. We use parts per billion, which more than covers the precision needed and works nicely within 32 bits Signed-off-by: Joshua Clayton Signed-off-by: Alexandre Belloni --- drivers/rtc/interface.c | 54 +++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/rtc.h | 4 ++++ 2 files changed, 58 insertions(+) (limited to 'include/linux') diff --git a/drivers/rtc/interface.c b/drivers/rtc/interface.c index 5836751b8203..9ef5f6f89f98 100644 --- a/drivers/rtc/interface.c +++ b/drivers/rtc/interface.c @@ -939,4 +939,58 @@ void rtc_timer_cancel(struct rtc_device *rtc, struct rtc_timer *timer) mutex_unlock(&rtc->ops_lock); } +/** + * rtc_read_offset - Read the amount of rtc offset in parts per billion + * @ rtc: rtc device to be used + * @ offset: the offset in parts per billion + * + * see below for details. + * + * Kernel interface to read rtc clock offset + * Returns 0 on success, or a negative number on error. + * If read_offset() is not implemented for the rtc, return -EINVAL + */ +int rtc_read_offset(struct rtc_device *rtc, long *offset) +{ + int ret; + + if (!rtc->ops) + return -ENODEV; + + if (!rtc->ops->read_offset) + return -EINVAL; + + mutex_lock(&rtc->ops_lock); + ret = rtc->ops->read_offset(rtc->dev.parent, offset); + mutex_unlock(&rtc->ops_lock); + return ret; +} +/** + * rtc_set_offset - Adjusts the duration of the average second + * @ rtc: rtc device to be used + * @ offset: the offset in parts per billion + * + * Some rtc's allow an adjustment to the average duration of a second + * to compensate for differences in the actual clock rate due to temperature, + * the crystal, capacitor, etc. + * + * Kernel interface to adjust an rtc clock offset. + * Return 0 on success, or a negative number on error. 
+ * If the rtc offset is not settable (or not implemented), return -EINVAL + */ +int rtc_set_offset(struct rtc_device *rtc, long offset) +{ + int ret; + + if (!rtc->ops) + return -ENODEV; + + if (!rtc->ops->set_offset) + return -EINVAL; + + mutex_lock(&rtc->ops_lock); + ret = rtc->ops->set_offset(rtc->dev.parent, offset); + mutex_unlock(&rtc->ops_lock); + return ret; +} diff --git a/include/linux/rtc.h b/include/linux/rtc.h index 3359f0422c6b..b693adac853b 100644 --- a/include/linux/rtc.h +++ b/include/linux/rtc.h @@ -89,6 +89,8 @@ struct rtc_class_ops { int (*set_mmss)(struct device *, unsigned long secs); int (*read_callback)(struct device *, int data); int (*alarm_irq_enable)(struct device *, unsigned int enabled); + int (*read_offset)(struct device *, long *offset); + int (*set_offset)(struct device *, long offset); }; #define RTC_DEVICE_NAME_SIZE 20 @@ -208,6 +210,8 @@ void rtc_timer_init(struct rtc_timer *timer, void (*f)(void *p), void *data); int rtc_timer_start(struct rtc_device *rtc, struct rtc_timer *timer, ktime_t expires, ktime_t period); void rtc_timer_cancel(struct rtc_device *rtc, struct rtc_timer *timer); +int rtc_read_offset(struct rtc_device *rtc, long *offset); +int rtc_set_offset(struct rtc_device *rtc, long offset); void rtc_timer_do_work(struct work_struct *work); static inline bool is_leap_year(unsigned int year) -- cgit v1.2.3 From f2900acea8018c4525ddaa86c7f7cd8afd3f0cc4 Mon Sep 17 00:00:00 2001 From: Marcin Wojtas Date: Mon, 14 Mar 2016 09:39:02 +0100 Subject: bus: mvebu-mbus: provide api for obtaining IO and DRAM window information This commit enables finding the appropriate mbus window and obtaining its target id and attribute for a given physical address in two separate routines, both for IO and DRAM windows. This functionality is needed for Armada XP/38x Network Controller's Buffer Manager and PnC configuration. [gregory.clement@free-electrons.com: Fix size test for mvebu_mbus_get_dram_win_info] Signed-off-by: Marcin Wojtas [DRAM window information reference in LKv3.10] Signed-off-by: Evan Wang Signed-off-by: Gregory CLEMENT Signed-off-by: David S.
Miller --- drivers/bus/mvebu-mbus.c | 52 ++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/mbus.h | 3 +++ 2 files changed, 55 insertions(+) (limited to 'include/linux') diff --git a/drivers/bus/mvebu-mbus.c b/drivers/bus/mvebu-mbus.c index c43c3d2baf73..c2e52864bb03 100644 --- a/drivers/bus/mvebu-mbus.c +++ b/drivers/bus/mvebu-mbus.c @@ -948,6 +948,58 @@ void mvebu_mbus_get_pcie_io_aperture(struct resource *res) *res = mbus_state.pcie_io_aperture; } +int mvebu_mbus_get_dram_win_info(phys_addr_t phyaddr, u8 *target, u8 *attr) +{ + const struct mbus_dram_target_info *dram; + int i; + + /* Get dram info */ + dram = mv_mbus_dram_info(); + if (!dram) { + pr_err("missing DRAM information\n"); + return -ENODEV; + } + + /* Try to find matching DRAM window for phyaddr */ + for (i = 0; i < dram->num_cs; i++) { + const struct mbus_dram_window *cs = dram->cs + i; + + if (cs->base <= phyaddr && + phyaddr <= (cs->base + cs->size - 1)) { + *target = dram->mbus_dram_target_id; + *attr = cs->mbus_attr; + return 0; + } + } + + pr_err("invalid dram address 0x%x\n", phyaddr); + return -EINVAL; +} +EXPORT_SYMBOL_GPL(mvebu_mbus_get_dram_win_info); + +int mvebu_mbus_get_io_win_info(phys_addr_t phyaddr, u32 *size, u8 *target, + u8 *attr) +{ + int win; + + for (win = 0; win < mbus_state.soc->num_wins; win++) { + u64 wbase; + int enabled; + + mvebu_mbus_read_window(&mbus_state, win, &enabled, &wbase, + size, target, attr, NULL); + + if (!enabled) + continue; + + if (wbase <= phyaddr && phyaddr <= wbase + *size) + return win; + } + + return -EINVAL; +} +EXPORT_SYMBOL_GPL(mvebu_mbus_get_io_win_info); + static __init int mvebu_mbus_debugfs_init(void) { struct mvebu_mbus_state *s = &mbus_state; diff --git a/include/linux/mbus.h b/include/linux/mbus.h index 1f7bc630d225..ea34a867caa0 100644 --- a/include/linux/mbus.h +++ b/include/linux/mbus.h @@ -69,6 +69,9 @@ static inline const struct mbus_dram_target_info *mv_mbus_dram_info_nooverlap(vo int mvebu_mbus_save_cpu_target(u32 *store_addr); void mvebu_mbus_get_pcie_mem_aperture(struct resource *res); void mvebu_mbus_get_pcie_io_aperture(struct resource *res); +int mvebu_mbus_get_dram_win_info(phys_addr_t phyaddr, u8 *target, u8 *attr); +int mvebu_mbus_get_io_win_info(phys_addr_t phyaddr, u32 *size, u8 *target, + u8 *attr); int mvebu_mbus_add_window_remap_by_id(unsigned int target, unsigned int attribute, phys_addr_t base, size_t size, -- cgit v1.2.3 From a44d6eacdaf56f74fad699af7f4925a5f5ac0e7f Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Mon, 14 Mar 2016 10:52:15 -0700 Subject: tcp: Add RFC4898 tcpEStatsPerfDataSegsOut/In Per RFC4898, they count segments sent/received containing a positive length data segment (that includes retransmission segments carrying data). Unlike tcpi_segs_out/in, tcpi_data_segs_out/in excludes segments carrying no data (e.g. pure ack). The patch also updates the segs_in in tcp_fastopen_add_skb() so that segs_in >= data_segs_in property is kept. Together with retransmission data, tcpi_data_segs_out gives a better signal on the rxmit rate. v6: Rebase on the latest net-next v5: Eric pointed out that checking skb->len is still needed in tcp_fastopen_add_skb() because skb can carry a FIN without data. Hence, instead of open coding segs_in and data_segs_in, tcp_segs_in() helper is used. Comment is added to the fastopen case to explain why segs_in has to be reset and tcp_segs_in() has to be called before __skb_pull(). v4: Add comment to the changes in tcp_fastopen_add_skb() and also add remark on this case in the commit message. 
v3: Add const modifier to the skb parameter in tcp_segs_in() v2: Rework based on recent fix by Eric: commit a9d99ce28ed3 ("tcp: fix tcpi_segs_in after connection establishment") Signed-off-by: Martin KaFai Lau Cc: Chris Rapier Cc: Eric Dumazet Cc: Marcelo Ricardo Leitner Cc: Neal Cardwell Cc: Yuchung Cheng Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/tcp.h | 6 ++++++ include/net/tcp.h | 10 ++++++++++ include/uapi/linux/tcp.h | 2 ++ net/ipv4/tcp.c | 2 ++ net/ipv4/tcp_fastopen.c | 8 ++++++++ net/ipv4/tcp_ipv4.c | 2 +- net/ipv4/tcp_minisocks.c | 2 +- net/ipv4/tcp_output.c | 4 +++- net/ipv6/tcp_ipv6.c | 2 +- 9 files changed, 34 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index bcbf51da4e1e..7be9b1242354 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -158,6 +158,9 @@ struct tcp_sock { u32 segs_in; /* RFC4898 tcpEStatsPerfSegsIn * total number of segments in. */ + u32 data_segs_in; /* RFC4898 tcpEStatsPerfDataSegsIn + * total number of data segments in. + */ u32 rcv_nxt; /* What we want to receive next */ u32 copied_seq; /* Head of yet unread data */ u32 rcv_wup; /* rcv_nxt on last window update sent */ @@ -165,6 +168,9 @@ struct tcp_sock { u32 segs_out; /* RFC4898 tcpEStatsPerfSegsOut * The total number of segments sent. */ + u32 data_segs_out; /* RFC4898 tcpEStatsPerfDataSegsOut + * total number of data segments sent. + */ u64 bytes_acked; /* RFC4898 tcpEStatsAppHCThruOctetsAcked * sum(delta(snd_una)), or how many bytes * were acked. diff --git a/include/net/tcp.h b/include/net/tcp.h index 0302636af98c..c8dbd293daae 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1840,4 +1840,14 @@ static inline int tcp_inq(struct sock *sk) return answ; } +static inline void tcp_segs_in(struct tcp_sock *tp, const struct sk_buff *skb) +{ + u16 segs_in; + + segs_in = max_t(u16, 1, skb_shinfo(skb)->gso_segs); + tp->segs_in += segs_in; + if (skb->len > tcp_hdrlen(skb)) + tp->data_segs_in += segs_in; +} + #endif /* _TCP_H */ diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h index fe95446e9abf..53e8e3fe6b1b 100644 --- a/include/uapi/linux/tcp.h +++ b/include/uapi/linux/tcp.h @@ -199,6 +199,8 @@ struct tcp_info { __u32 tcpi_notsent_bytes; __u32 tcpi_min_rtt; + __u32 tcpi_data_segs_in; /* RFC4898 tcpEStatsDataSegsIn */ + __u32 tcpi_data_segs_out; /* RFC4898 tcpEStatsDataSegsOut */ }; /* for TCP_MD5SIG socket option */ diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index a265f00b9df9..992b3103ec3e 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2715,6 +2715,8 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) info->tcpi_notsent_bytes = max(0, notsent_bytes); info->tcpi_min_rtt = tcp_min_rtt(tp); + info->tcpi_data_segs_in = tp->data_segs_in; + info->tcpi_data_segs_out = tp->data_segs_out; } EXPORT_SYMBOL_GPL(tcp_get_info); diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c index fdb286ddba04..4fc0061bebf4 100644 --- a/net/ipv4/tcp_fastopen.c +++ b/net/ipv4/tcp_fastopen.c @@ -140,6 +140,14 @@ void tcp_fastopen_add_skb(struct sock *sk, struct sk_buff *skb) return; skb_dst_drop(skb); + /* segs_in has been initialized to 1 in tcp_create_openreq_child(). + * Hence, reset segs_in to 0 before calling tcp_segs_in() + * to avoid double counting. Also, tcp_segs_in() expects + * skb->len to include the tcp_hdrlen. Hence, it should + * be called before __skb_pull(). 
+ */ + tp->segs_in = 0; + tcp_segs_in(tp, skb); __skb_pull(skb, tcp_hdrlen(skb)); skb_set_owner_r(skb, sk); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 4c8d58dfac9b..0b02ef773705 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1650,7 +1650,7 @@ process: sk_incoming_cpu_update(sk); bh_lock_sock_nested(sk); - tcp_sk(sk)->segs_in += max_t(u16, 1, skb_shinfo(skb)->gso_segs); + tcp_segs_in(tcp_sk(sk), skb); ret = 0; if (!sock_owned_by_user(sk)) { if (!tcp_prequeue(sk, skb)) diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index ae90e4b34bd3..acb366dd61e6 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -812,7 +812,7 @@ int tcp_child_process(struct sock *parent, struct sock *child, int ret = 0; int state = child->sk_state; - tcp_sk(child)->segs_in += max_t(u16, 1, skb_shinfo(skb)->gso_segs); + tcp_segs_in(tcp_sk(child), skb); if (!sock_owned_by_user(child)) { ret = tcp_rcv_state_process(child, skb); /* Wakeup parent, send SIGIO */ diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 7d2c7a400456..7d2dc015cd19 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1003,8 +1003,10 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, if (likely(tcb->tcp_flags & TCPHDR_ACK)) tcp_event_ack_sent(sk, tcp_skb_pcount(skb)); - if (skb->len != tcp_header_size) + if (skb->len != tcp_header_size) { tcp_event_data_sent(tp, sk); + tp->data_segs_out += tcp_skb_pcount(skb); + } if (after(tcb->end_seq, tp->snd_nxt) || tcb->seq == tcb->end_seq) TCP_ADD_STATS(sock_net(sk), TCP_MIB_OUTSEGS, diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 33f2820181f9..9c16565b70cc 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1443,7 +1443,7 @@ process: sk_incoming_cpu_update(sk); bh_lock_sock_nested(sk); - tcp_sk(sk)->segs_in += max_t(u16, 1, skb_shinfo(skb)->gso_segs); + tcp_segs_in(tcp_sk(sk), skb); ret = 0; if (!sock_owned_by_user(sk)) { if (!tcp_prequeue(sk, skb)) -- cgit v1.2.3 From 66cede527924543e0ff9414e6be65a923cbce10a Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 4 Mar 2016 11:27:26 -0500 Subject: xprtrdma: Clean up unused RPCRDMA_INLINE_PAD_THRESH macro Fixes: b3221d6a53c4 ('xprtrdma: Remove logic that constructs...') Signed-off-by: Chuck Lever Reviewed-by: Sagi Grimberg Signed-off-by: Anna Schumaker --- include/linux/sunrpc/xprtrdma.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/xprtrdma.h b/include/linux/sunrpc/xprtrdma.h index b7b279b54504..767190b01363 100644 --- a/include/linux/sunrpc/xprtrdma.h +++ b/include/linux/sunrpc/xprtrdma.h @@ -54,8 +54,6 @@ #define RPCRDMA_DEF_INLINE (1024) /* default inline max */ -#define RPCRDMA_INLINE_PAD_THRESH (512)/* payload threshold to pad (bytes) */ - /* Memory registration strategies, by number. * This is part of a kernel / user space API. Do not remove. */ enum rpcrdma_memreg { -- cgit v1.2.3 From d8647f2d3c381a6f543358710eb07dafa7070854 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 4 Mar 2016 11:28:09 -0500 Subject: rpcrdma: Add RPCRDMA_HDRLEN_ERR Error headers are shorter than either RDMA_MSG or RDMA_NOMSG. Since HDRLEN_MIN is already used in several other places that would be annoying to change, add RPCRDMA_HDRLEN_ERR for the one or two spots where the shorter length is needed. 
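Spelled out, the size relationship between the two macros; the five-word breakdown is inferred from struct rpcrdma_msg above (rm_xid, rm_vers, rm_credit, rm_type, then rm_err):

/* Sketch of the arithmetic; see also the reply-handler check in the next commit */
BUILD_BUG_ON(RPCRDMA_HDRLEN_MIN != 7 * sizeof(__be32));	/* 28 bytes, incl. 3 chunk-list words */
BUILD_BUG_ON(RPCRDMA_HDRLEN_ERR != 5 * sizeof(__be32));	/* 20 bytes: fixed header + rm_err */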
Signed-off-by: Chuck Lever Reviewed-by: Devesh Sharma Reviewed-by: Sagi Grimberg Signed-off-by: Anna Schumaker --- include/linux/sunrpc/rpc_rdma.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/rpc_rdma.h b/include/linux/sunrpc/rpc_rdma.h index f33c5a4d6fe4..8c6d23cb0cae 100644 --- a/include/linux/sunrpc/rpc_rdma.h +++ b/include/linux/sunrpc/rpc_rdma.h @@ -102,6 +102,7 @@ struct rpcrdma_msg { * Smallest RPC/RDMA header: rm_xid through rm_type, then rm_nochunks */ #define RPCRDMA_HDRLEN_MIN (sizeof(__be32) * 7) +#define RPCRDMA_HDRLEN_ERR (sizeof(__be32) * 5) enum rpcrdma_errcode { ERR_VERS = 1, -- cgit v1.2.3 From 59aa1f9a3cce388b4d7d842d6963df11d92a407e Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 4 Mar 2016 11:28:18 -0500 Subject: xprtrdma: Properly handle RDMA_ERROR replies These are shorter than RPCRDMA_HDRLEN_MIN, and they need to complete the waiting RPC. Signed-off-by: Chuck Lever Reviewed-by: Sagi Grimberg Signed-off-by: Anna Schumaker --- include/linux/sunrpc/rpc_rdma.h | 11 +++++---- net/sunrpc/xprtrdma/rpc_rdma.c | 51 ++++++++++++++++++++++++++++++++++------- 2 files changed, 49 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/rpc_rdma.h b/include/linux/sunrpc/rpc_rdma.h index 8c6d23cb0cae..3b1ff38f0c37 100644 --- a/include/linux/sunrpc/rpc_rdma.h +++ b/include/linux/sunrpc/rpc_rdma.h @@ -93,6 +93,12 @@ struct rpcrdma_msg { __be32 rm_pempty[3]; /* 3 empty chunk lists */ } rm_padded; + struct { + __be32 rm_err; + __be32 rm_vers_low; + __be32 rm_vers_high; + } rm_error; + __be32 rm_chunks[0]; /* read, write and reply chunks */ } rm_body; @@ -109,11 +115,6 @@ enum rpcrdma_errcode { ERR_CHUNK = 2 }; -struct rpcrdma_err_vers { - uint32_t rdma_vers_low; /* Version range supported by peer */ - uint32_t rdma_vers_high; -}; - enum rpcrdma_proc { RDMA_MSG = 0, /* An RPC call or reply msg */ RDMA_NOMSG = 1, /* An RPC call or reply msg - separate body */ diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c index 060739144552..35f810899729 100644 --- a/net/sunrpc/xprtrdma/rpc_rdma.c +++ b/net/sunrpc/xprtrdma/rpc_rdma.c @@ -795,7 +795,7 @@ rpcrdma_reply_handler(struct rpcrdma_rep *rep) struct rpcrdma_xprt *r_xprt = rep->rr_rxprt; struct rpc_xprt *xprt = &r_xprt->rx_xprt; __be32 *iptr; - int rdmalen, status; + int rdmalen, status, rmerr; unsigned long cwnd; u32 credits; @@ -803,12 +803,10 @@ rpcrdma_reply_handler(struct rpcrdma_rep *rep) if (rep->rr_len == RPCRDMA_BAD_LEN) goto out_badstatus; - if (rep->rr_len < RPCRDMA_HDRLEN_MIN) + if (rep->rr_len < RPCRDMA_HDRLEN_ERR) goto out_shortreply; headerp = rdmab_to_msg(rep->rr_rdmabuf); - if (headerp->rm_vers != rpcrdma_version) - goto out_badversion; #if defined(CONFIG_SUNRPC_BACKCHANNEL) if (rpcrdma_is_bcall(headerp)) goto out_bcall; @@ -838,6 +836,9 @@ rpcrdma_reply_handler(struct rpcrdma_rep *rep) req->rl_reply = rep; xprt->reestablish_timeout = 0; + if (headerp->rm_vers != rpcrdma_version) + goto out_badversion; + /* check for expected message types */ /* The order of some of these tests is important. */ switch (headerp->rm_type) { @@ -898,6 +899,9 @@ rpcrdma_reply_handler(struct rpcrdma_rep *rep) status = rdmalen; break; + case rdma_error: + goto out_rdmaerr; + badheader: default: dprintk("%s: invalid rpcrdma reply header (type %d):" @@ -913,6 +917,7 @@ badheader: break; } +out: /* Invalidate and flush the data payloads before waking the * waiting application. 
This guarantees the memory region is * properly fenced from the server before the application @@ -955,13 +960,43 @@ out_bcall: return; #endif -out_shortreply: - dprintk("RPC: %s: short/invalid reply\n", __func__); - goto repost; - +/* If the incoming reply terminated a pending RPC, the next + * RPC call will post a replacement receive buffer as it is + * being marshaled. + */ out_badversion: dprintk("RPC: %s: invalid version %d\n", __func__, be32_to_cpu(headerp->rm_vers)); + status = -EIO; + r_xprt->rx_stats.bad_reply_count++; + goto out; + +out_rdmaerr: + rmerr = be32_to_cpu(headerp->rm_body.rm_error.rm_err); + switch (rmerr) { + case ERR_VERS: + pr_err("%s: server reports header version error (%u-%u)\n", + __func__, + be32_to_cpu(headerp->rm_body.rm_error.rm_vers_low), + be32_to_cpu(headerp->rm_body.rm_error.rm_vers_high)); + break; + case ERR_CHUNK: + pr_err("%s: server reports header decoding error\n", + __func__); + break; + default: + pr_err("%s: server reports unknown error %d\n", + __func__, rmerr); + } + status = -EREMOTEIO; + r_xprt->rx_stats.bad_reply_count++; + goto out; + +/* If no pending RPC transaction was matched, post a replacement + * receive buffer before returning. + */ +out_shortreply: + dprintk("RPC: %s: short/invalid reply\n", __func__); goto repost; out_nomatch: -- cgit v1.2.3 From 5bcbe0f35fb13e31fdd0b2dc9695f19ab0208207 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Sat, 12 Mar 2016 00:01:40 +0100 Subject: phy: fixed: Fix removal of phys. The fixed phy delete function simply removed the fixed phy from the internal linked list and freed the memory. It did not, however, unregister the associated phy device, which meant it was still possible to find the phy device on the mdio bus. Make fixed_phy_del() an internal function and add a fixed_phy_unregister() that unregisters the phy device and then uses fixed_phy_del() to free resources. Modify DSA to use this new API function, so we don't leak phys. Signed-off-by: Andrew Lunn Signed-off-by: David S.
Miller --- drivers/net/phy/fixed_phy.c | 11 +++++++++-- include/linux/phy_fixed.h | 5 ++--- net/dsa/dsa.c | 4 +--- 3 files changed, 12 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/phy/fixed_phy.c b/drivers/net/phy/fixed_phy.c index ab9c473d75ea..fc07a8866020 100644 --- a/drivers/net/phy/fixed_phy.c +++ b/drivers/net/phy/fixed_phy.c @@ -285,7 +285,7 @@ err_regs: } EXPORT_SYMBOL_GPL(fixed_phy_add); -void fixed_phy_del(int phy_addr) +static void fixed_phy_del(int phy_addr) { struct fixed_mdio_bus *fmb = &platform_fmb; struct fixed_phy *fp, *tmp; @@ -300,7 +300,6 @@ void fixed_phy_del(int phy_addr) } } } -EXPORT_SYMBOL_GPL(fixed_phy_del); static int phy_fixed_addr; static DEFINE_SPINLOCK(phy_fixed_addr_lock); @@ -371,6 +370,14 @@ struct phy_device *fixed_phy_register(unsigned int irq, } EXPORT_SYMBOL_GPL(fixed_phy_register); +void fixed_phy_unregister(struct phy_device *phy) +{ + phy_device_remove(phy); + + fixed_phy_del(phy->mdio.addr); +} +EXPORT_SYMBOL_GPL(fixed_phy_unregister); + static int __init fixed_mdio_bus_init(void) { struct fixed_mdio_bus *fmb = &platform_fmb; diff --git a/include/linux/phy_fixed.h b/include/linux/phy_fixed.h index 2400d2ea4f34..1d41ec44e39d 100644 --- a/include/linux/phy_fixed.h +++ b/include/linux/phy_fixed.h @@ -19,7 +19,7 @@ extern struct phy_device *fixed_phy_register(unsigned int irq, struct fixed_phy_status *status, int link_gpio, struct device_node *np); -extern void fixed_phy_del(int phy_addr); +extern void fixed_phy_unregister(struct phy_device *phydev); extern int fixed_phy_set_link_update(struct phy_device *phydev, int (*link_update)(struct net_device *, struct fixed_phy_status *)); @@ -40,9 +40,8 @@ static inline struct phy_device *fixed_phy_register(unsigned int irq, { return ERR_PTR(-ENODEV); } -static inline int fixed_phy_del(int phy_addr) +static inline void fixed_phy_unregister(struct phy_device *phydev) { - return -ENODEV; } static inline int fixed_phy_set_link_update(struct phy_device *phydev, int (*link_update)(struct net_device *, diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index f100f340d93f..c28c47463b7e 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -447,11 +447,9 @@ static void dsa_switch_destroy(struct dsa_switch *ds) if (of_phy_is_fixed_link(port_dn)) { phydev = of_phy_find_device(port_dn); if (phydev) { - int addr = phydev->mdio.addr; - phy_device_free(phydev); of_node_put(port_dn); - fixed_phy_del(addr); + fixed_phy_unregister(phydev); } } } -- cgit v1.2.3 From 7b78f48a0443eceae435870b14e86d586f8c2a3e Mon Sep 17 00:00:00 2001 From: Heikki Krogerus Date: Tue, 15 Mar 2016 14:06:00 +0200 Subject: PCI: Add PCI_CLASS_SERIAL_USB_DEVICE definition PCI-SIG has defined Interface FEh for Base Class 0Ch, Sub-Class 03h as "USB Device (not host controller)". It is already being used in various USB device controller drivers for matching, so add PCI_CLASS_SERIAL_USB_DEVICE and use it. 
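As a quick, standalone sanity check of the encoding (an illustrative user-space snippet, not part of the patch; values taken from the hunks below):

	#include <stdio.h>

	#define PCI_CLASS_SERIAL_USB		0x0c03		/* base class 0Ch, sub-class 03h */
	#define PCI_CLASS_SERIAL_USB_DEVICE	0x0c03fe	/* ...with interface FEh appended */

	int main(void)
	{
		/* The open-coded form the drivers used before this patch. */
		unsigned int open_coded = (PCI_CLASS_SERIAL_USB << 8) | 0xfe;

		printf("open-coded %#08x, constant %#08x\n",
		       open_coded, PCI_CLASS_SERIAL_USB_DEVICE);
		return open_coded == PCI_CLASS_SERIAL_USB_DEVICE ? 0 : 1;
	}

With class_mask set to ~0 (or 0xffffffff), every bit of the 24-bit class value participates in the match, so only functions advertising exactly this base class, sub-class, and interface bind.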
Signed-off-by: Heikki Krogerus Signed-off-by: Bjorn Helgaas --- drivers/pci/quirks.c | 2 +- drivers/usb/gadget/udc/amd5536udc.c | 2 +- drivers/usb/gadget/udc/goku_udc.c | 2 +- drivers/usb/gadget/udc/net2280.c | 8 ++++---- drivers/usb/gadget/udc/pch_udc.c | 8 ++++---- include/linux/pci_ids.h | 1 + 6 files changed, 12 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index 0575a1e026b4..5714fcfde302 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -438,7 +438,7 @@ static void quirk_amd_nl_class(struct pci_dev *pdev) u32 class = pdev->class; /* Use "USB Device (not host controller)" class */ - pdev->class = (PCI_CLASS_SERIAL_USB << 8) | 0xfe; + pdev->class = PCI_CLASS_SERIAL_USB_DEVICE; dev_info(&pdev->dev, "PCI class overridden (%#08x -> %#08x) so dwc3 driver can claim this instead of xhci\n", class, pdev->class); } diff --git a/drivers/usb/gadget/udc/amd5536udc.c b/drivers/usb/gadget/udc/amd5536udc.c index cd8764150861..39d70b4a8958 100644 --- a/drivers/usb/gadget/udc/amd5536udc.c +++ b/drivers/usb/gadget/udc/amd5536udc.c @@ -3397,7 +3397,7 @@ err_pcidev: static const struct pci_device_id pci_id[] = { { PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x2096), - .class = (PCI_CLASS_SERIAL_USB << 8) | 0xfe, + .class = PCI_CLASS_SERIAL_USB_DEVICE, .class_mask = 0xffffffff, }, {}, diff --git a/drivers/usb/gadget/udc/goku_udc.c b/drivers/usb/gadget/udc/goku_udc.c index 1fdfec14a3ba..d2205d9e0c8b 100644 --- a/drivers/usb/gadget/udc/goku_udc.c +++ b/drivers/usb/gadget/udc/goku_udc.c @@ -1846,7 +1846,7 @@ err: /*-------------------------------------------------------------------------*/ static const struct pci_device_id pci_ids[] = { { - .class = ((PCI_CLASS_SERIAL_USB << 8) | 0xfe), + .class = PCI_CLASS_SERIAL_USB_DEVICE, .class_mask = ~0, .vendor = 0x102f, /* Toshiba */ .device = 0x0107, /* this UDC */ diff --git a/drivers/usb/gadget/udc/net2280.c b/drivers/usb/gadget/udc/net2280.c index 6706aef907f4..c894b94b234b 100644 --- a/drivers/usb/gadget/udc/net2280.c +++ b/drivers/usb/gadget/udc/net2280.c @@ -3735,7 +3735,7 @@ static void net2280_shutdown(struct pci_dev *pdev) /*-------------------------------------------------------------------------*/ static const struct pci_device_id pci_ids[] = { { - .class = ((PCI_CLASS_SERIAL_USB << 8) | 0xfe), + .class = PCI_CLASS_SERIAL_USB_DEVICE, .class_mask = ~0, .vendor = PCI_VENDOR_ID_PLX_LEGACY, .device = 0x2280, @@ -3743,7 +3743,7 @@ static const struct pci_device_id pci_ids[] = { { .subdevice = PCI_ANY_ID, .driver_data = PLX_LEGACY | PLX_2280, }, { - .class = ((PCI_CLASS_SERIAL_USB << 8) | 0xfe), + .class = PCI_CLASS_SERIAL_USB_DEVICE, .class_mask = ~0, .vendor = PCI_VENDOR_ID_PLX_LEGACY, .device = 0x2282, @@ -3752,7 +3752,7 @@ static const struct pci_device_id pci_ids[] = { { .driver_data = PLX_LEGACY, }, { - .class = ((PCI_CLASS_SERIAL_USB << 8) | 0xfe), + .class = PCI_CLASS_SERIAL_USB_DEVICE, .class_mask = ~0, .vendor = PCI_VENDOR_ID_PLX, .device = 0x3380, @@ -3761,7 +3761,7 @@ static const struct pci_device_id pci_ids[] = { { .driver_data = PLX_SUPERSPEED, }, { - .class = ((PCI_CLASS_SERIAL_USB << 8) | 0xfe), + .class = PCI_CLASS_SERIAL_USB_DEVICE, .class_mask = ~0, .vendor = PCI_VENDOR_ID_PLX, .device = 0x3382, diff --git a/drivers/usb/gadget/udc/pch_udc.c b/drivers/usb/gadget/udc/pch_udc.c index 7a04157ff579..9571ef54b86b 100644 --- a/drivers/usb/gadget/udc/pch_udc.c +++ b/drivers/usb/gadget/udc/pch_udc.c @@ -3234,22 +3234,22 @@ static const struct pci_device_id pch_udc_pcidev_id[] = { 
{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_QUARK_X1000_UDC), - .class = (PCI_CLASS_SERIAL_USB << 8) | 0xfe, + .class = PCI_CLASS_SERIAL_USB_DEVICE, .class_mask = 0xffffffff, }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_EG20T_UDC), - .class = (PCI_CLASS_SERIAL_USB << 8) | 0xfe, + .class = PCI_CLASS_SERIAL_USB_DEVICE, .class_mask = 0xffffffff, }, { PCI_DEVICE(PCI_VENDOR_ID_ROHM, PCI_DEVICE_ID_ML7213_IOH_UDC), - .class = (PCI_CLASS_SERIAL_USB << 8) | 0xfe, + .class = PCI_CLASS_SERIAL_USB_DEVICE, .class_mask = 0xffffffff, }, { PCI_DEVICE(PCI_VENDOR_ID_ROHM, PCI_DEVICE_ID_ML7831_IOH_UDC), - .class = (PCI_CLASS_SERIAL_USB << 8) | 0xfe, + .class = PCI_CLASS_SERIAL_USB_DEVICE, .class_mask = 0xffffffff, }, { 0 }, diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 6d249d32c8b8..247da8c95860 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -110,6 +110,7 @@ #define PCI_CLASS_SERIAL_USB_OHCI 0x0c0310 #define PCI_CLASS_SERIAL_USB_EHCI 0x0c0320 #define PCI_CLASS_SERIAL_USB_XHCI 0x0c0330 +#define PCI_CLASS_SERIAL_USB_DEVICE 0x0c03fe #define PCI_CLASS_SERIAL_FIBER 0x0c04 #define PCI_CLASS_SERIAL_SMBUS 0x0c05 -- cgit v1.2.3 From fab9963a69dbd71304357dbfe4ec5345f14cebdd Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Tue, 15 Mar 2016 14:53:38 -0700 Subject: mm: fault-inject take over bootstrap kmem_cache check Remove the SLAB specific function slab_should_failslab(), by moving the check against fault-injection for the bootstrap slab, into the shared function should_failslab() (used by both SLAB and SLUB). This is a step towards sharing alloc_hook's between SLUB and SLAB. This bootstrap slab "kmem_cache" is used for allocating struct kmem_cache objects to the allocator itself. Signed-off-by: Jesper Dangaard Brouer Cc: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Cc: Vladimir Davydov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/fault-inject.h | 5 ++--- mm/failslab.c | 12 +++++++++--- mm/slab.c | 12 ++---------- mm/slab.h | 2 +- 4 files changed, 14 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fault-inject.h b/include/linux/fault-inject.h index 3159a7dba034..9f4956d8601c 100644 --- a/include/linux/fault-inject.h +++ b/include/linux/fault-inject.h @@ -62,10 +62,9 @@ static inline struct dentry *fault_create_debugfs_attr(const char *name, #endif /* CONFIG_FAULT_INJECTION */ #ifdef CONFIG_FAILSLAB -extern bool should_failslab(size_t size, gfp_t gfpflags, unsigned long flags); +extern bool should_failslab(struct kmem_cache *s, gfp_t gfpflags); #else -static inline bool should_failslab(size_t size, gfp_t gfpflags, - unsigned long flags) +static inline bool should_failslab(struct kmem_cache *s, gfp_t gfpflags) { return false; } diff --git a/mm/failslab.c b/mm/failslab.c index 79171b4a5826..b0fac98cd938 100644 --- a/mm/failslab.c +++ b/mm/failslab.c @@ -1,5 +1,7 @@ #include #include +#include +#include "slab.h" static struct { struct fault_attr attr; @@ -11,18 +13,22 @@ static struct { .cache_filter = false, }; -bool should_failslab(size_t size, gfp_t gfpflags, unsigned long cache_flags) +bool should_failslab(struct kmem_cache *s, gfp_t gfpflags) { + /* No fault-injection for bootstrap cache */ + if (unlikely(s == kmem_cache)) + return false; + if (gfpflags & __GFP_NOFAIL) return false; if (failslab.ignore_gfp_reclaim && (gfpflags & __GFP_RECLAIM)) return false; - if (failslab.cache_filter && !(cache_flags & SLAB_FAILSLAB)) + if 
(failslab.cache_filter && !(s->flags & SLAB_FAILSLAB)) return false; - return should_fail(&failslab.attr, size); + return should_fail(&failslab.attr, s->object_size); } static int __init setup_failslab(char *str) diff --git a/mm/slab.c b/mm/slab.c index 621fbcb35a36..95f344d79453 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -2926,14 +2926,6 @@ static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep, #define cache_alloc_debugcheck_after(a,b,objp,d) (objp) #endif -static bool slab_should_failslab(struct kmem_cache *cachep, gfp_t flags) -{ - if (unlikely(cachep == kmem_cache)) - return false; - - return should_failslab(cachep->object_size, flags, cachep->flags); -} - static inline void *____cache_alloc(struct kmem_cache *cachep, gfp_t flags) { void *objp; @@ -3155,7 +3147,7 @@ slab_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid, lockdep_trace_alloc(flags); - if (slab_should_failslab(cachep, flags)) + if (should_failslab(cachep, flags)) return NULL; cachep = memcg_kmem_get_cache(cachep, flags); @@ -3243,7 +3235,7 @@ slab_alloc(struct kmem_cache *cachep, gfp_t flags, unsigned long caller) lockdep_trace_alloc(flags); - if (slab_should_failslab(cachep, flags)) + if (should_failslab(cachep, flags)) return NULL; cachep = memcg_kmem_get_cache(cachep, flags); diff --git a/mm/slab.h b/mm/slab.h index fd231c9f5f93..6c7f16a44386 100644 --- a/mm/slab.h +++ b/mm/slab.h @@ -360,7 +360,7 @@ static inline struct kmem_cache *slab_pre_alloc_hook(struct kmem_cache *s, lockdep_trace_alloc(flags); might_sleep_if(gfpflags_allow_blocking(flags)); - if (should_failslab(s->object_size, flags, s->flags)) + if (should_failslab(s, flags)) return NULL; return memcg_kmem_get_cache(s, flags); -- cgit v1.2.3 From ca257195511d536308700548de008b51729221eb Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Tue, 15 Mar 2016 14:54:00 -0700 Subject: mm: new API kfree_bulk() for SLAB+SLUB allocators This patch introduce a new API call kfree_bulk() for bulk freeing memory objects not bound to a single kmem_cache. Christoph pointed out that it is possible to implement freeing of objects, without knowing the kmem_cache pointer as that information is available from the object's page->slab_cache. Proposing to remove the kmem_cache argument from the bulk free API. Jesper demonstrated that these extra steps per object comes at a performance cost. It is only in the case CONFIG_MEMCG_KMEM is compiled in and activated runtime that these steps are done anyhow. The extra cost is most visible for SLAB allocator, because the SLUB allocator does the page lookup (virt_to_head_page()) anyhow. Thus, the conclusion was to keep the kmem_cache free bulk API with a kmem_cache pointer, but we can still implement a kfree_bulk() API fairly easily. Simply by handling if kmem_cache_free_bulk() gets called with a kmem_cache NULL pointer. This does increase the code size a bit, but implementing a separate kfree_bulk() call would likely increase code size even more. Below benchmarks cost of alloc+free (obj size 256 bytes) on CPU i7-4790K @ 4.00GHz, no PREEMPT and CONFIG_MEMCG_KMEM=y. 
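Before the numbers, a minimal usage sketch of the new call (illustrative kernel-context code, not from the patch; allocation-failure handling elided):

	#include <linux/slab.h>

	/* kfree_bulk(size, p) is kmem_cache_free_bulk(NULL, size, p): the NULL
	 * cache pointer tells the allocator to derive each object's kmem_cache
	 * from its page, so a batch of plain kmalloc() objects can be freed in
	 * one call.
	 */
	static void demo_kfree_bulk(void)
	{
		void *objs[8];
		size_t i;

		for (i = 0; i < ARRAY_SIZE(objs); i++)
			objs[i] = kmalloc(256, GFP_KERNEL);

		kfree_bulk(ARRAY_SIZE(objs), objs);
	}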
Code size increase for SLAB: add/remove: 0/0 grow/shrink: 1/0 up/down: 74/0 (74) function old new delta kmem_cache_free_bulk 660 734 +74 SLAB fastpath: 87 cycles(tsc) 21.814 sz - fallback - kmem_cache_free_bulk - kfree_bulk 1 - 103 cycles 25.878 ns - 41 cycles 10.498 ns - 81 cycles 20.312 ns 2 - 94 cycles 23.673 ns - 26 cycles 6.682 ns - 42 cycles 10.649 ns 3 - 92 cycles 23.181 ns - 21 cycles 5.325 ns - 39 cycles 9.950 ns 4 - 90 cycles 22.727 ns - 18 cycles 4.673 ns - 26 cycles 6.693 ns 8 - 89 cycles 22.270 ns - 14 cycles 3.664 ns - 23 cycles 5.835 ns 16 - 88 cycles 22.038 ns - 14 cycles 3.503 ns - 22 cycles 5.543 ns 30 - 89 cycles 22.284 ns - 13 cycles 3.310 ns - 20 cycles 5.197 ns 32 - 88 cycles 22.249 ns - 13 cycles 3.420 ns - 20 cycles 5.166 ns 34 - 88 cycles 22.224 ns - 14 cycles 3.643 ns - 20 cycles 5.170 ns 48 - 88 cycles 22.088 ns - 14 cycles 3.507 ns - 20 cycles 5.203 ns 64 - 88 cycles 22.063 ns - 13 cycles 3.428 ns - 20 cycles 5.152 ns 128 - 89 cycles 22.483 ns - 15 cycles 3.891 ns - 23 cycles 5.885 ns 158 - 89 cycles 22.381 ns - 15 cycles 3.779 ns - 22 cycles 5.548 ns 250 - 91 cycles 22.798 ns - 16 cycles 4.152 ns - 23 cycles 5.967 ns SLAB when enabling MEMCG_KMEM runtime: - kmemcg fastpath: 130 cycles(tsc) 32.684 ns (step:0) 1 - 148 cycles 37.220 ns - 66 cycles 16.622 ns - 66 cycles 16.583 ns 2 - 141 cycles 35.510 ns - 51 cycles 12.820 ns - 58 cycles 14.625 ns 3 - 140 cycles 35.017 ns - 37 cycles 9.326 ns - 33 cycles 8.474 ns 4 - 137 cycles 34.507 ns - 31 cycles 7.888 ns - 33 cycles 8.300 ns 8 - 140 cycles 35.069 ns - 25 cycles 6.461 ns - 25 cycles 6.436 ns 16 - 138 cycles 34.542 ns - 23 cycles 5.945 ns - 22 cycles 5.670 ns 30 - 136 cycles 34.227 ns - 22 cycles 5.502 ns - 22 cycles 5.587 ns 32 - 136 cycles 34.253 ns - 21 cycles 5.475 ns - 21 cycles 5.324 ns 34 - 136 cycles 34.254 ns - 21 cycles 5.448 ns - 20 cycles 5.194 ns 48 - 136 cycles 34.075 ns - 21 cycles 5.458 ns - 21 cycles 5.367 ns 64 - 135 cycles 33.994 ns - 21 cycles 5.350 ns - 21 cycles 5.259 ns 128 - 137 cycles 34.446 ns - 23 cycles 5.816 ns - 22 cycles 5.688 ns 158 - 137 cycles 34.379 ns - 22 cycles 5.727 ns - 22 cycles 5.602 ns 250 - 138 cycles 34.755 ns - 24 cycles 6.093 ns - 23 cycles 5.986 ns Code size increase for SLUB: function old new delta kmem_cache_free_bulk 717 799 +82 SLUB benchmark: SLUB fastpath: 46 cycles(tsc) 11.691 ns (step:0) sz - fallback - kmem_cache_free_bulk - kfree_bulk 1 - 61 cycles 15.486 ns - 53 cycles 13.364 ns - 57 cycles 14.464 ns 2 - 54 cycles 13.703 ns - 32 cycles 8.110 ns - 33 cycles 8.482 ns 3 - 53 cycles 13.272 ns - 25 cycles 6.362 ns - 27 cycles 6.947 ns 4 - 51 cycles 12.994 ns - 24 cycles 6.087 ns - 24 cycles 6.078 ns 8 - 50 cycles 12.576 ns - 21 cycles 5.354 ns - 22 cycles 5.513 ns 16 - 49 cycles 12.368 ns - 20 cycles 5.054 ns - 20 cycles 5.042 ns 30 - 49 cycles 12.273 ns - 18 cycles 4.748 ns - 19 cycles 4.758 ns 32 - 49 cycles 12.401 ns - 19 cycles 4.821 ns - 19 cycles 4.810 ns 34 - 98 cycles 24.519 ns - 24 cycles 6.154 ns - 24 cycles 6.157 ns 48 - 83 cycles 20.833 ns - 21 cycles 5.446 ns - 21 cycles 5.429 ns 64 - 75 cycles 18.891 ns - 20 cycles 5.247 ns - 20 cycles 5.238 ns 128 - 93 cycles 23.271 ns - 27 cycles 6.856 ns - 27 cycles 6.823 ns 158 - 102 cycles 25.581 ns - 30 cycles 7.714 ns - 30 cycles 7.695 ns 250 - 107 cycles 26.917 ns - 38 cycles 9.514 ns - 38 cycles 9.506 ns SLUB when enabling MEMCG_KMEM runtime: - kmemcg fastpath: 71 cycles(tsc) 17.897 ns (step:0) 1 - 85 cycles 21.484 ns - 78 cycles 19.569 ns - 75 cycles 18.938 ns 2 - 81 cycles 20.363 ns - 45 cycles 11.258 
ns - 44 cycles 11.076 ns 3 - 78 cycles 19.709 ns - 33 cycles 8.354 ns - 32 cycles 8.044 ns 4 - 77 cycles 19.430 ns - 28 cycles 7.216 ns - 28 cycles 7.003 ns 8 - 101 cycles 25.288 ns - 23 cycles 5.849 ns - 23 cycles 5.787 ns 16 - 76 cycles 19.148 ns - 20 cycles 5.162 ns - 20 cycles 5.081 ns 30 - 76 cycles 19.067 ns - 19 cycles 4.868 ns - 19 cycles 4.821 ns 32 - 76 cycles 19.052 ns - 19 cycles 4.857 ns - 19 cycles 4.815 ns 34 - 121 cycles 30.291 ns - 25 cycles 6.333 ns - 25 cycles 6.268 ns 48 - 108 cycles 27.111 ns - 21 cycles 5.498 ns - 21 cycles 5.458 ns 64 - 100 cycles 25.164 ns - 20 cycles 5.242 ns - 20 cycles 5.229 ns 128 - 155 cycles 38.976 ns - 27 cycles 6.886 ns - 27 cycles 6.892 ns 158 - 132 cycles 33.034 ns - 30 cycles 7.711 ns - 30 cycles 7.728 ns 250 - 130 cycles 32.612 ns - 38 cycles 9.560 ns - 38 cycles 9.549 ns Signed-off-by: Jesper Dangaard Brouer Cc: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Cc: Vladimir Davydov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/slab.h | 9 +++++++++ mm/slab.c | 5 ++++- mm/slab_common.c | 8 ++++++-- mm/slub.c | 21 ++++++++++++++++++--- 4 files changed, 37 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/slab.h b/include/linux/slab.h index 3627d5c1bc47..9d9a5bdb9b00 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -323,6 +323,15 @@ void kmem_cache_free(struct kmem_cache *, void *); void kmem_cache_free_bulk(struct kmem_cache *, size_t, void **); int kmem_cache_alloc_bulk(struct kmem_cache *, gfp_t, size_t, void **); +/* + * Caller must not use kfree_bulk() on memory not originally allocated + * by kmalloc(), because the SLOB allocator cannot handle this. + */ +static __always_inline void kfree_bulk(size_t size, void **p) +{ + kmem_cache_free_bulk(NULL, size, p); +} + #ifdef CONFIG_NUMA void *__kmalloc_node(size_t size, gfp_t flags, int node) __assume_kmalloc_alignment; void *kmem_cache_alloc_node(struct kmem_cache *, gfp_t flags, int node) __assume_slab_alignment; diff --git a/mm/slab.c b/mm/slab.c index 2e075c0acfcf..b3eca034d0b4 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -3587,7 +3587,10 @@ void kmem_cache_free_bulk(struct kmem_cache *orig_s, size_t size, void **p) for (i = 0; i < size; i++) { void *objp = p[i]; - s = cache_from_obj(orig_s, objp); + if (!orig_s) /* called via kfree_bulk */ + s = virt_to_cache(objp); + else + s = cache_from_obj(orig_s, objp); debug_check_no_locks_freed(objp, s->object_size); if (!(s->flags & SLAB_DEBUG_OBJECTS)) diff --git a/mm/slab_common.c b/mm/slab_common.c index 065b7bdabdc3..6afb2263a5c5 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c @@ -109,8 +109,12 @@ void __kmem_cache_free_bulk(struct kmem_cache *s, size_t nr, void **p) { size_t i; - for (i = 0; i < nr; i++) - kmem_cache_free(s, p[i]); + for (i = 0; i < nr; i++) { + if (s) + kmem_cache_free(s, p[i]); + else + kfree(p[i]); + } } int __kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t nr, diff --git a/mm/slub.c b/mm/slub.c index 6dd04c0465c5..9620815da342 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -2783,23 +2783,38 @@ int build_detached_freelist(struct kmem_cache *s, size_t size, size_t first_skipped_index = 0; int lookahead = 3; void *object; + struct page *page; /* Always re-init detached_freelist */ df->page = NULL; do { object = p[--size]; + /* Do we need !ZERO_OR_NULL_PTR(object) here? 
(for kfree) */ } while (!object && size); if (!object) return 0; - /* Support for memcg, compiler can optimize this out */ - df->s = cache_from_obj(s, object); + page = virt_to_head_page(object); + if (!s) { + /* Handle kalloc'ed objects */ + if (unlikely(!PageSlab(page))) { + BUG_ON(!PageCompound(page)); + kfree_hook(object); + __free_kmem_pages(page, compound_order(page)); + p[size] = NULL; /* mark object processed */ + return size; + } + /* Derive kmem_cache from object */ + df->s = page->slab_cache; + } else { + df->s = cache_from_obj(s, object); /* Support for memcg */ + } /* Start new detached freelist */ + df->page = page; set_freepointer(df->s, object, NULL); - df->page = virt_to_head_page(object); df->tail = object; df->freelist = object; p[size] = NULL; /* mark object processed */ -- cgit v1.2.3 From 9f706d6820d3ea776d6b3fc0c1de9f81eb0d021b Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Tue, 15 Mar 2016 14:54:03 -0700 Subject: mm: fix some spelling Fix up trivial spelling errors, noticed while reading the code. Signed-off-by: Jesper Dangaard Brouer Cc: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Cc: Vladimir Davydov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 2 +- include/linux/slab.h | 2 +- mm/slab.h | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 792c8981e633..30b02e79610e 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -765,7 +765,7 @@ int __memcg_kmem_charge(struct page *page, gfp_t gfp, int order); void __memcg_kmem_uncharge(struct page *page, int order); /* - * helper for acessing a memcg's index. It will be used as an index in the + * helper for accessing a memcg's index. It will be used as an index in the * child cache array in kmem_cache, and also to derive its name. This function * will return -1 when this is not a kmem-limited memcg. */ diff --git a/include/linux/slab.h b/include/linux/slab.h index 9d9a5bdb9b00..5d49f0c60dcb 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -314,7 +314,7 @@ void *kmem_cache_alloc(struct kmem_cache *, gfp_t flags) __assume_slab_alignment void kmem_cache_free(struct kmem_cache *, void *); /* - * Bulk allocation and freeing operations. These are accellerated in an + * Bulk allocation and freeing operations. These are accelerated in an * allocator specific way to avoid taking locks repeatedly or building * metadata structures unnecessarily. * diff --git a/mm/slab.h b/mm/slab.h index 6c7f16a44386..e880bbe91973 100644 --- a/mm/slab.h +++ b/mm/slab.h @@ -172,7 +172,7 @@ ssize_t slabinfo_write(struct file *file, const char __user *buffer, /* * Generic implementation of bulk operations * These are useful for situations in which the allocator cannot - * perform optimizations. In that case segments of the objecct listed + * perform optimizations. In that case segments of the object listed * may be allocated or freed using these operations. */ void __kmem_cache_free_bulk(struct kmem_cache *, size_t, void **); -- cgit v1.2.3 From 40b44137971c2e5865a78f9f7de274449983ccb5 Mon Sep 17 00:00:00 2001 From: Joonsoo Kim Date: Tue, 15 Mar 2016 14:54:21 -0700 Subject: mm/slab: clean up DEBUG_PAGEALLOC processing code Currently, open code for checking DEBUG_PAGEALLOC cache is spread to some sites. It makes code unreadable and hard to change. This patch cleans up this code. 
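Condensed from the slab.c hunks below (simplified, not a literal quote of the patch), the scattered open-coded test becomes one predicate plus one mapping helper that every debug site can call:

	static bool is_debug_pagealloc_cache(struct kmem_cache *cachep)
	{
		return debug_pagealloc_enabled() && OFF_SLAB(cachep) &&
		       (cachep->size % PAGE_SIZE) == 0;
	}

	static void slab_kernel_map(struct kmem_cache *cachep, void *objp,
				    int map, unsigned long caller)
	{
		if (!is_debug_pagealloc_cache(cachep))
			return;
		if (caller)
			store_stackinfo(cachep, objp, caller);
		kernel_map_pages(virt_to_page(objp), cachep->size / PAGE_SIZE, map);
	}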
The following patch will change the criteria for DEBUG_PAGEALLOC cache so this clean-up will help it, too. [akpm@linux-foundation.org: fix build with CONFIG_DEBUG_PAGEALLOC=n] Signed-off-by: Joonsoo Kim Cc: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Cc: Jesper Dangaard Brouer Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 12 ++++--- mm/slab.c | 97 +++++++++++++++++++++++++++--------------------------- 2 files changed, 57 insertions(+), 52 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 2b6e22782699..69fd6bbb8cce 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2195,14 +2195,18 @@ kernel_map_pages(struct page *page, int numpages, int enable) } #ifdef CONFIG_HIBERNATION extern bool kernel_page_present(struct page *page); -#endif /* CONFIG_HIBERNATION */ -#else +#endif /* CONFIG_HIBERNATION */ +#else /* CONFIG_DEBUG_PAGEALLOC */ static inline void kernel_map_pages(struct page *page, int numpages, int enable) {} #ifdef CONFIG_HIBERNATION static inline bool kernel_page_present(struct page *page) { return true; } -#endif /* CONFIG_HIBERNATION */ -#endif +#endif /* CONFIG_HIBERNATION */ +static inline bool debug_pagealloc_enabled(void) +{ + return false; +} +#endif /* CONFIG_DEBUG_PAGEALLOC */ #ifdef __HAVE_ARCH_GATE_AREA extern struct vm_area_struct *get_gate_vma(struct mm_struct *mm); diff --git a/mm/slab.c b/mm/slab.c index 8bca9be5d557..3142ec3965cf 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -1661,6 +1661,14 @@ static void kmem_rcu_free(struct rcu_head *head) } #if DEBUG +static bool is_debug_pagealloc_cache(struct kmem_cache *cachep) +{ + if (debug_pagealloc_enabled() && OFF_SLAB(cachep) && + (cachep->size % PAGE_SIZE) == 0) + return true; + + return false; +} #ifdef CONFIG_DEBUG_PAGEALLOC static void store_stackinfo(struct kmem_cache *cachep, unsigned long *addr, @@ -1694,6 +1702,23 @@ static void store_stackinfo(struct kmem_cache *cachep, unsigned long *addr, } *addr++ = 0x87654321; } + +static void slab_kernel_map(struct kmem_cache *cachep, void *objp, + int map, unsigned long caller) +{ + if (!is_debug_pagealloc_cache(cachep)) + return; + + if (caller) + store_stackinfo(cachep, objp, caller); + + kernel_map_pages(virt_to_page(objp), cachep->size / PAGE_SIZE, map); +} + +#else +static inline void slab_kernel_map(struct kmem_cache *cachep, void *objp, + int map, unsigned long caller) {} + #endif static void poison_obj(struct kmem_cache *cachep, void *addr, unsigned char val) @@ -1772,6 +1797,9 @@ static void check_poison_obj(struct kmem_cache *cachep, void *objp) int size, i; int lines = 0; + if (is_debug_pagealloc_cache(cachep)) + return; + realobj = (char *)objp + obj_offset(cachep); size = cachep->object_size; @@ -1837,17 +1865,8 @@ static void slab_destroy_debugcheck(struct kmem_cache *cachep, void *objp = index_to_obj(cachep, page, i); if (cachep->flags & SLAB_POISON) { -#ifdef CONFIG_DEBUG_PAGEALLOC - if (debug_pagealloc_enabled() && - cachep->size % PAGE_SIZE == 0 && - OFF_SLAB(cachep)) - kernel_map_pages(virt_to_page(objp), - cachep->size / PAGE_SIZE, 1); - else - check_poison_obj(cachep, objp); -#else check_poison_obj(cachep, objp); -#endif + slab_kernel_map(cachep, objp, 1, 0); } if (cachep->flags & SLAB_RED_ZONE) { if (*dbg_redzone1(cachep, objp) != RED_INACTIVE) @@ -2226,16 +2245,6 @@ __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags) if (flags & CFLGS_OFF_SLAB) { /* really off slab. 
No need for manual alignment */ freelist_size = calculate_freelist_size(cachep->num, 0); - -#ifdef CONFIG_PAGE_POISONING - /* If we're going to use the generic kernel_map_pages() - * poisoning, then it's going to smash the contents of - * the redzone and userword anyhow, so switch them off. - */ - if (debug_pagealloc_enabled() && - size % PAGE_SIZE == 0 && flags & SLAB_POISON) - flags &= ~(SLAB_RED_ZONE | SLAB_STORE_USER); -#endif } cachep->colour_off = cache_line_size(); @@ -2251,7 +2260,19 @@ __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags) cachep->size = size; cachep->reciprocal_buffer_size = reciprocal_value(size); - if (flags & CFLGS_OFF_SLAB) { +#if DEBUG + /* + * If we're going to use the generic kernel_map_pages() + * poisoning, then it's going to smash the contents of + * the redzone and userword anyhow, so switch them off. + */ + if (IS_ENABLED(CONFIG_PAGE_POISONING) && + (cachep->flags & SLAB_POISON) && + is_debug_pagealloc_cache(cachep)) + cachep->flags &= ~(SLAB_RED_ZONE | SLAB_STORE_USER); +#endif + + if (OFF_SLAB(cachep)) { cachep->freelist_cache = kmalloc_slab(freelist_size, 0u); /* * This is a possibility for one of the kmalloc_{dma,}_caches. @@ -2475,9 +2496,6 @@ static void cache_init_objs(struct kmem_cache *cachep, for (i = 0; i < cachep->num; i++) { void *objp = index_to_obj(cachep, page, i); #if DEBUG - /* need to poison the objs? */ - if (cachep->flags & SLAB_POISON) - poison_obj(cachep, objp, POISON_FREE); if (cachep->flags & SLAB_STORE_USER) *dbg_userword(cachep, objp) = NULL; @@ -2501,10 +2519,11 @@ static void cache_init_objs(struct kmem_cache *cachep, slab_error(cachep, "constructor overwrote the" " start of an object"); } - if ((cachep->size % PAGE_SIZE) == 0 && - OFF_SLAB(cachep) && cachep->flags & SLAB_POISON) - kernel_map_pages(virt_to_page(objp), - cachep->size / PAGE_SIZE, 0); + /* need to poison the objs? */ + if (cachep->flags & SLAB_POISON) { + poison_obj(cachep, objp, POISON_FREE); + slab_kernel_map(cachep, objp, 0, 0); + } #else if (cachep->ctor) cachep->ctor(objp); @@ -2716,18 +2735,8 @@ static void *cache_free_debugcheck(struct kmem_cache *cachep, void *objp, set_obj_status(page, objnr, OBJECT_FREE); if (cachep->flags & SLAB_POISON) { -#ifdef CONFIG_DEBUG_PAGEALLOC - if (debug_pagealloc_enabled() && - (cachep->size % PAGE_SIZE) == 0 && OFF_SLAB(cachep)) { - store_stackinfo(cachep, objp, caller); - kernel_map_pages(virt_to_page(objp), - cachep->size / PAGE_SIZE, 0); - } else { - poison_obj(cachep, objp, POISON_FREE); - } -#else poison_obj(cachep, objp, POISON_FREE); -#endif + slab_kernel_map(cachep, objp, 0, caller); } return objp; } @@ -2862,16 +2871,8 @@ static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep, if (!objp) return objp; if (cachep->flags & SLAB_POISON) { -#ifdef CONFIG_DEBUG_PAGEALLOC - if (debug_pagealloc_enabled() && - (cachep->size % PAGE_SIZE) == 0 && OFF_SLAB(cachep)) - kernel_map_pages(virt_to_page(objp), - cachep->size / PAGE_SIZE, 1); - else - check_poison_obj(cachep, objp); -#else check_poison_obj(cachep, objp); -#endif + slab_kernel_map(cachep, objp, 1, 0); poison_obj(cachep, objp, POISON_INUSE); } if (cachep->flags & SLAB_STORE_USER) -- cgit v1.2.3 From d31676dfde257cb2b3e52d4e657d8ad2251e4d49 Mon Sep 17 00:00:00 2001 From: Joonsoo Kim Date: Tue, 15 Mar 2016 14:54:24 -0700 Subject: mm/slab: alternative implementation for DEBUG_SLAB_LEAK DEBUG_SLAB_LEAK is a debug option. It's current implementation requires status buffer so we need more memory to use it. 
And, it cause kmem_cache initialization step more complex. To remove this extra memory usage and to simplify initialization step, this patch implement this feature with another way. When user requests to get slab object owner information, it marks that getting information is started. And then, all free objects in caches are flushed to corresponding slab page. Now, we can distinguish all freed object so we can know all allocated objects, too. After collecting slab object owner information on allocated objects, mark is checked that there is no free during the processing. If true, we can be sure that our information is correct so information is returned to user. Although this way is rather complex, it has two important benefits mentioned above. So, I think it is worth changing. There is one drawback that it takes more time to get slab object owner information but it is just a debug option so it doesn't matter at all. To help review, this patch implements new way only. Following patch will remove useless code. Signed-off-by: Joonsoo Kim Cc: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Cc: Jesper Dangaard Brouer Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/slab_def.h | 3 ++ mm/slab.c | 85 +++++++++++++++++++++++++++++++++++------------- 2 files changed, 66 insertions(+), 22 deletions(-) (limited to 'include/linux') diff --git a/include/linux/slab_def.h b/include/linux/slab_def.h index cf139d3fa513..e878ba35ae91 100644 --- a/include/linux/slab_def.h +++ b/include/linux/slab_def.h @@ -60,6 +60,9 @@ struct kmem_cache { atomic_t allocmiss; atomic_t freehit; atomic_t freemiss; +#ifdef CONFIG_DEBUG_SLAB_LEAK + atomic_t store_user_clean; +#endif /* * If debugging is enabled, then the allocator can add additional diff --git a/mm/slab.c b/mm/slab.c index 3142ec3965cf..907abe9964bf 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -396,20 +396,25 @@ static void set_obj_status(struct page *page, int idx, int val) status[idx] = val; } -static inline unsigned int get_obj_status(struct page *page, int idx) +static inline bool is_store_user_clean(struct kmem_cache *cachep) { - int freelist_size; - char *status; - struct kmem_cache *cachep = page->slab_cache; + return atomic_read(&cachep->store_user_clean) == 1; +} - freelist_size = cachep->num * sizeof(freelist_idx_t); - status = (char *)page->freelist + freelist_size; +static inline void set_store_user_clean(struct kmem_cache *cachep) +{ + atomic_set(&cachep->store_user_clean, 1); +} - return status[idx]; +static inline void set_store_user_dirty(struct kmem_cache *cachep) +{ + if (is_store_user_clean(cachep)) + atomic_set(&cachep->store_user_clean, 0); } #else static inline void set_obj_status(struct page *page, int idx, int val) {} +static inline void set_store_user_dirty(struct kmem_cache *cachep) {} #endif @@ -2550,6 +2555,11 @@ static void *slab_get_obj(struct kmem_cache *cachep, struct page *page) objp = index_to_obj(cachep, page, get_free_obj(page, page->active)); page->active++; +#if DEBUG + if (cachep->flags & SLAB_STORE_USER) + set_store_user_dirty(cachep); +#endif + return objp; } @@ -2725,8 +2735,10 @@ static void *cache_free_debugcheck(struct kmem_cache *cachep, void *objp, *dbg_redzone1(cachep, objp) = RED_INACTIVE; *dbg_redzone2(cachep, objp) = RED_INACTIVE; } - if (cachep->flags & SLAB_STORE_USER) + if (cachep->flags & SLAB_STORE_USER) { + set_store_user_dirty(cachep); *dbg_userword(cachep, objp) = (void *)caller; + } objnr = obj_to_index(cachep, page, objp); @@ -4119,15 +4131,34 @@ static void 
handle_slab(unsigned long *n, struct kmem_cache *c, struct page *page) { void *p; - int i; + int i, j; + unsigned long v; if (n[0] == n[1]) return; for (i = 0, p = page->s_mem; i < c->num; i++, p += c->size) { - if (get_obj_status(page, i) != OBJECT_ACTIVE) + bool active = true; + + for (j = page->active; j < c->num; j++) { + if (get_free_obj(page, j) == i) { + active = false; + break; + } + } + + if (!active) continue; - if (!add_caller(n, (unsigned long)*dbg_userword(c, p))) + /* + * probe_kernel_read() is used for DEBUG_PAGEALLOC. page table + * mapping is established when actual object allocation and + * we could mistakenly access the unmapped object in the cpu + * cache. + */ + if (probe_kernel_read(&v, dbg_userword(c, p), sizeof(v))) + continue; + + if (!add_caller(n, v)) return; } } @@ -4163,21 +4194,31 @@ static int leaks_show(struct seq_file *m, void *p) if (!(cachep->flags & SLAB_RED_ZONE)) return 0; - /* OK, we can do it */ + /* + * Set store_user_clean and start to grab stored user information + * for all objects on this cache. If some alloc/free requests comes + * during the processing, information would be wrong so restart + * whole processing. + */ + do { + set_store_user_clean(cachep); + drain_cpu_caches(cachep); + + x[1] = 0; - x[1] = 0; + for_each_kmem_cache_node(cachep, node, n) { - for_each_kmem_cache_node(cachep, node, n) { + check_irq_on(); + spin_lock_irq(&n->list_lock); - check_irq_on(); - spin_lock_irq(&n->list_lock); + list_for_each_entry(page, &n->slabs_full, lru) + handle_slab(x, cachep, page); + list_for_each_entry(page, &n->slabs_partial, lru) + handle_slab(x, cachep, page); + spin_unlock_irq(&n->list_lock); + } + } while (!is_store_user_clean(cachep)); - list_for_each_entry(page, &n->slabs_full, lru) - handle_slab(x, cachep, page); - list_for_each_entry(page, &n->slabs_partial, lru) - handle_slab(x, cachep, page); - spin_unlock_irq(&n->list_lock); - } name = cachep->name; if (x[0] == x[1]) { /* Increase the buffer size */ -- cgit v1.2.3 From becfda68abca673d61d5cc953e8e099816db99d9 Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Tue, 15 Mar 2016 14:55:06 -0700 Subject: slub: convert SLAB_DEBUG_FREE to SLAB_CONSISTENCY_CHECKS SLAB_DEBUG_FREE allows expensive consistency checks at free to be turned on or off. Expand its use to be able to turn off all consistency checks. This gives a nice speed up if you only want features such as poisoning or tracing. Credit to Mathias Krause for the original work which inspired this series Signed-off-by: Laura Abbott Acked-by: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Cc: Kees Cook Cc: Mathias Krause Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/vm/slub.txt | 4 +- include/linux/slab.h | 2 +- mm/slab.h | 5 ++- mm/slub.c | 94 +++++++++++++++++++++++++++++------------------ tools/vm/slabinfo.c | 2 +- 5 files changed, 66 insertions(+), 41 deletions(-) (limited to 'include/linux') diff --git a/Documentation/vm/slub.txt b/Documentation/vm/slub.txt index f0d340959319..84652419bff2 100644 --- a/Documentation/vm/slub.txt +++ b/Documentation/vm/slub.txt @@ -35,8 +35,8 @@ slub_debug=, Enable options only for select slabs Possible debug options are - F Sanity checks on (enables SLAB_DEBUG_FREE. 
Sorry - SLAB legacy issues) + F Sanity checks on (enables SLAB_DEBUG_CONSISTENCY_CHECKS + Sorry SLAB legacy issues) Z Red zoning P Poisoning (object and padding) U User tracking (free and alloc) diff --git a/include/linux/slab.h b/include/linux/slab.h index 5d49f0c60dcb..e4b568738ca3 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -20,7 +20,7 @@ * Flags to pass to kmem_cache_create(). * The ones marked DEBUG are only valid if CONFIG_DEBUG_SLAB is set. */ -#define SLAB_DEBUG_FREE 0x00000100UL /* DEBUG: Perform (expensive) checks on free */ +#define SLAB_CONSISTENCY_CHECKS 0x00000100UL /* DEBUG: Perform (expensive) checks on alloc/free */ #define SLAB_RED_ZONE 0x00000400UL /* DEBUG: Red zone objs in a cache */ #define SLAB_POISON 0x00000800UL /* DEBUG: Poison objects */ #define SLAB_HWCACHE_ALIGN 0x00002000UL /* Align objs on cache lines */ diff --git a/mm/slab.h b/mm/slab.h index e880bbe91973..b7934361f026 100644 --- a/mm/slab.h +++ b/mm/slab.h @@ -125,7 +125,7 @@ static inline unsigned long kmem_cache_flags(unsigned long object_size, #define SLAB_DEBUG_FLAGS (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER) #elif defined(CONFIG_SLUB_DEBUG) #define SLAB_DEBUG_FLAGS (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER | \ - SLAB_TRACE | SLAB_DEBUG_FREE) + SLAB_TRACE | SLAB_CONSISTENCY_CHECKS) #else #define SLAB_DEBUG_FLAGS (0) #endif @@ -311,7 +311,8 @@ static inline struct kmem_cache *cache_from_obj(struct kmem_cache *s, void *x) * to not do even the assignment. In that case, slab_equal_or_root * will also be a constant. */ - if (!memcg_kmem_enabled() && !unlikely(s->flags & SLAB_DEBUG_FREE)) + if (!memcg_kmem_enabled() && + !unlikely(s->flags & SLAB_CONSISTENCY_CHECKS)) return s; page = virt_to_head_page(x); diff --git a/mm/slub.c b/mm/slub.c index 744d29b43bf6..9cde663bbb10 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -160,7 +160,7 @@ static inline bool kmem_cache_has_cpu_partial(struct kmem_cache *s) */ #define MAX_PARTIAL 10 -#define DEBUG_DEFAULT_FLAGS (SLAB_DEBUG_FREE | SLAB_RED_ZONE | \ +#define DEBUG_DEFAULT_FLAGS (SLAB_CONSISTENCY_CHECKS | SLAB_RED_ZONE | \ SLAB_POISON | SLAB_STORE_USER) /* @@ -1007,20 +1007,32 @@ static void setup_object_debug(struct kmem_cache *s, struct page *page, init_tracking(s, object); } -static noinline int alloc_debug_processing(struct kmem_cache *s, +static inline int alloc_consistency_checks(struct kmem_cache *s, struct page *page, void *object, unsigned long addr) { if (!check_slab(s, page)) - goto bad; + return 0; if (!check_valid_pointer(s, page, object)) { object_err(s, page, object, "Freelist Pointer check fails"); - goto bad; + return 0; } if (!check_object(s, page, object, SLUB_RED_INACTIVE)) - goto bad; + return 0; + + return 1; +} + +static noinline int alloc_debug_processing(struct kmem_cache *s, + struct page *page, + void *object, unsigned long addr) +{ + if (s->flags & SLAB_CONSISTENCY_CHECKS) { + if (!alloc_consistency_checks(s, page, object, addr)) + goto bad; + } /* Success perform special debug activities for allocs */ if (s->flags & SLAB_STORE_USER) @@ -1043,39 +1055,21 @@ bad: return 0; } -/* Supports checking bulk free of a constructed freelist */ -static noinline int free_debug_processing( - struct kmem_cache *s, struct page *page, - void *head, void *tail, int bulk_cnt, - unsigned long addr) +static inline int free_consistency_checks(struct kmem_cache *s, + struct page *page, void *object, unsigned long addr) { - struct kmem_cache_node *n = get_node(s, page_to_nid(page)); - void *object = head; - int cnt = 0; - unsigned long 
uninitialized_var(flags); - int ret = 0; - - spin_lock_irqsave(&n->list_lock, flags); - slab_lock(page); - - if (!check_slab(s, page)) - goto out; - -next_object: - cnt++; - if (!check_valid_pointer(s, page, object)) { slab_err(s, page, "Invalid object pointer 0x%p", object); - goto out; + return 0; } if (on_freelist(s, page, object)) { object_err(s, page, object, "Object already free"); - goto out; + return 0; } if (!check_object(s, page, object, SLUB_RED_ACTIVE)) - goto out; + return 0; if (unlikely(s != page->slab_cache)) { if (!PageSlab(page)) { @@ -1088,7 +1082,37 @@ next_object: } else object_err(s, page, object, "page slab pointer corrupt."); - goto out; + return 0; + } + return 1; +} + +/* Supports checking bulk free of a constructed freelist */ +static noinline int free_debug_processing( + struct kmem_cache *s, struct page *page, + void *head, void *tail, int bulk_cnt, + unsigned long addr) +{ + struct kmem_cache_node *n = get_node(s, page_to_nid(page)); + void *object = head; + int cnt = 0; + unsigned long uninitialized_var(flags); + int ret = 0; + + spin_lock_irqsave(&n->list_lock, flags); + slab_lock(page); + + if (s->flags & SLAB_CONSISTENCY_CHECKS) { + if (!check_slab(s, page)) + goto out; + } + +next_object: + cnt++; + + if (s->flags & SLAB_CONSISTENCY_CHECKS) { + if (!free_consistency_checks(s, page, object, addr)) + goto out; } if (s->flags & SLAB_STORE_USER) @@ -1145,7 +1169,7 @@ static int __init setup_slub_debug(char *str) for (; *str && *str != ','; str++) { switch (tolower(*str)) { case 'f': - slub_debug |= SLAB_DEBUG_FREE; + slub_debug |= SLAB_CONSISTENCY_CHECKS; break; case 'z': slub_debug |= SLAB_RED_ZONE; @@ -1449,7 +1473,7 @@ static void __free_slab(struct kmem_cache *s, struct page *page) int order = compound_order(page); int pages = 1 << order; - if (kmem_cache_debug(s)) { + if (s->flags & SLAB_CONSISTENCY_CHECKS) { void *p; slab_pad_check(s, page); @@ -4769,16 +4793,16 @@ SLAB_ATTR_RO(total_objects); static ssize_t sanity_checks_show(struct kmem_cache *s, char *buf) { - return sprintf(buf, "%d\n", !!(s->flags & SLAB_DEBUG_FREE)); + return sprintf(buf, "%d\n", !!(s->flags & SLAB_CONSISTENCY_CHECKS)); } static ssize_t sanity_checks_store(struct kmem_cache *s, const char *buf, size_t length) { - s->flags &= ~SLAB_DEBUG_FREE; + s->flags &= ~SLAB_CONSISTENCY_CHECKS; if (buf[0] == '1') { s->flags &= ~__CMPXCHG_DOUBLE; - s->flags |= SLAB_DEBUG_FREE; + s->flags |= SLAB_CONSISTENCY_CHECKS; } return length; } @@ -5313,7 +5337,7 @@ static char *create_unique_id(struct kmem_cache *s) *p++ = 'd'; if (s->flags & SLAB_RECLAIM_ACCOUNT) *p++ = 'a'; - if (s->flags & SLAB_DEBUG_FREE) + if (s->flags & SLAB_CONSISTENCY_CHECKS) *p++ = 'F'; if (!(s->flags & SLAB_NOTRACK)) *p++ = 't'; diff --git a/tools/vm/slabinfo.c b/tools/vm/slabinfo.c index 86e698d07e20..1889163f2f05 100644 --- a/tools/vm/slabinfo.c +++ b/tools/vm/slabinfo.c @@ -135,7 +135,7 @@ static void usage(void) "\nValid debug options (FZPUT may be combined)\n" "a / A Switch on all debug options (=FZUP)\n" "- Switch off all debug options\n" - "f / F Sanity Checks (SLAB_DEBUG_FREE)\n" + "f / F Sanity Checks (SLAB_CONSISTENCY_CHECKS)\n" "z / Z Redzoning\n" "p / P Poisoning\n" "u / U Tracking\n" -- cgit v1.2.3 From d86bd1bece6fc41d59253002db5441fe960a37f6 Mon Sep 17 00:00:00 2001 From: Joonsoo Kim Date: Tue, 15 Mar 2016 14:55:12 -0700 Subject: mm/slub: support left redzone SLUB already has a redzone debugging feature. But it is only positioned at the end of object (aka right redzone) so it cannot catch left oob. 
Although current object's right redzone acts as left redzone of next object, first object in a slab cannot take advantage of this effect. This patch explicitly adds a left red zone to each object to detect left oob more precisely. Background: Someone complained to me that left OOB doesn't catch even if KASAN is enabled which does page allocation debugging. That page is out of our control so it would be allocated when left OOB happens and, in this case, we can't find OOB. Moreover, SLUB debugging feature can be enabled without page allocator debugging and, in this case, we will miss that OOB. Before trying to implement, I expected that changes would be too complex, but, it doesn't look that complex to me now. Almost changes are applied to debug specific functions so I feel okay. Signed-off-by: Joonsoo Kim Cc: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/slub_def.h | 1 + mm/slub.c | 100 +++++++++++++++++++++++++++++++++-------------- 2 files changed, 72 insertions(+), 29 deletions(-) (limited to 'include/linux') diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h index b7e57927f521..ac5143f95ee6 100644 --- a/include/linux/slub_def.h +++ b/include/linux/slub_def.h @@ -81,6 +81,7 @@ struct kmem_cache { int reserved; /* Reserved bytes at the end of slabs */ const char *name; /* Name (only for display!) */ struct list_head list; /* List of slab caches */ + int red_left_pad; /* Left redzone padding size */ #ifdef CONFIG_SYSFS struct kobject kobj; /* For sysfs */ #endif diff --git a/mm/slub.c b/mm/slub.c index c2a227d8a4ee..2d4d817f3d7a 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -124,6 +124,14 @@ static inline int kmem_cache_debug(struct kmem_cache *s) #endif } +static inline void *fixup_red_left(struct kmem_cache *s, void *p) +{ + if (kmem_cache_debug(s) && s->flags & SLAB_RED_ZONE) + p += s->red_left_pad; + + return p; +} + static inline bool kmem_cache_has_cpu_partial(struct kmem_cache *s) { #ifdef CONFIG_SLUB_CPU_PARTIAL @@ -232,24 +240,6 @@ static inline void stat(const struct kmem_cache *s, enum stat_item si) * Core slab cache functions *******************************************************************/ -/* Verify that a pointer has an address that is valid within a slab page */ -static inline int check_valid_pointer(struct kmem_cache *s, - struct page *page, const void *object) -{ - void *base; - - if (!object) - return 1; - - base = page_address(page); - if (object < base || object >= base + page->objects * s->size || - (object - base) % s->size) { - return 0; - } - - return 1; -} - static inline void *get_freepointer(struct kmem_cache *s, void *object) { return *(void **)(object + s->offset); @@ -279,12 +269,14 @@ static inline void set_freepointer(struct kmem_cache *s, void *object, void *fp) /* Loop over all objects in a slab */ #define for_each_object(__p, __s, __addr, __objects) \ - for (__p = (__addr); __p < (__addr) + (__objects) * (__s)->size;\ - __p += (__s)->size) + for (__p = fixup_red_left(__s, __addr); \ + __p < (__addr) + (__objects) * (__s)->size; \ + __p += (__s)->size) #define for_each_object_idx(__p, __idx, __s, __addr, __objects) \ - for (__p = (__addr), __idx = 1; __idx <= __objects;\ - __p += (__s)->size, __idx++) + for (__p = fixup_red_left(__s, __addr), __idx = 1; \ + __idx <= __objects; \ + __p += (__s)->size, __idx++) /* Determine object index from a given position */ static inline int slab_index(void *p, struct kmem_cache *s, void *addr) @@ -442,6 +434,22 @@ static 
void get_map(struct kmem_cache *s, struct page *page, unsigned long *map) set_bit(slab_index(p, s, addr), map); } +static inline int size_from_object(struct kmem_cache *s) +{ + if (s->flags & SLAB_RED_ZONE) + return s->size - s->red_left_pad; + + return s->size; +} + +static inline void *restore_red_left(struct kmem_cache *s, void *p) +{ + if (s->flags & SLAB_RED_ZONE) + p -= s->red_left_pad; + + return p; +} + /* * Debug settings: */ @@ -475,6 +483,26 @@ static inline void metadata_access_disable(void) /* * Object debugging */ + +/* Verify that a pointer has an address that is valid within a slab page */ +static inline int check_valid_pointer(struct kmem_cache *s, + struct page *page, void *object) +{ + void *base; + + if (!object) + return 1; + + base = page_address(page); + object = restore_red_left(s, object); + if (object < base || object >= base + page->objects * s->size || + (object - base) % s->size) { + return 0; + } + + return 1; +} + static void print_section(char *text, u8 *addr, unsigned int length) { metadata_access_enable(); @@ -614,7 +642,9 @@ static void print_trailer(struct kmem_cache *s, struct page *page, u8 *p) pr_err("INFO: Object 0x%p @offset=%tu fp=0x%p\n\n", p, p - addr, get_freepointer(s, p)); - if (p > addr + 16) + if (s->flags & SLAB_RED_ZONE) + print_section("Redzone ", p - s->red_left_pad, s->red_left_pad); + else if (p > addr + 16) print_section("Bytes b4 ", p - 16, 16); print_section("Object ", p, min_t(unsigned long, s->object_size, @@ -631,9 +661,9 @@ static void print_trailer(struct kmem_cache *s, struct page *page, u8 *p) if (s->flags & SLAB_STORE_USER) off += 2 * sizeof(struct track); - if (off != s->size) + if (off != size_from_object(s)) /* Beginning of the filler is the free pointer */ - print_section("Padding ", p + off, s->size - off); + print_section("Padding ", p + off, size_from_object(s) - off); dump_stack(); } @@ -663,6 +693,9 @@ static void init_object(struct kmem_cache *s, void *object, u8 val) { u8 *p = object; + if (s->flags & SLAB_RED_ZONE) + memset(p - s->red_left_pad, val, s->red_left_pad); + if (s->flags & __OBJECT_POISON) { memset(p, POISON_FREE, s->object_size - 1); p[s->object_size - 1] = POISON_END; @@ -755,11 +788,11 @@ static int check_pad_bytes(struct kmem_cache *s, struct page *page, u8 *p) /* We also have user information there */ off += 2 * sizeof(struct track); - if (s->size == off) + if (size_from_object(s) == off) return 1; return check_bytes_and_report(s, page, p, "Object padding", - p + off, POISON_INUSE, s->size - off); + p + off, POISON_INUSE, size_from_object(s) - off); } /* Check the pad bytes at the end of a slab page */ @@ -803,6 +836,10 @@ static int check_object(struct kmem_cache *s, struct page *page, u8 *endobject = object + s->object_size; if (s->flags & SLAB_RED_ZONE) { + if (!check_bytes_and_report(s, page, object, "Redzone", + object - s->red_left_pad, val, s->red_left_pad)) + return 0; + if (!check_bytes_and_report(s, page, object, "Redzone", endobject, val, s->inuse - s->object_size)) return 0; @@ -1445,7 +1482,7 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node) set_freepointer(s, p, NULL); } - page->freelist = start; + page->freelist = fixup_red_left(s, start); page->inuse = page->objects; page->frozen = 1; @@ -3274,7 +3311,7 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order) */ size += 2 * sizeof(struct track); - if (flags & SLAB_RED_ZONE) + if (flags & SLAB_RED_ZONE) { /* * Add some empty padding so that we can catch * overwrites from earlier objects 
rather than let @@ -3283,6 +3320,11 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order) * of the object. */ size += sizeof(void *); + + s->red_left_pad = sizeof(void *); + s->red_left_pad = ALIGN(s->red_left_pad, s->align); + size += s->red_left_pad; + } #endif /* -- cgit v1.2.3 From 20f6e03a40ba536dfc7c6f83dd894d994aeb39f3 Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Tue, 15 Mar 2016 14:55:42 -0700 Subject: tracepoints: move trace_print_flags definitions to tracepoint-defs.h The following patch will need to declare an array of struct trace_print_flags in a header. To prevent this header from pulling in all of RCU through trace_events.h, move the struct trace_print_flags{_u64} definitions to the new lightweight tracepoint-defs.h header. Signed-off-by: Vlastimil Babka Acked-by: David Rientjes Cc: Steven Rostedt Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Ingo Molnar Cc: Rasmus Villemoes Cc: Joonsoo Kim Cc: Minchan Kim Cc: Sasha Levin Cc: "Kirill A. Shutemov" Cc: Mel Gorman Cc: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/trace_events.h | 10 ---------- include/linux/tracepoint-defs.h | 14 ++++++++++++-- 2 files changed, 12 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index 925730bc9fc1..705df7db4482 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -15,16 +15,6 @@ struct tracer; struct dentry; struct bpf_prog; -struct trace_print_flags { - unsigned long mask; - const char *name; -}; - -struct trace_print_flags_u64 { - unsigned long long mask; - const char *name; -}; - const char *trace_print_flags_seq(struct trace_seq *p, const char *delim, unsigned long flags, const struct trace_print_flags *flag_array); diff --git a/include/linux/tracepoint-defs.h b/include/linux/tracepoint-defs.h index e1ee97c713bf..4ac89acb6136 100644 --- a/include/linux/tracepoint-defs.h +++ b/include/linux/tracepoint-defs.h @@ -3,13 +3,23 @@ /* * File can be included directly by headers who only want to access - * tracepoint->key to guard out of line trace calls. Otherwise - * linux/tracepoint.h should be used. + * tracepoint->key to guard out of line trace calls, or the definition of + * trace_print_flags{_u64}. Otherwise linux/tracepoint.h should be used. */ #include #include +struct trace_print_flags { + unsigned long mask; + const char *name; +}; + +struct trace_print_flags_u64 { + unsigned long long mask; + const char *name; +}; + struct tracepoint_func { void *func; void *data; -- cgit v1.2.3 From 1f7866b4aebd19e2525775083279e171b36783a4 Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Tue, 15 Mar 2016 14:55:45 -0700 Subject: mm, tracing: make show_gfp_flags() up to date The show_gfp_flags() macro provides human-friendly printing of gfp flags in tracepoints. However, it is somewhat out of date and missing several flags. This patch fills in the missing flags, and distinguishes properly between GFP_ATOMIC and __GFP_ATOMIC, which were both translated to "GFP_ATOMIC". More generally, all __GFP_X flags which were previously printed as GFP_X are now printed as __GFP_X, since omitting the underscores results in output that doesn't actually match the source code and can only lead to confusion. Where both variants are defined equal (e.g. _DMA and _DMA32), the variant without underscores is preferred. Also add a note in gfp.h so hopefully future changes will be synced better.
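To see how such a translation table is consumed, here is a small userspace sketch of the first-match-wins decomposition performed by __print_flags(): each mask fully contained in the flags is printed and its bits cleared, which is why composite masks such as GFP_KERNEL must be listed before their component __GFP_* bits. The bit values below are illustrative, not the kernel's.

#include <stdio.h>

struct flag_name { unsigned long mask; const char *name; };

/* Composite masks first, component bits after: first match wins. */
static const struct flag_name names[] = {
	{ 0xd0, "GFP_KERNEL" },	/* illustrative: __GFP_RECLAIM|__GFP_IO|__GFP_FS */
	{ 0x40, "__GFP_IO" },
	{ 0x80, "__GFP_FS" },
	{ 0x10, "__GFP_RECLAIM" },
};

static void show_flags(unsigned long flags)
{
	const char *delim = "";
	size_t i;

	for (i = 0; i < sizeof(names) / sizeof(names[0]); i++) {
		/* print a name only if all of its mask bits are set */
		if ((flags & names[i].mask) != names[i].mask)
			continue;
		printf("%s%s", delim, names[i].name);
		flags &= ~names[i].mask;
		delim = "|";
	}
	if (flags)
		printf("%s%#lx", delim, flags);	/* leftover unknown bits */
	printf("\n");
}

int main(void)
{
	show_flags(0xd0);	/* GFP_KERNEL */
	show_flags(0xc0);	/* __GFP_IO|__GFP_FS */
	return 0;
}

Running it prints "GFP_KERNEL" for 0xd0 but "__GFP_IO|__GFP_FS" for 0xc0, which is exactly why the most-populated masks must go first.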
__GFP_MOVABLE is defined twice in include/linux/gfp.h with different comments. Leave just the newer one, which was intended to replace the old one. Signed-off-by: Vlastimil Babka Reviewed-by: Michal Hocko Acked-by: David Rientjes Cc: Steven Rostedt Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Ingo Molnar Cc: Rasmus Villemoes Cc: Joonsoo Kim Cc: Minchan Kim Cc: Sasha Levin Cc: "Kirill A. Shutemov" Cc: Mel Gorman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/gfp.h | 6 ++++- include/trace/events/gfpflags.h | 53 ++++++++++++++++++++++++----------------- 2 files changed, 36 insertions(+), 23 deletions(-) (limited to 'include/linux') diff --git a/include/linux/gfp.h b/include/linux/gfp.h index af1f2b24bbe4..bbe5e7fae337 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -9,6 +9,11 @@ struct vm_area_struct; +/* + * In case of changes, please don't forget to update + * include/trace/events/gfpflags.h + */ + /* Plain integer GFP bitmasks. Do not use this directly. */ #define ___GFP_DMA 0x01u #define ___GFP_HIGHMEM 0x02u @@ -48,7 +53,6 @@ struct vm_area_struct; #define __GFP_DMA ((__force gfp_t)___GFP_DMA) #define __GFP_HIGHMEM ((__force gfp_t)___GFP_HIGHMEM) #define __GFP_DMA32 ((__force gfp_t)___GFP_DMA32) -#define __GFP_MOVABLE ((__force gfp_t)___GFP_MOVABLE) /* Page is movable */ #define __GFP_MOVABLE ((__force gfp_t)___GFP_MOVABLE) /* ZONE_MOVABLE allowed */ #define GFP_ZONEMASK (__GFP_DMA|__GFP_HIGHMEM|__GFP_DMA32|__GFP_MOVABLE) diff --git a/include/trace/events/gfpflags.h b/include/trace/events/gfpflags.h index dde6bf092c8a..f53b216c9311 100644 --- a/include/trace/events/gfpflags.h +++ b/include/trace/events/gfpflags.h @@ -11,33 +11,42 @@ #define show_gfp_flags(flags) \ (flags) ? __print_flags(flags, "|", \ {(unsigned long)GFP_TRANSHUGE, "GFP_TRANSHUGE"}, \ - {(unsigned long)GFP_HIGHUSER_MOVABLE, "GFP_HIGHUSER_MOVABLE"}, \ + {(unsigned long)GFP_HIGHUSER_MOVABLE, "GFP_HIGHUSER_MOVABLE"},\ {(unsigned long)GFP_HIGHUSER, "GFP_HIGHUSER"}, \ {(unsigned long)GFP_USER, "GFP_USER"}, \ {(unsigned long)GFP_TEMPORARY, "GFP_TEMPORARY"}, \ + {(unsigned long)GFP_KERNEL_ACCOUNT, "GFP_KERNEL_ACCOUNT"}, \ {(unsigned long)GFP_KERNEL, "GFP_KERNEL"}, \ {(unsigned long)GFP_NOFS, "GFP_NOFS"}, \ {(unsigned long)GFP_ATOMIC, "GFP_ATOMIC"}, \ {(unsigned long)GFP_NOIO, "GFP_NOIO"}, \ - {(unsigned long)__GFP_HIGH, "GFP_HIGH"}, \ - {(unsigned long)__GFP_ATOMIC, "GFP_ATOMIC"}, \ - {(unsigned long)__GFP_IO, "GFP_IO"}, \ - {(unsigned long)__GFP_COLD, "GFP_COLD"}, \ - {(unsigned long)__GFP_NOWARN, "GFP_NOWARN"}, \ - {(unsigned long)__GFP_REPEAT, "GFP_REPEAT"}, \ - {(unsigned long)__GFP_NOFAIL, "GFP_NOFAIL"}, \ - {(unsigned long)__GFP_NORETRY, "GFP_NORETRY"}, \ - {(unsigned long)__GFP_COMP, "GFP_COMP"}, \ - {(unsigned long)__GFP_ZERO, "GFP_ZERO"}, \ - {(unsigned long)__GFP_NOMEMALLOC, "GFP_NOMEMALLOC"}, \ - {(unsigned long)__GFP_MEMALLOC, "GFP_MEMALLOC"}, \ - {(unsigned long)__GFP_HARDWALL, "GFP_HARDWALL"}, \ - {(unsigned long)__GFP_THISNODE, "GFP_THISNODE"}, \ - {(unsigned long)__GFP_RECLAIMABLE, "GFP_RECLAIMABLE"}, \ - {(unsigned long)__GFP_MOVABLE, "GFP_MOVABLE"}, \ - {(unsigned long)__GFP_NOTRACK, "GFP_NOTRACK"}, \ - {(unsigned long)__GFP_DIRECT_RECLAIM, "GFP_DIRECT_RECLAIM"}, \ - {(unsigned long)__GFP_KSWAPD_RECLAIM, "GFP_KSWAPD_RECLAIM"}, \ - {(unsigned long)__GFP_OTHER_NODE, "GFP_OTHER_NODE"} \ - ) : "GFP_NOWAIT" + {(unsigned long)GFP_NOWAIT, "GFP_NOWAIT"}, \ + {(unsigned long)GFP_DMA, "GFP_DMA"}, \ + {(unsigned long)__GFP_HIGHMEM, "__GFP_HIGHMEM"}, \ + {(unsigned 
long)GFP_DMA32, "GFP_DMA32"}, \ + {(unsigned long)__GFP_HIGH, "__GFP_HIGH"}, \ + {(unsigned long)__GFP_ATOMIC, "__GFP_ATOMIC"}, \ + {(unsigned long)__GFP_IO, "__GFP_IO"}, \ + {(unsigned long)__GFP_FS, "__GFP_FS"}, \ + {(unsigned long)__GFP_COLD, "__GFP_COLD"}, \ + {(unsigned long)__GFP_NOWARN, "__GFP_NOWARN"}, \ + {(unsigned long)__GFP_REPEAT, "__GFP_REPEAT"}, \ + {(unsigned long)__GFP_NOFAIL, "__GFP_NOFAIL"}, \ + {(unsigned long)__GFP_NORETRY, "__GFP_NORETRY"}, \ + {(unsigned long)__GFP_COMP, "__GFP_COMP"}, \ + {(unsigned long)__GFP_ZERO, "__GFP_ZERO"}, \ + {(unsigned long)__GFP_NOMEMALLOC, "__GFP_NOMEMALLOC"}, \ + {(unsigned long)__GFP_MEMALLOC, "__GFP_MEMALLOC"}, \ + {(unsigned long)__GFP_HARDWALL, "__GFP_HARDWALL"}, \ + {(unsigned long)__GFP_THISNODE, "__GFP_THISNODE"}, \ + {(unsigned long)__GFP_RECLAIMABLE, "__GFP_RECLAIMABLE"}, \ + {(unsigned long)__GFP_MOVABLE, "__GFP_MOVABLE"}, \ + {(unsigned long)__GFP_ACCOUNT, "__GFP_ACCOUNT"}, \ + {(unsigned long)__GFP_NOTRACK, "__GFP_NOTRACK"}, \ + {(unsigned long)__GFP_WRITE, "__GFP_WRITE"}, \ + {(unsigned long)__GFP_RECLAIM, "__GFP_RECLAIM"}, \ + {(unsigned long)__GFP_DIRECT_RECLAIM, "__GFP_DIRECT_RECLAIM"},\ + {(unsigned long)__GFP_KSWAPD_RECLAIM, "__GFP_KSWAPD_RECLAIM"},\ + {(unsigned long)__GFP_OTHER_NODE, "__GFP_OTHER_NODE"} \ + ) : "none" -- cgit v1.2.3 From 14e0a214d62d284ff40b1fd7d687cb66fca9fc67 Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Tue, 15 Mar 2016 14:55:49 -0700 Subject: tools, perf: make gfp_compact_table up to date When updating tracing's show_gfp_flags() I have noticed that perf's gfp_compact_table is also outdated. Fill in the missing flags and place a note in gfp.h to increase chance that future updates are synced. Convert the __GFP_X flags from "GFP_X" to "__GFP_X" strings in line with the previous patch. Signed-off-by: Vlastimil Babka Acked-by: David Rientjes Cc: Steven Rostedt Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Ingo Molnar Cc: Rasmus Villemoes Cc: Joonsoo Kim Cc: Minchan Kim Cc: Sasha Levin Cc: "Kirill A. Shutemov" Cc: Mel Gorman Cc: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/gfp.h | 2 +- tools/perf/builtin-kmem.c | 47 ++++++++++++++++++++++++++++------------------- 2 files changed, 29 insertions(+), 20 deletions(-) (limited to 'include/linux') diff --git a/include/linux/gfp.h b/include/linux/gfp.h index bbe5e7fae337..3d6d878c00f5 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -11,7 +11,7 @@ struct vm_area_struct; /* * In case of changes, please don't forget to update - * include/trace/events/gfpflags.h + * include/trace/events/gfpflags.h and tools/perf/builtin-kmem.c */ /* Plain integer GFP bitmasks. Do not use this directly. 
*/ diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c index 4d3340cce9a0..83343ed30e8f 100644 --- a/tools/perf/builtin-kmem.c +++ b/tools/perf/builtin-kmem.c @@ -612,30 +612,39 @@ static const struct { { "GFP_HIGHUSER", "HU" }, { "GFP_USER", "U" }, { "GFP_TEMPORARY", "TMP" }, + { "GFP_KERNEL_ACCOUNT", "KAC" }, { "GFP_KERNEL", "K" }, { "GFP_NOFS", "NF" }, { "GFP_ATOMIC", "A" }, { "GFP_NOIO", "NI" }, - { "GFP_HIGH", "H" }, - { "GFP_WAIT", "W" }, - { "GFP_IO", "I" }, - { "GFP_COLD", "CO" }, - { "GFP_NOWARN", "NWR" }, - { "GFP_REPEAT", "R" }, - { "GFP_NOFAIL", "NF" }, - { "GFP_NORETRY", "NR" }, - { "GFP_COMP", "C" }, - { "GFP_ZERO", "Z" }, - { "GFP_NOMEMALLOC", "NMA" }, - { "GFP_MEMALLOC", "MA" }, - { "GFP_HARDWALL", "HW" }, - { "GFP_THISNODE", "TN" }, - { "GFP_RECLAIMABLE", "RC" }, - { "GFP_MOVABLE", "M" }, - { "GFP_NOTRACK", "NT" }, - { "GFP_NO_KSWAPD", "NK" }, - { "GFP_OTHER_NODE", "ON" }, { "GFP_NOWAIT", "NW" }, + { "GFP_DMA", "D" }, + { "__GFP_HIGHMEM", "HM" }, + { "GFP_DMA32", "D32" }, + { "__GFP_HIGH", "H" }, + { "__GFP_ATOMIC", "_A" }, + { "__GFP_IO", "I" }, + { "__GFP_FS", "F" }, + { "__GFP_COLD", "CO" }, + { "__GFP_NOWARN", "NWR" }, + { "__GFP_REPEAT", "R" }, + { "__GFP_NOFAIL", "NF" }, + { "__GFP_NORETRY", "NR" }, + { "__GFP_COMP", "C" }, + { "__GFP_ZERO", "Z" }, + { "__GFP_NOMEMALLOC", "NMA" }, + { "__GFP_MEMALLOC", "MA" }, + { "__GFP_HARDWALL", "HW" }, + { "__GFP_THISNODE", "TN" }, + { "__GFP_RECLAIMABLE", "RC" }, + { "__GFP_MOVABLE", "M" }, + { "__GFP_ACCOUNT", "AC" }, + { "__GFP_NOTRACK", "NT" }, + { "__GFP_WRITE", "WR" }, + { "__GFP_RECLAIM", "R" }, + { "__GFP_DIRECT_RECLAIM", "DR" }, + { "__GFP_KSWAPD_RECLAIM", "KR" }, + { "__GFP_OTHER_NODE", "ON" }, }; static size_t max_gfp_len; -- cgit v1.2.3 From 420adbe9fc1a45187cfa74df9dbfd72272c4e2fa Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Tue, 15 Mar 2016 14:55:52 -0700 Subject: mm, tracing: unify mm flags handling in tracepoints and printk In tracepoints, it's possible to print gfp flags in a human-friendly format through a macro show_gfp_flags(), which defines a translation array and passes it to __print_flags(). Since the following patch will introduce support for gfp flags printing in printk(), it would be nice to reuse the array. This is not straightforward, since __print_flags() can't simply reference an array defined in a .c file such as mm/debug.c - it has to be a macro to allow the macro magic to communicate the format to userspace tools such as trace-cmd. The solution is to create a macro __def_gfpflag_names which is used both in show_gfp_flags(), and to define the gfpflag_names[] array in mm/debug.c. On the other hand, mm/debug.c also defines translation tables for page flags and vma flags, and desire was expressed (but not implemented in this series) to use these also from tracepoints. Thus, this patch also renames the events/gfpflags.h file to events/mmflags.h and moves the table definitions there, using the same macro approach as for gfpflags. This allows translating all three kinds of mm-specific flags both in tracepoints and printk. Signed-off-by: Vlastimil Babka Reviewed-by: Michal Hocko Cc: Steven Rostedt Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Ingo Molnar Cc: Rasmus Villemoes Cc: Joonsoo Kim Cc: Minchan Kim Cc: Sasha Levin Cc: "Kirill A.
Shutemov" Cc: Mel Gorman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/gfp.h | 2 +- include/trace/events/btrfs.h | 2 +- include/trace/events/compaction.h | 2 +- include/trace/events/gfpflags.h | 52 ------------ include/trace/events/huge_memory.h | 2 - include/trace/events/kmem.h | 2 +- include/trace/events/mmflags.h | 164 +++++++++++++++++++++++++++++++++++++ include/trace/events/vmscan.h | 2 +- mm/debug.c | 88 +++----------------- tools/perf/builtin-kmem.c | 2 +- 10 files changed, 181 insertions(+), 137 deletions(-) delete mode 100644 include/trace/events/gfpflags.h create mode 100644 include/trace/events/mmflags.h (limited to 'include/linux') diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 3d6d878c00f5..06546b36eb6a 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -11,7 +11,7 @@ struct vm_area_struct; /* * In case of changes, please don't forget to update - * include/trace/events/gfpflags.h and tools/perf/builtin-kmem.c + * include/trace/events/mmflags.h and tools/perf/builtin-kmem.c */ /* Plain integer GFP bitmasks. Do not use this directly. */ diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h index d866f21efbbf..677807f29a1c 100644 --- a/include/trace/events/btrfs.h +++ b/include/trace/events/btrfs.h @@ -6,7 +6,7 @@ #include #include -#include +#include struct btrfs_root; struct btrfs_fs_info; diff --git a/include/trace/events/compaction.h b/include/trace/events/compaction.h index c92d1e1cbad9..111e5666e5eb 100644 --- a/include/trace/events/compaction.h +++ b/include/trace/events/compaction.h @@ -7,7 +7,7 @@ #include #include #include -#include +#include #define COMPACTION_STATUS \ EM( COMPACT_DEFERRED, "deferred") \ diff --git a/include/trace/events/gfpflags.h b/include/trace/events/gfpflags.h deleted file mode 100644 index f53b216c9311..000000000000 --- a/include/trace/events/gfpflags.h +++ /dev/null @@ -1,52 +0,0 @@ -/* - * The order of these masks is important. Matching masks will be seen - * first and the left over flags will end up showing by themselves. - * - * For example, if we have GFP_KERNEL before GFP_USER we wil get: - * - * GFP_KERNEL|GFP_HARDWALL - * - * Thus most bits set go first. - */ -#define show_gfp_flags(flags) \ - (flags) ? 
__print_flags(flags, "|", \ - {(unsigned long)GFP_TRANSHUGE, "GFP_TRANSHUGE"}, \ - {(unsigned long)GFP_HIGHUSER_MOVABLE, "GFP_HIGHUSER_MOVABLE"},\ - {(unsigned long)GFP_HIGHUSER, "GFP_HIGHUSER"}, \ - {(unsigned long)GFP_USER, "GFP_USER"}, \ - {(unsigned long)GFP_TEMPORARY, "GFP_TEMPORARY"}, \ - {(unsigned long)GFP_KERNEL_ACCOUNT, "GFP_KERNEL_ACCOUNT"}, \ - {(unsigned long)GFP_KERNEL, "GFP_KERNEL"}, \ - {(unsigned long)GFP_NOFS, "GFP_NOFS"}, \ - {(unsigned long)GFP_ATOMIC, "GFP_ATOMIC"}, \ - {(unsigned long)GFP_NOIO, "GFP_NOIO"}, \ - {(unsigned long)GFP_NOWAIT, "GFP_NOWAIT"}, \ - {(unsigned long)GFP_DMA, "GFP_DMA"}, \ - {(unsigned long)__GFP_HIGHMEM, "__GFP_HIGHMEM"}, \ - {(unsigned long)GFP_DMA32, "GFP_DMA32"}, \ - {(unsigned long)__GFP_HIGH, "__GFP_HIGH"}, \ - {(unsigned long)__GFP_ATOMIC, "__GFP_ATOMIC"}, \ - {(unsigned long)__GFP_IO, "__GFP_IO"}, \ - {(unsigned long)__GFP_FS, "__GFP_FS"}, \ - {(unsigned long)__GFP_COLD, "__GFP_COLD"}, \ - {(unsigned long)__GFP_NOWARN, "__GFP_NOWARN"}, \ - {(unsigned long)__GFP_REPEAT, "__GFP_REPEAT"}, \ - {(unsigned long)__GFP_NOFAIL, "__GFP_NOFAIL"}, \ - {(unsigned long)__GFP_NORETRY, "__GFP_NORETRY"}, \ - {(unsigned long)__GFP_COMP, "__GFP_COMP"}, \ - {(unsigned long)__GFP_ZERO, "__GFP_ZERO"}, \ - {(unsigned long)__GFP_NOMEMALLOC, "__GFP_NOMEMALLOC"}, \ - {(unsigned long)__GFP_MEMALLOC, "__GFP_MEMALLOC"}, \ - {(unsigned long)__GFP_HARDWALL, "__GFP_HARDWALL"}, \ - {(unsigned long)__GFP_THISNODE, "__GFP_THISNODE"}, \ - {(unsigned long)__GFP_RECLAIMABLE, "__GFP_RECLAIMABLE"}, \ - {(unsigned long)__GFP_MOVABLE, "__GFP_MOVABLE"}, \ - {(unsigned long)__GFP_ACCOUNT, "__GFP_ACCOUNT"}, \ - {(unsigned long)__GFP_NOTRACK, "__GFP_NOTRACK"}, \ - {(unsigned long)__GFP_WRITE, "__GFP_WRITE"}, \ - {(unsigned long)__GFP_RECLAIM, "__GFP_RECLAIM"}, \ - {(unsigned long)__GFP_DIRECT_RECLAIM, "__GFP_DIRECT_RECLAIM"},\ - {(unsigned long)__GFP_KSWAPD_RECLAIM, "__GFP_KSWAPD_RECLAIM"},\ - {(unsigned long)__GFP_OTHER_NODE, "__GFP_OTHER_NODE"} \ - ) : "none" - diff --git a/include/trace/events/huge_memory.h b/include/trace/events/huge_memory.h index 47c6212d8f3c..551ba4acde4d 100644 --- a/include/trace/events/huge_memory.h +++ b/include/trace/events/huge_memory.h @@ -6,8 +6,6 @@ #include -#include - #define SCAN_STATUS \ EM( SCAN_FAIL, "failed") \ EM( SCAN_SUCCEED, "succeeded") \ diff --git a/include/trace/events/kmem.h b/include/trace/events/kmem.h index f7554fd7fc62..ca7217389067 100644 --- a/include/trace/events/kmem.h +++ b/include/trace/events/kmem.h @@ -6,7 +6,7 @@ #include #include -#include +#include DECLARE_EVENT_CLASS(kmem_alloc, diff --git a/include/trace/events/mmflags.h b/include/trace/events/mmflags.h new file mode 100644 index 000000000000..a849185c82f0 --- /dev/null +++ b/include/trace/events/mmflags.h @@ -0,0 +1,164 @@ +/* + * The order of these masks is important. Matching masks will be seen + * first and the left over flags will end up showing by themselves. + * + * For example, if we have GFP_KERNEL before GFP_USER we wil get: + * + * GFP_KERNEL|GFP_HARDWALL + * + * Thus most bits set go first. 
+ */ + +#define __def_gfpflag_names \ + {(unsigned long)GFP_TRANSHUGE, "GFP_TRANSHUGE"}, \ + {(unsigned long)GFP_HIGHUSER_MOVABLE, "GFP_HIGHUSER_MOVABLE"},\ + {(unsigned long)GFP_HIGHUSER, "GFP_HIGHUSER"}, \ + {(unsigned long)GFP_USER, "GFP_USER"}, \ + {(unsigned long)GFP_TEMPORARY, "GFP_TEMPORARY"}, \ + {(unsigned long)GFP_KERNEL_ACCOUNT, "GFP_KERNEL_ACCOUNT"}, \ + {(unsigned long)GFP_KERNEL, "GFP_KERNEL"}, \ + {(unsigned long)GFP_NOFS, "GFP_NOFS"}, \ + {(unsigned long)GFP_ATOMIC, "GFP_ATOMIC"}, \ + {(unsigned long)GFP_NOIO, "GFP_NOIO"}, \ + {(unsigned long)GFP_NOWAIT, "GFP_NOWAIT"}, \ + {(unsigned long)GFP_DMA, "GFP_DMA"}, \ + {(unsigned long)__GFP_HIGHMEM, "__GFP_HIGHMEM"}, \ + {(unsigned long)GFP_DMA32, "GFP_DMA32"}, \ + {(unsigned long)__GFP_HIGH, "__GFP_HIGH"}, \ + {(unsigned long)__GFP_ATOMIC, "__GFP_ATOMIC"}, \ + {(unsigned long)__GFP_IO, "__GFP_IO"}, \ + {(unsigned long)__GFP_FS, "__GFP_FS"}, \ + {(unsigned long)__GFP_COLD, "__GFP_COLD"}, \ + {(unsigned long)__GFP_NOWARN, "__GFP_NOWARN"}, \ + {(unsigned long)__GFP_REPEAT, "__GFP_REPEAT"}, \ + {(unsigned long)__GFP_NOFAIL, "__GFP_NOFAIL"}, \ + {(unsigned long)__GFP_NORETRY, "__GFP_NORETRY"}, \ + {(unsigned long)__GFP_COMP, "__GFP_COMP"}, \ + {(unsigned long)__GFP_ZERO, "__GFP_ZERO"}, \ + {(unsigned long)__GFP_NOMEMALLOC, "__GFP_NOMEMALLOC"}, \ + {(unsigned long)__GFP_MEMALLOC, "__GFP_MEMALLOC"}, \ + {(unsigned long)__GFP_HARDWALL, "__GFP_HARDWALL"}, \ + {(unsigned long)__GFP_THISNODE, "__GFP_THISNODE"}, \ + {(unsigned long)__GFP_RECLAIMABLE, "__GFP_RECLAIMABLE"}, \ + {(unsigned long)__GFP_MOVABLE, "__GFP_MOVABLE"}, \ + {(unsigned long)__GFP_ACCOUNT, "__GFP_ACCOUNT"}, \ + {(unsigned long)__GFP_NOTRACK, "__GFP_NOTRACK"}, \ + {(unsigned long)__GFP_WRITE, "__GFP_WRITE"}, \ + {(unsigned long)__GFP_RECLAIM, "__GFP_RECLAIM"}, \ + {(unsigned long)__GFP_DIRECT_RECLAIM, "__GFP_DIRECT_RECLAIM"},\ + {(unsigned long)__GFP_KSWAPD_RECLAIM, "__GFP_KSWAPD_RECLAIM"},\ + {(unsigned long)__GFP_OTHER_NODE, "__GFP_OTHER_NODE"} \ + +#define show_gfp_flags(flags) \ + (flags) ? 
__print_flags(flags, "|", \ + __def_gfpflag_names \ + ) : "none" + +#ifdef CONFIG_MMU +#define IF_HAVE_PG_MLOCK(flag,string) ,{1UL << flag, string} +#else +#define IF_HAVE_PG_MLOCK(flag,string) +#endif + +#ifdef CONFIG_ARCH_USES_PG_UNCACHED +#define IF_HAVE_PG_UNCACHED(flag,string) ,{1UL << flag, string} +#else +#define IF_HAVE_PG_UNCACHED(flag,string) +#endif + +#ifdef CONFIG_MEMORY_FAILURE +#define IF_HAVE_PG_HWPOISON(flag,string) ,{1UL << flag, string} +#else +#define IF_HAVE_PG_HWPOISON(flag,string) +#endif + +#if defined(CONFIG_IDLE_PAGE_TRACKING) && defined(CONFIG_64BIT) +#define IF_HAVE_PG_IDLE(flag,string) ,{1UL << flag, string} +#else +#define IF_HAVE_PG_IDLE(flag,string) +#endif + +#define __def_pageflag_names \ + {1UL << PG_locked, "locked" }, \ + {1UL << PG_error, "error" }, \ + {1UL << PG_referenced, "referenced" }, \ + {1UL << PG_uptodate, "uptodate" }, \ + {1UL << PG_dirty, "dirty" }, \ + {1UL << PG_lru, "lru" }, \ + {1UL << PG_active, "active" }, \ + {1UL << PG_slab, "slab" }, \ + {1UL << PG_owner_priv_1, "owner_priv_1" }, \ + {1UL << PG_arch_1, "arch_1" }, \ + {1UL << PG_reserved, "reserved" }, \ + {1UL << PG_private, "private" }, \ + {1UL << PG_private_2, "private_2" }, \ + {1UL << PG_writeback, "writeback" }, \ + {1UL << PG_head, "head" }, \ + {1UL << PG_swapcache, "swapcache" }, \ + {1UL << PG_mappedtodisk, "mappedtodisk" }, \ + {1UL << PG_reclaim, "reclaim" }, \ + {1UL << PG_swapbacked, "swapbacked" }, \ + {1UL << PG_unevictable, "unevictable" } \ +IF_HAVE_PG_MLOCK(PG_mlocked, "mlocked" ) \ +IF_HAVE_PG_UNCACHED(PG_uncached, "uncached" ) \ +IF_HAVE_PG_HWPOISON(PG_hwpoison, "hwpoison" ) \ +IF_HAVE_PG_IDLE(PG_young, "young" ) \ +IF_HAVE_PG_IDLE(PG_idle, "idle" ) + +#define show_page_flags(flags) \ + (flags) ? __print_flags(flags, "|", \ + __def_pageflag_names \ + ) : "none" + +#if defined(CONFIG_X86) +#define __VM_ARCH_SPECIFIC {VM_PAT, "pat" } +#elif defined(CONFIG_PPC) +#define __VM_ARCH_SPECIFIC {VM_SAO, "sao" } +#elif defined(CONFIG_PARISC) || defined(CONFIG_METAG) || defined(CONFIG_IA64) +#define __VM_ARCH_SPECIFIC {VM_GROWSUP, "growsup" } +#elif !defined(CONFIG_MMU) +#define __VM_ARCH_SPECIFIC {VM_MAPPED_COPY,"mappedcopy" } +#else +#define __VM_ARCH_SPECIFIC {VM_ARCH_1, "arch_1" } +#endif + +#ifdef CONFIG_MEM_SOFT_DIRTY +#define IF_HAVE_VM_SOFTDIRTY(flag,name) {flag, name }, +#else +#define IF_HAVE_VM_SOFTDIRTY(flag,name) +#endif + +#define __def_vmaflag_names \ + {VM_READ, "read" }, \ + {VM_WRITE, "write" }, \ + {VM_EXEC, "exec" }, \ + {VM_SHARED, "shared" }, \ + {VM_MAYREAD, "mayread" }, \ + {VM_MAYWRITE, "maywrite" }, \ + {VM_MAYEXEC, "mayexec" }, \ + {VM_MAYSHARE, "mayshare" }, \ + {VM_GROWSDOWN, "growsdown" }, \ + {VM_PFNMAP, "pfnmap" }, \ + {VM_DENYWRITE, "denywrite" }, \ + {VM_LOCKONFAULT, "lockonfault" }, \ + {VM_LOCKED, "locked" }, \ + {VM_IO, "io" }, \ + {VM_SEQ_READ, "seqread" }, \ + {VM_RAND_READ, "randread" }, \ + {VM_DONTCOPY, "dontcopy" }, \ + {VM_DONTEXPAND, "dontexpand" }, \ + {VM_ACCOUNT, "account" }, \ + {VM_NORESERVE, "noreserve" }, \ + {VM_HUGETLB, "hugetlb" }, \ + __VM_ARCH_SPECIFIC , \ + {VM_DONTDUMP, "dontdump" }, \ +IF_HAVE_VM_SOFTDIRTY(VM_SOFTDIRTY, "softdirty" ) \ + {VM_MIXEDMAP, "mixedmap" }, \ + {VM_HUGEPAGE, "hugepage" }, \ + {VM_NOHUGEPAGE, "nohugepage" }, \ + {VM_MERGEABLE, "mergeable" } \ + +#define show_vma_flags(flags) \ + (flags) ? 
__print_flags(flags, "|", \ + __def_vmaflag_names \ + ) : "none" diff --git a/include/trace/events/vmscan.h b/include/trace/events/vmscan.h index 31763dd8db1c..0101ef37f1ee 100644 --- a/include/trace/events/vmscan.h +++ b/include/trace/events/vmscan.h @@ -8,7 +8,7 @@ #include #include #include -#include +#include #define RECLAIM_WB_ANON 0x0001u #define RECLAIM_WB_FILE 0x0002u diff --git a/mm/debug.c b/mm/debug.c index f05b2d5d6481..410af904a7d5 100644 --- a/mm/debug.c +++ b/mm/debug.c @@ -9,41 +9,14 @@ #include #include #include +#include static const struct trace_print_flags pageflag_names[] = { - {1UL << PG_locked, "locked" }, - {1UL << PG_error, "error" }, - {1UL << PG_referenced, "referenced" }, - {1UL << PG_uptodate, "uptodate" }, - {1UL << PG_dirty, "dirty" }, - {1UL << PG_lru, "lru" }, - {1UL << PG_active, "active" }, - {1UL << PG_slab, "slab" }, - {1UL << PG_owner_priv_1, "owner_priv_1" }, - {1UL << PG_arch_1, "arch_1" }, - {1UL << PG_reserved, "reserved" }, - {1UL << PG_private, "private" }, - {1UL << PG_private_2, "private_2" }, - {1UL << PG_writeback, "writeback" }, - {1UL << PG_head, "head" }, - {1UL << PG_swapcache, "swapcache" }, - {1UL << PG_mappedtodisk, "mappedtodisk" }, - {1UL << PG_reclaim, "reclaim" }, - {1UL << PG_swapbacked, "swapbacked" }, - {1UL << PG_unevictable, "unevictable" }, -#ifdef CONFIG_MMU - {1UL << PG_mlocked, "mlocked" }, -#endif -#ifdef CONFIG_ARCH_USES_PG_UNCACHED - {1UL << PG_uncached, "uncached" }, -#endif -#ifdef CONFIG_MEMORY_FAILURE - {1UL << PG_hwpoison, "hwpoison" }, -#endif -#if defined(CONFIG_IDLE_PAGE_TRACKING) && defined(CONFIG_64BIT) - {1UL << PG_young, "young" }, - {1UL << PG_idle, "idle" }, -#endif + __def_pageflag_names +}; + +static const struct trace_print_flags gfpflag_names[] = { + __def_gfpflag_names }; static void dump_flags(unsigned long flags, @@ -108,47 +81,8 @@ EXPORT_SYMBOL(dump_page); #ifdef CONFIG_DEBUG_VM -static const struct trace_print_flags vmaflags_names[] = { - {VM_READ, "read" }, - {VM_WRITE, "write" }, - {VM_EXEC, "exec" }, - {VM_SHARED, "shared" }, - {VM_MAYREAD, "mayread" }, - {VM_MAYWRITE, "maywrite" }, - {VM_MAYEXEC, "mayexec" }, - {VM_MAYSHARE, "mayshare" }, - {VM_GROWSDOWN, "growsdown" }, - {VM_PFNMAP, "pfnmap" }, - {VM_DENYWRITE, "denywrite" }, - {VM_LOCKONFAULT, "lockonfault" }, - {VM_LOCKED, "locked" }, - {VM_IO, "io" }, - {VM_SEQ_READ, "seqread" }, - {VM_RAND_READ, "randread" }, - {VM_DONTCOPY, "dontcopy" }, - {VM_DONTEXPAND, "dontexpand" }, - {VM_ACCOUNT, "account" }, - {VM_NORESERVE, "noreserve" }, - {VM_HUGETLB, "hugetlb" }, -#if defined(CONFIG_X86) - {VM_PAT, "pat" }, -#elif defined(CONFIG_PPC) - {VM_SAO, "sao" }, -#elif defined(CONFIG_PARISC) || defined(CONFIG_METAG) || defined(CONFIG_IA64) - {VM_GROWSUP, "growsup" }, -#elif !defined(CONFIG_MMU) - {VM_MAPPED_COPY, "mappedcopy" }, -#else - {VM_ARCH_1, "arch_1" }, -#endif - {VM_DONTDUMP, "dontdump" }, -#ifdef CONFIG_MEM_SOFT_DIRTY - {VM_SOFTDIRTY, "softdirty" }, -#endif - {VM_MIXEDMAP, "mixedmap" }, - {VM_HUGEPAGE, "hugepage" }, - {VM_NOHUGEPAGE, "nohugepage" }, - {VM_MERGEABLE, "mergeable" }, +static const struct trace_print_flags vmaflag_names[] = { + __def_vmaflag_names }; void dump_vma(const struct vm_area_struct *vma) @@ -162,7 +96,7 @@ void dump_vma(const struct vm_area_struct *vma) (unsigned long)pgprot_val(vma->vm_page_prot), vma->anon_vma, vma->vm_ops, vma->vm_pgoff, vma->vm_file, vma->vm_private_data); - dump_flags(vma->vm_flags, vmaflags_names, ARRAY_SIZE(vmaflags_names)); + dump_flags(vma->vm_flags, vmaflag_names, ARRAY_SIZE(vmaflag_names)); 
} EXPORT_SYMBOL(dump_vma); @@ -233,8 +167,8 @@ void dump_mm(const struct mm_struct *mm) "" /* This is here to not have a comma! */ ); - dump_flags(mm->def_flags, vmaflags_names, - ARRAY_SIZE(vmaflags_names)); + dump_flags(mm->def_flags, vmaflag_names, + ARRAY_SIZE(vmaflag_names)); } #endif /* CONFIG_DEBUG_VM */ diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c index 83343ed30e8f..c9cb3be47cff 100644 --- a/tools/perf/builtin-kmem.c +++ b/tools/perf/builtin-kmem.c @@ -602,7 +602,7 @@ static int gfpcmp(const void *a, const void *b) return fa->flags - fb->flags; } -/* see include/trace/events/gfpflags.h */ +/* see include/trace/events/mmflags.h */ static const struct { const char *original; const char *compact; -- cgit v1.2.3 From 60f30350fd69a3e4d5f0f45937d3274c22565134 Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Tue, 15 Mar 2016 14:56:08 -0700 Subject: mm, page_owner: print migratetype of page and pageblock, symbolic flags The information in /sys/kernel/debug/page_owner includes the migratetype of the pageblock the page belongs to. This is also checked against the page's migratetype (as declared by gfp_flags during its allocation), and the page is reported as Fallback if its migratetype differs from the pageblock's one. This is somewhat misleading because in fact fallback allocation is not the only reason why these two can differ. It also doesn't directly provide the page's migratetype, although it's possible to derive that from the gfp_flags. It's arguably better to print both the page's and the pageblock's migratetype and leave the interpretation to the consumer than to suggest fallback allocation as the only possible reason. While at it, we can print the migratetypes as strings the same way as /proc/pagetypeinfo does, as some of the numeric values depend on kernel configuration. For that, this patch moves the migratetype_names array from the #ifdef CONFIG_PROC_FS part of mm/vmstat.c to mm/page_alloc.c and exports it. With the new format strings for flags, we can now also provide symbolic page and gfp flags in the /sys/kernel/debug/page_owner file. This replaces the positional printing of page flags as single letters, which might have looked nicer, but was limited to a subset of flags, and required the user to remember the letters. Example page_owner entry after the patch: Page allocated via order 0, mask 0x24213ca(GFP_HIGHUSER_MOVABLE|__GFP_COLD|__GFP_NOWARN|__GFP_NORETRY) PFN 520 type Movable Block 1 type Movable Flags 0xfffff8001006c(referenced|uptodate|lru|active|mappedtodisk) [] __alloc_pages_nodemask+0x134/0x230 [] alloc_pages_current+0x88/0x120 [] __page_cache_alloc+0xe6/0x120 [] __do_page_cache_readahead+0xdc/0x240 [] ondemand_readahead+0x135/0x260 [] page_cache_sync_readahead+0x31/0x50 [] generic_file_read_iter+0x453/0x760 [] __vfs_read+0xa7/0xd0 Signed-off-by: Vlastimil Babka Acked-by: Michal Hocko Cc: Joonsoo Kim Cc: Minchan Kim Cc: Sasha Levin Cc: "Kirill A.
Shutemov" Cc: Mel Gorman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 3 +++ mm/page_alloc.c | 13 +++++++++++++ mm/page_owner.c | 24 +++++++----------------- mm/vmstat.c | 13 ------------- 4 files changed, 23 insertions(+), 30 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 7b6c2cfee390..9fc23ab550a7 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -63,6 +63,9 @@ enum { MIGRATE_TYPES }; +/* In mm/page_alloc.c; keep in sync also with show_migration_types() there */ +extern char * const migratetype_names[MIGRATE_TYPES]; + #ifdef CONFIG_CMA # define is_migrate_cma(migratetype) unlikely((migratetype) == MIGRATE_CMA) #else diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 4e8029a7a4bf..030fafccaa6b 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -223,6 +223,19 @@ static char * const zone_names[MAX_NR_ZONES] = { #endif }; +char * const migratetype_names[MIGRATE_TYPES] = { + "Unmovable", + "Movable", + "Reclaimable", + "HighAtomic", +#ifdef CONFIG_CMA + "CMA", +#endif +#ifdef CONFIG_MEMORY_ISOLATION + "Isolate", +#endif +}; + compound_page_dtor * const compound_page_dtors[] = { NULL, free_compound_page, diff --git a/mm/page_owner.c b/mm/page_owner.c index 983c3a10fa07..7a37a30d941b 100644 --- a/mm/page_owner.c +++ b/mm/page_owner.c @@ -100,8 +100,9 @@ print_page_owner(char __user *buf, size_t count, unsigned long pfn, return -ENOMEM; ret = snprintf(kbuf, count, - "Page allocated via order %u, mask 0x%x\n", - page_ext->order, page_ext->gfp_mask); + "Page allocated via order %u, mask %#x(%pGg)\n", + page_ext->order, page_ext->gfp_mask, + &page_ext->gfp_mask); if (ret >= count) goto err; @@ -110,23 +111,12 @@ print_page_owner(char __user *buf, size_t count, unsigned long pfn, pageblock_mt = get_pfnblock_migratetype(page, pfn); page_mt = gfpflags_to_migratetype(page_ext->gfp_mask); ret += snprintf(kbuf + ret, count - ret, - "PFN %lu Block %lu type %d %s Flags %s%s%s%s%s%s%s%s%s%s%s%s\n", + "PFN %lu type %s Block %lu type %s Flags %#lx(%pGp)\n", pfn, + migratetype_names[page_mt], pfn >> pageblock_order, - pageblock_mt, - pageblock_mt != page_mt ? "Fallback" : " ", - PageLocked(page) ? "K" : " ", - PageError(page) ? "E" : " ", - PageReferenced(page) ? "R" : " ", - PageUptodate(page) ? "U" : " ", - PageDirty(page) ? "D" : " ", - PageLRU(page) ? "L" : " ", - PageActive(page) ? "A" : " ", - PageSlab(page) ? "S" : " ", - PageWriteback(page) ? "W" : " ", - PageCompound(page) ? "C" : " ", - PageSwapCache(page) ? "B" : " ", - PageMappedToDisk(page) ? 
"M" : " "); + migratetype_names[pageblock_mt], + page->flags, &page->flags); if (ret >= count) goto err; diff --git a/mm/vmstat.c b/mm/vmstat.c index 084c6725b373..72c17981cb70 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -924,19 +924,6 @@ static void walk_zones_in_node(struct seq_file *m, pg_data_t *pgdat, #endif #ifdef CONFIG_PROC_FS -static char * const migratetype_names[MIGRATE_TYPES] = { - "Unmovable", - "Movable", - "Reclaimable", - "HighAtomic", -#ifdef CONFIG_CMA - "CMA", -#endif -#ifdef CONFIG_MEMORY_ISOLATION - "Isolate", -#endif -}; - static void frag_show_print(struct seq_file *m, pg_data_t *pgdat, struct zone *zone) { -- cgit v1.2.3 From 7dd80b8af0bcd705a9ef2fa272c082882616a499 Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Tue, 15 Mar 2016 14:56:12 -0700 Subject: mm, page_owner: convert page_owner_inited to static key CONFIG_PAGE_OWNER attempts to impose negligible runtime overhead when enabled during compilation, but not actually enabled during runtime by boot param page_owner=on. This overhead can be further reduced using the static key mechanism, which this patch does. Signed-off-by: Vlastimil Babka Acked-by: Michal Hocko Cc: Joonsoo Kim Cc: Minchan Kim Cc: Sasha Levin Cc: "Kirill A. Shutemov" Cc: Mel Gorman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/vm/page_owner.txt | 9 +++++---- include/linux/page_owner.h | 22 ++++++++++------------ mm/page_owner.c | 9 +++++---- mm/vmstat.c | 2 +- 4 files changed, 21 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/Documentation/vm/page_owner.txt b/Documentation/vm/page_owner.txt index 8f3ce9b3aa11..ffff1439076a 100644 --- a/Documentation/vm/page_owner.txt +++ b/Documentation/vm/page_owner.txt @@ -28,10 +28,11 @@ with page owner and page owner is disabled in runtime due to no enabling boot option, runtime overhead is marginal. If disabled in runtime, it doesn't require memory to store owner information, so there is no runtime memory overhead. And, page owner inserts just two unlikely branches into -the page allocator hotpath and if it returns false then allocation is -done like as the kernel without page owner. These two unlikely branches -would not affect to allocation performance. Following is the kernel's -code size change due to this facility. +the page allocator hotpath and if not enabled, then allocation is done +like as the kernel without page owner. These two unlikely branches should +not affect to allocation performance, especially if the static keys jump +label patching functionality is available. Following is the kernel's code +size change due to this facility. 
- Without page owner text data bss dec hex filename diff --git a/include/linux/page_owner.h b/include/linux/page_owner.h index cacaabea8a09..8e2eb153c7b9 100644 --- a/include/linux/page_owner.h +++ b/include/linux/page_owner.h @@ -1,8 +1,10 @@ #ifndef __LINUX_PAGE_OWNER_H #define __LINUX_PAGE_OWNER_H +#include + #ifdef CONFIG_PAGE_OWNER -extern bool page_owner_inited; +extern struct static_key_false page_owner_inited; extern struct page_ext_operations page_owner_ops; extern void __reset_page_owner(struct page *page, unsigned int order); @@ -12,27 +14,23 @@ extern gfp_t __get_page_owner_gfp(struct page *page); static inline void reset_page_owner(struct page *page, unsigned int order) { - if (likely(!page_owner_inited)) - return; - - __reset_page_owner(page, order); + if (static_branch_unlikely(&page_owner_inited)) + __reset_page_owner(page, order); } static inline void set_page_owner(struct page *page, unsigned int order, gfp_t gfp_mask) { - if (likely(!page_owner_inited)) - return; - - __set_page_owner(page, order, gfp_mask); + if (static_branch_unlikely(&page_owner_inited)) + __set_page_owner(page, order, gfp_mask); } static inline gfp_t get_page_owner_gfp(struct page *page) { - if (likely(!page_owner_inited)) + if (static_branch_unlikely(&page_owner_inited)) + return __get_page_owner_gfp(page); + else return 0; - - return __get_page_owner_gfp(page); } #else static inline void reset_page_owner(struct page *page, unsigned int order) diff --git a/mm/page_owner.c b/mm/page_owner.c index 7a37a30d941b..feaa28b40c1c 100644 --- a/mm/page_owner.c +++ b/mm/page_owner.c @@ -5,10 +5,11 @@ #include #include #include +#include #include "internal.h" static bool page_owner_disabled = true; -bool page_owner_inited __read_mostly; +DEFINE_STATIC_KEY_FALSE(page_owner_inited); static void init_early_allocated_pages(void); @@ -37,7 +38,7 @@ static void init_page_owner(void) if (page_owner_disabled) return; - page_owner_inited = true; + static_branch_enable(&page_owner_inited); init_early_allocated_pages(); } @@ -147,7 +148,7 @@ read_page_owner(struct file *file, char __user *buf, size_t count, loff_t *ppos) struct page *page; struct page_ext *page_ext; - if (!page_owner_inited) + if (!static_branch_unlikely(&page_owner_inited)) return -EINVAL; page = NULL; @@ -295,7 +296,7 @@ static int __init pageowner_init(void) { struct dentry *dentry; - if (!page_owner_inited) { + if (!static_branch_unlikely(&page_owner_inited)) { pr_info("page_owner is disabled\n"); return 0; } diff --git a/mm/vmstat.c b/mm/vmstat.c index 72c17981cb70..69ce64f7b8d7 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -1120,7 +1120,7 @@ static void pagetypeinfo_showmixedcount(struct seq_file *m, pg_data_t *pgdat) #ifdef CONFIG_PAGE_OWNER int mtype; - if (!page_owner_inited) + if (!static_branch_unlikely(&page_owner_inited)) return; drain_all_pages(NULL); -- cgit v1.2.3 From d435edca928805074dae005ab9a42d9fa60fc702 Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Tue, 15 Mar 2016 14:56:15 -0700 Subject: mm, page_owner: copy page owner info during migration The page_owner mechanism stores the gfp_flags of an allocation and the stack trace that led to it. During page migration, the original information is practically replaced by that of the free page allocated as the migration target. Arguably this is less useful, and might lead to all the page_owner info for migratable pages gradually converging towards compaction or numa balancing migrations.
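The static_branch_unlikely() wrappers in the hunks above all follow the same shape: a cheap test in an inline wrapper, with the real work kept out of line. A minimal userspace sketch of that shape, using a plain predicted-cold flag where the kernel actually patches the instruction stream, and with illustrative names:

#include <stdbool.h>
#include <stdio.h>

static bool page_owner_inited;	/* stand-in for the kernel's static key */

static void __set_page_owner(unsigned int order)
{
	printf("recording owner of order-%u allocation\n", order);
}

/* hot-path wrapper: one predicted-not-taken branch when disabled */
static inline void set_page_owner(unsigned int order)
{
	if (__builtin_expect(page_owner_inited, 0))
		__set_page_owner(order);
}

int main(void)
{
	set_page_owner(0);		/* no-op: feature not enabled */
	page_owner_inited = true;	/* e.g. boot param page_owner=on */
	set_page_owner(3);
	return 0;
}

In the kernel the disabled-side cost is even lower, since the jump label machinery rewrites the branch site itself once static_branch_enable() runs.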
This overwriting has also led to inaccuracies, such as the one fixed by commit e2cfc91120fa ("mm/page_owner: set correct gfp_mask on page_owner"). This patch thus introduces copying the page_owner info during migration. However, since the fact that the page has been migrated from its original place might be useful for debugging, the next patch will introduce a way to track that information as well. Signed-off-by: Vlastimil Babka Acked-by: Michal Hocko Cc: Joonsoo Kim Cc: Minchan Kim Cc: Sasha Levin Cc: "Kirill A. Shutemov" Cc: Mel Gorman Cc: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/page_owner.h | 10 +++++++++- mm/migrate.c | 3 +++ mm/page_owner.c | 25 +++++++++++++++++++++++++ 3 files changed, 37 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/page_owner.h b/include/linux/page_owner.h index 8e2eb153c7b9..6440daab4ef8 100644 --- a/include/linux/page_owner.h +++ b/include/linux/page_owner.h @@ -11,6 +11,7 @@ extern void __reset_page_owner(struct page *page, unsigned int order); extern void __set_page_owner(struct page *page, unsigned int order, gfp_t gfp_mask); extern gfp_t __get_page_owner_gfp(struct page *page); +extern void __copy_page_owner(struct page *oldpage, struct page *newpage); static inline void reset_page_owner(struct page *page, unsigned int order) { @@ -32,6 +33,11 @@ static inline gfp_t get_page_owner_gfp(struct page *page) else return 0; } +static inline void copy_page_owner(struct page *oldpage, struct page *newpage) +{ + if (static_branch_unlikely(&page_owner_inited)) + __copy_page_owner(oldpage, newpage); +} #else static inline void reset_page_owner(struct page *page, unsigned int order) { @@ -44,6 +50,8 @@ static inline gfp_t get_page_owner_gfp(struct page *page) { return 0; } - +static inline void copy_page_owner(struct page *oldpage, struct page *newpage) +{ +} #endif /* CONFIG_PAGE_OWNER */ #endif /* __LINUX_PAGE_OWNER_H */ diff --git a/mm/migrate.c b/mm/migrate.c index 3ad0fea5c438..8133805431ba 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -38,6 +38,7 @@ #include #include #include +#include #include @@ -578,6 +579,8 @@ void migrate_page_copy(struct page *newpage, struct page *page) */ if (PageWriteback(newpage)) end_page_writeback(newpage); + + copy_page_owner(page, newpage); } /************************************************************ diff --git a/mm/page_owner.c b/mm/page_owner.c index feaa28b40c1c..774b55623212 100644 --- a/mm/page_owner.c +++ b/mm/page_owner.c @@ -84,6 +84,31 @@ gfp_t __get_page_owner_gfp(struct page *page) return page_ext->gfp_mask; } +void __copy_page_owner(struct page *oldpage, struct page *newpage) +{ + struct page_ext *old_ext = lookup_page_ext(oldpage); + struct page_ext *new_ext = lookup_page_ext(newpage); + int i; + + new_ext->order = old_ext->order; + new_ext->gfp_mask = old_ext->gfp_mask; + new_ext->nr_entries = old_ext->nr_entries; + + for (i = 0; i < ARRAY_SIZE(new_ext->trace_entries); i++) + new_ext->trace_entries[i] = old_ext->trace_entries[i]; + + /* * We don't clear the bit on the oldpage as it's going to be freed * after migration. Until then, the info can be useful in case of * a bug, and the overal stats will be off a bit only temporarily. * Also, migrate_misplaced_transhuge_page() can still fail the * migration and then we want the oldpage to retain the info. But * in that case we also don't need to explicitly clear the info from * the new page, which will be freed.
+ */ + __set_bit(PAGE_EXT_OWNER, &new_ext->flags); +} + static ssize_t print_page_owner(char __user *buf, size_t count, unsigned long pfn, struct page *page, struct page_ext *page_ext) -- cgit v1.2.3 From 7cd12b4abfd2f8f42414c520bbd051a5b7dc7a8c Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Tue, 15 Mar 2016 14:56:18 -0700 Subject: mm, page_owner: track and print last migrate reason During migration, page_owner info is now copied with the rest of the page, so the stacktrace leading to free page allocation during migration is overwritten. For debugging purposes, it might be however useful to know that the page has been migrated since its initial allocation. This might happen many times during the lifetime for different reasons and fully tracking this, especially with stacktraces would incur extra memory costs. As a compromise, store and print the migrate_reason of the last migration that occurred to the page. This is enough to distinguish compaction, numa balancing etc. Example page_owner entry after the patch: Page allocated via order 0, mask 0x24200ca(GFP_HIGHUSER_MOVABLE) PFN 628753 type Movable Block 1228 type Movable Flags 0x1fffff80040030(dirty|lru|swapbacked) [] __alloc_pages_nodemask+0x134/0x230 [] alloc_pages_vma+0xb5/0x250 [] shmem_alloc_page+0x61/0x90 [] shmem_getpage_gfp+0x678/0x960 [] shmem_fallocate+0x329/0x440 [] vfs_fallocate+0x140/0x230 [] SyS_fallocate+0x44/0x70 [] entry_SYSCALL_64_fastpath+0x12/0x71 Page has been migrated, last migrate reason: compaction Signed-off-by: Vlastimil Babka Cc: Joonsoo Kim Cc: Minchan Kim Cc: Sasha Levin Cc: "Kirill A. Shutemov" Cc: Mel Gorman Cc: Michal Hocko Cc: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/migrate.h | 6 +++++- include/linux/page_ext.h | 1 + include/linux/page_owner.h | 9 +++++++++ mm/debug.c | 11 +++++++++++ mm/migrate.c | 10 +++++++--- mm/page_owner.c | 17 +++++++++++++++++ 6 files changed, 50 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/migrate.h b/include/linux/migrate.h index cac1c0904d5f..9b50325e4ddf 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -23,9 +23,13 @@ enum migrate_reason { MR_SYSCALL, /* also applies to cpusets */ MR_MEMPOLICY_MBIND, MR_NUMA_MISPLACED, - MR_CMA + MR_CMA, + MR_TYPES }; +/* In mm/debug.c; also keep sync with include/trace/events/migrate.h */ +extern char *migrate_reason_names[MR_TYPES]; + #ifdef CONFIG_MIGRATION extern void putback_movable_pages(struct list_head *l); diff --git a/include/linux/page_ext.h b/include/linux/page_ext.h index 17f118a82854..e1fe7cf5bddf 100644 --- a/include/linux/page_ext.h +++ b/include/linux/page_ext.h @@ -45,6 +45,7 @@ struct page_ext { unsigned int order; gfp_t gfp_mask; unsigned int nr_entries; + int last_migrate_reason; unsigned long trace_entries[8]; #endif }; diff --git a/include/linux/page_owner.h b/include/linux/page_owner.h index 6440daab4ef8..555893bf13d7 100644 --- a/include/linux/page_owner.h +++ b/include/linux/page_owner.h @@ -12,6 +12,7 @@ extern void __set_page_owner(struct page *page, unsigned int order, gfp_t gfp_mask); extern gfp_t __get_page_owner_gfp(struct page *page); extern void __copy_page_owner(struct page *oldpage, struct page *newpage); +extern void __set_page_owner_migrate_reason(struct page *page, int reason); static inline void reset_page_owner(struct page *page, unsigned int order) { @@ -38,6 +39,11 @@ static inline void copy_page_owner(struct page *oldpage, struct page *newpage) if 
(static_branch_unlikely(&page_owner_inited)) __copy_page_owner(oldpage, newpage); } +static inline void set_page_owner_migrate_reason(struct page *page, int reason) +{ + if (static_branch_unlikely(&page_owner_inited)) + __set_page_owner_migrate_reason(page, reason); +} #else static inline void reset_page_owner(struct page *page, unsigned int order) { @@ -53,5 +59,8 @@ static inline gfp_t get_page_owner_gfp(struct page *page) static inline void copy_page_owner(struct page *oldpage, struct page *newpage) { } +static inline void set_page_owner_migrate_reason(struct page *page, int reason) +{ +} #endif /* CONFIG_PAGE_OWNER */ #endif /* __LINUX_PAGE_OWNER_H */ diff --git a/mm/debug.c b/mm/debug.c index 231e1452a912..78dc54877075 100644 --- a/mm/debug.c +++ b/mm/debug.c @@ -10,9 +10,20 @@ #include #include #include +#include #include "internal.h" +char *migrate_reason_names[MR_TYPES] = { + "compaction", + "memory_failure", + "memory_hotplug", + "syscall_or_cpuset", + "mempolicy_mbind", + "numa_misplaced", + "cma", +}; + const struct trace_print_flags pageflag_names[] = { __def_pageflag_names, {0, NULL} diff --git a/mm/migrate.c b/mm/migrate.c index 8133805431ba..432ecd0172cd 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -955,8 +955,10 @@ static ICE_noinline int unmap_and_move(new_page_t get_new_page, } rc = __unmap_and_move(page, newpage, force, mode); - if (rc == MIGRATEPAGE_SUCCESS) + if (rc == MIGRATEPAGE_SUCCESS) { put_new_page = NULL; + set_page_owner_migrate_reason(newpage, reason); + } out: if (rc != -EAGAIN) { @@ -1021,7 +1023,7 @@ out: static int unmap_and_move_huge_page(new_page_t get_new_page, free_page_t put_new_page, unsigned long private, struct page *hpage, int force, - enum migrate_mode mode) + enum migrate_mode mode, int reason) { int rc = -EAGAIN; int *result = NULL; @@ -1079,6 +1081,7 @@ put_anon: if (rc == MIGRATEPAGE_SUCCESS) { hugetlb_cgroup_migrate(hpage, new_hpage); put_new_page = NULL; + set_page_owner_migrate_reason(new_hpage, reason); } unlock_page(hpage); @@ -1151,7 +1154,7 @@ int migrate_pages(struct list_head *from, new_page_t get_new_page, if (PageHuge(page)) rc = unmap_and_move_huge_page(get_new_page, put_new_page, private, page, - pass > 2, mode); + pass > 2, mode, reason); else rc = unmap_and_move(get_new_page, put_new_page, private, page, pass > 2, mode, @@ -1842,6 +1845,7 @@ fail_putback: set_page_memcg(new_page, page_memcg(page)); set_page_memcg(page, NULL); page_remove_rmap(page, true); + set_page_owner_migrate_reason(new_page, MR_NUMA_MISPLACED); spin_unlock(ptl); mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end); diff --git a/mm/page_owner.c b/mm/page_owner.c index 774b55623212..a57068cfe52f 100644 --- a/mm/page_owner.c +++ b/mm/page_owner.c @@ -6,6 +6,7 @@ #include #include #include +#include #include "internal.h" static bool page_owner_disabled = true; @@ -73,10 +74,18 @@ void __set_page_owner(struct page *page, unsigned int order, gfp_t gfp_mask) page_ext->order = order; page_ext->gfp_mask = gfp_mask; page_ext->nr_entries = trace.nr_entries; + page_ext->last_migrate_reason = -1; __set_bit(PAGE_EXT_OWNER, &page_ext->flags); } +void __set_page_owner_migrate_reason(struct page *page, int reason) +{ + struct page_ext *page_ext = lookup_page_ext(page); + + page_ext->last_migrate_reason = reason; +} + gfp_t __get_page_owner_gfp(struct page *page) { struct page_ext *page_ext = lookup_page_ext(page); @@ -151,6 +160,14 @@ print_page_owner(char __user *buf, size_t count, unsigned long pfn, if (ret >= count) goto err; + if 
(page_ext->last_migrate_reason != -1) { + ret += snprintf(kbuf + ret, count - ret, + "Page has been migrated, last migrate reason: %s\n", + migrate_reason_names[page_ext->last_migrate_reason]); + if (ret >= count) + goto err; + } + ret += snprintf(kbuf + ret, count - ret, "\n"); if (ret >= count) goto err; -- cgit v1.2.3 From 4e462112e98f9ad6dd62e160f8b14c7df5fed2fc Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Tue, 15 Mar 2016 14:56:21 -0700 Subject: mm, page_owner: dump page owner info from dump_page() The page_owner mechanism is useful for dealing with memory leaks. By reading /sys/kernel/debug/page_owner one can determine the stack traces leading to allocations of all pages, and find e.g. a buggy driver. This information might be also potentially useful for debugging, such as the VM_BUG_ON_PAGE() calls to dump_page(). So let's print the stored info from dump_page(). Example output: page:ffffea000292f1c0 count:1 mapcount:0 mapping:ffff8800b2f6cc18 index:0x91d flags: 0x1fffff8001002c(referenced|uptodate|lru|mappedtodisk) page dumped because: VM_BUG_ON_PAGE(1) page->mem_cgroup:ffff8801392c5000 page allocated via order 0, migratetype Movable, gfp_mask 0x24213ca(GFP_HIGHUSER_MOVABLE|__GFP_COLD|__GFP_NOWARN|__GFP_NORETRY) [] __alloc_pages_nodemask+0x134/0x230 [] alloc_pages_current+0x88/0x120 [] __page_cache_alloc+0xe6/0x120 [] __do_page_cache_readahead+0xdc/0x240 [] ondemand_readahead+0x135/0x260 [] page_cache_async_readahead+0x6c/0x70 [] generic_file_read_iter+0x3f2/0x760 [] __vfs_read+0xa7/0xd0 page has been migrated, last migrate reason: compaction Signed-off-by: Vlastimil Babka Acked-by: Michal Hocko Cc: Joonsoo Kim Cc: Minchan Kim Cc: Sasha Levin Cc: "Kirill A. Shutemov" Cc: Mel Gorman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/page_owner.h | 9 +++++++++ mm/debug.c | 2 ++ mm/page_alloc.c | 1 + mm/page_owner.c | 25 +++++++++++++++++++++++++ 4 files changed, 37 insertions(+) (limited to 'include/linux') diff --git a/include/linux/page_owner.h b/include/linux/page_owner.h index 555893bf13d7..46f1b939948c 100644 --- a/include/linux/page_owner.h +++ b/include/linux/page_owner.h @@ -13,6 +13,7 @@ extern void __set_page_owner(struct page *page, extern gfp_t __get_page_owner_gfp(struct page *page); extern void __copy_page_owner(struct page *oldpage, struct page *newpage); extern void __set_page_owner_migrate_reason(struct page *page, int reason); +extern void __dump_page_owner(struct page *page); static inline void reset_page_owner(struct page *page, unsigned int order) { @@ -44,6 +45,11 @@ static inline void set_page_owner_migrate_reason(struct page *page, int reason) if (static_branch_unlikely(&page_owner_inited)) __set_page_owner_migrate_reason(page, reason); } +static inline void dump_page_owner(struct page *page) +{ + if (static_branch_unlikely(&page_owner_inited)) + __dump_page_owner(page); +} #else static inline void reset_page_owner(struct page *page, unsigned int order) { @@ -62,5 +68,8 @@ static inline void copy_page_owner(struct page *oldpage, struct page *newpage) static inline void set_page_owner_migrate_reason(struct page *page, int reason) { } +static inline void dump_page_owner(struct page *page) +{ +} #endif /* CONFIG_PAGE_OWNER */ #endif /* __LINUX_PAGE_OWNER_H */ diff --git a/mm/debug.c b/mm/debug.c index 78dc54877075..61b1f1bb328e 100644 --- a/mm/debug.c +++ b/mm/debug.c @@ -11,6 +11,7 @@ #include #include #include +#include #include "internal.h" @@ -67,6 +68,7 @@ void dump_page_badflags(struct page *page, const char *reason, void 
dump_page(struct page *page, const char *reason) { dump_page_badflags(page, reason, 0); + dump_page_owner(page); } EXPORT_SYMBOL(dump_page); diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 030fafccaa6b..d98672d33752 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -443,6 +443,7 @@ static void bad_page(struct page *page, const char *reason, printk(KERN_ALERT "BUG: Bad page state in process %s pfn:%05lx\n", current->comm, page_to_pfn(page)); dump_page_badflags(page, reason, bad_flags); + dump_page_owner(page); print_modules(); dump_stack(); diff --git a/mm/page_owner.c b/mm/page_owner.c index a57068cfe52f..44ad1f00c4e1 100644 --- a/mm/page_owner.c +++ b/mm/page_owner.c @@ -183,6 +183,31 @@ err: return -ENOMEM; } +void __dump_page_owner(struct page *page) +{ + struct page_ext *page_ext = lookup_page_ext(page); + struct stack_trace trace = { + .nr_entries = page_ext->nr_entries, + .entries = &page_ext->trace_entries[0], + }; + gfp_t gfp_mask = page_ext->gfp_mask; + int mt = gfpflags_to_migratetype(gfp_mask); + + if (!test_bit(PAGE_EXT_OWNER, &page_ext->flags)) { + pr_alert("page_owner info is not active (free page?)\n"); + return; + } + + pr_alert("page allocated via order %u, migratetype %s, " + "gfp_mask %#x(%pGg)\n", page_ext->order, + migratetype_names[mt], gfp_mask, &gfp_mask); + print_stack_trace(&trace, 0); + + if (page_ext->last_migrate_reason != -1) + pr_alert("page has been migrated, last migrate reason: %s\n", + migrate_reason_names[page_ext->last_migrate_reason]); +} + static ssize_t read_page_owner(struct file *file, char __user *buf, size_t count, loff_t *ppos) { -- cgit v1.2.3 From ff8e81163889ac4c7f59e7f7db6377d0c5d8d69c Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Tue, 15 Mar 2016 14:56:24 -0700 Subject: mm, debug: move bad flags printing to bad_page() Since bad_page() is the only user of the badflags parameter of dump_page_badflags(), we can move the code to bad_page() and simplify a bit. The dump_page_badflags() function is renamed to __dump_page() and can still be called separately from dump_page() for temporary debug prints where page_owner info is not desired. The only user-visible change is that page->mem_cgroup is printed before the bad flags. Signed-off-by: Vlastimil Babka Acked-by: Michal Hocko Cc: Joonsoo Kim Cc: Minchan Kim Cc: Sasha Levin Cc: "Kirill A. 
Shutemov" Cc: Mel Gorman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmdebug.h | 3 +-- mm/debug.c | 10 +++------- mm/page_alloc.c | 10 +++++++--- 3 files changed, 11 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mmdebug.h b/include/linux/mmdebug.h index 053824b0a412..de7be78c6f0e 100644 --- a/include/linux/mmdebug.h +++ b/include/linux/mmdebug.h @@ -9,8 +9,7 @@ struct vm_area_struct; struct mm_struct; extern void dump_page(struct page *page, const char *reason); -extern void dump_page_badflags(struct page *page, const char *reason, - unsigned long badflags); +extern void __dump_page(struct page *page, const char *reason); void dump_vma(const struct vm_area_struct *vma); void dump_mm(const struct mm_struct *mm); diff --git a/mm/debug.c b/mm/debug.c index 61b1f1bb328e..df7247b0b532 100644 --- a/mm/debug.c +++ b/mm/debug.c @@ -40,8 +40,7 @@ const struct trace_print_flags vmaflag_names[] = { {0, NULL} }; -void dump_page_badflags(struct page *page, const char *reason, - unsigned long badflags) +void __dump_page(struct page *page, const char *reason) { pr_emerg("page:%p count:%d mapcount:%d mapping:%p index:%#lx", page, atomic_read(&page->_count), page_mapcount(page), @@ -50,15 +49,12 @@ void dump_page_badflags(struct page *page, const char *reason, pr_cont(" compound_mapcount: %d", compound_mapcount(page)); pr_cont("\n"); BUILD_BUG_ON(ARRAY_SIZE(pageflag_names) != __NR_PAGEFLAGS + 1); + pr_emerg("flags: %#lx(%pGp)\n", page->flags, &page->flags); if (reason) pr_alert("page dumped because: %s\n", reason); - badflags &= page->flags; - if (badflags) - pr_alert("bad because of flags: %#lx(%pGp)\n", badflags, - &badflags); #ifdef CONFIG_MEMCG if (page->mem_cgroup) pr_alert("page->mem_cgroup:%p\n", page->mem_cgroup); @@ -67,7 +63,7 @@ void dump_page_badflags(struct page *page, const char *reason, void dump_page(struct page *page, const char *reason) { - dump_page_badflags(page, reason, 0); + __dump_page(page, reason); dump_page_owner(page); } EXPORT_SYMBOL(dump_page); diff --git a/mm/page_alloc.c b/mm/page_alloc.c index d98672d33752..0691403aed93 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -430,7 +430,7 @@ static void bad_page(struct page *page, const char *reason, goto out; } if (nr_unshown) { - printk(KERN_ALERT + pr_alert( "BUG: Bad page state: %lu messages suppressed\n", nr_unshown); nr_unshown = 0; @@ -440,9 +440,13 @@ static void bad_page(struct page *page, const char *reason, if (nr_shown++ == 0) resume = jiffies + 60 * HZ; - printk(KERN_ALERT "BUG: Bad page state in process %s pfn:%05lx\n", + pr_alert("BUG: Bad page state in process %s pfn:%05lx\n", current->comm, page_to_pfn(page)); - dump_page_badflags(page, reason, bad_flags); + __dump_page(page, reason); + bad_flags &= page->flags; + if (bad_flags) + pr_alert("bad because of flags: %#lx(%pGp)\n", + bad_flags, &bad_flags); dump_page_owner(page); print_modules(); -- cgit v1.2.3 From 8823b1dbc05fab1a8bec275eeae4709257c2661d Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Tue, 15 Mar 2016 14:56:27 -0700 Subject: mm/page_poison.c: enable PAGE_POISONING as a separate option Page poisoning is currently set up as a feature if architectures don't have architecture debug page_alloc to allow unmapping of pages. It has uses apart from that though. Clearing of the pages on free provides an increase in security as it helps to limit the risk of information leaks. 
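The idea, in a minimal userspace sketch (the kernel works per page with memset(), memchr_inv() and kmap_atomic(); the 0xaa pattern matches the kernel's PAGE_POISON value, all other names are illustrative):

#include <stdio.h>
#include <string.h>

#define POISON	0xaa	/* kernel: PAGE_POISON */
#define OBJ_SZ	4096

static unsigned char object[OBJ_SZ];

/* called when the object is freed */
static void poison_on_free(void)
{
	memset(object, POISON, OBJ_SZ);
}

/* called when the object is handed out again */
static int check_on_alloc(void)
{
	size_t i;

	for (i = 0; i < OBJ_SZ; i++) {
		if (object[i] != POISON) {
			fprintf(stderr, "corruption at offset %zu: %#x\n",
				i, (unsigned)object[i]);
			return -1;
		}
	}
	return 0;
}

int main(void)
{
	poison_on_free();
	object[100] = 0x42;	/* simulated write to freed memory */
	return check_on_alloc() ? 1 : 0;
}

The PAGE_POISONING_NO_SANITY option introduced below simply drops the check_on_alloc() half of this cycle.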
Allow page poisoning to be enabled as a separate option independent of kernel_map pages since the two features do separate work. Because of how hibernation is implemented, the checks on alloc cannot occur if hibernation is enabled. The runtime alloc checks can also be enabled with an option when !HIBERNATION. Credit to Grsecurity/PaX team for inspiring this work. Signed-off-by: Laura Abbott Cc: Rafael J. Wysocki Cc: "Kirill A. Shutemov" Cc: Vlastimil Babka Cc: Michal Hocko Cc: Kees Cook Cc: Mathias Krause Cc: Dave Hansen Cc: Jianyu Zhan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/kernel-parameters.txt | 5 ++ include/linux/mm.h | 9 ++ mm/Kconfig.debug | 25 +++++- mm/Makefile | 2 +- mm/debug-pagealloc.c | 137 ---------------------------- mm/page_alloc.c | 2 + mm/page_poison.c | 173 ++++++++++++++++++++++++++++++++++++ 7 files changed, 214 insertions(+), 139 deletions(-) delete mode 100644 mm/debug-pagealloc.c create mode 100644 mm/page_poison.c (limited to 'include/linux') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 208ae7287659..8e5abd640b0b 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -2731,6 +2731,11 @@ bytes respectively. Such letter suffixes can also be entirely omitted. we can turn it on. on: enable the feature + page_poison= [KNL] Boot-time parameter changing the state of + poisoning on the buddy allocator. + off: turn off poisoning + on: turn on poisoning + panic= [KNL] Kernel behaviour on panic: delay timeout > 0: seconds before rebooting timeout = 0: wait forever diff --git a/include/linux/mm.h b/include/linux/mm.h index 69fd6bbb8cce..99dcc8f36e28 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2176,6 +2176,15 @@ extern int apply_to_page_range(struct mm_struct *mm, unsigned long address, unsigned long size, pte_fn_t fn, void *data); +#ifdef CONFIG_PAGE_POISONING +extern bool page_poisoning_enabled(void); +extern void kernel_poison_pages(struct page *page, int numpages, int enable); +#else +static inline bool page_poisoning_enabled(void) { return false; } +static inline void kernel_poison_pages(struct page *page, int numpages, + int enable) { } +#endif + #ifdef CONFIG_DEBUG_PAGEALLOC extern bool _debug_pagealloc_enabled; extern void __kernel_map_pages(struct page *page, int numpages, int enable); diff --git a/mm/Kconfig.debug b/mm/Kconfig.debug index a0c136af9c91..1f99f9a0deae 100644 --- a/mm/Kconfig.debug +++ b/mm/Kconfig.debug @@ -41,4 +41,27 @@ config DEBUG_PAGEALLOC_ENABLE_DEFAULT can be overridden by debug_pagealloc=off|on. config PAGE_POISONING - bool + bool "Poison pages after freeing" + select PAGE_EXTENSION + select PAGE_POISONING_NO_SANITY if HIBERNATION + ---help--- + Fill the pages with poison patterns after free_pages() and verify + the patterns before alloc_pages. The filling of the memory helps + reduce the risk of information leaks from freed data. This does + have a potential performance impact. + + Note that "poison" here is not the same thing as the "HWPoison" + for CONFIG_MEMORY_FAILURE. This is software poisoning only. + + If unsure, say N + +config PAGE_POISONING_NO_SANITY + depends on PAGE_POISONING + bool "Only poison, don't sanity check" + ---help--- + Skip the sanity checking on alloc, only fill the pages with + poison on free. This reduces some of the overhead of the + poisoning feature. + + If you are only interested in sanitization, say Y. Otherwise + say N.
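The sanity check that PAGE_POISONING_NO_SANITY skips is check_poison_mem(), shown in the mm/page_poison.c hunk further down. One detail worth calling out is how it classifies a failure: a single flipped bit, which often points at a hardware error, is reported separately from broader corruption, using the classic power-of-two test. A standalone restatement of that predicate (illustrative, compilable on its own):

#include <stdio.h>

/* If exactly one bit differs, a ^ b has exactly one bit set, i.e. it
 * is a nonzero power of two, which is what error & (error - 1) == 0
 * tests. */
static int single_bit_flip(unsigned char a, unsigned char b)
{
	unsigned char error = a ^ b;

	return error && !(error & (error - 1));
}

int main(void)
{
	printf("%d\n", single_bit_flip(0xaa, 0xab)); /* 1: only bit 0 differs */
	printf("%d\n", single_bit_flip(0xaa, 0x55)); /* 0: every bit differs */
	printf("%d\n", single_bit_flip(0xaa, 0xaa)); /* 0: no difference */
	return 0;
}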
diff --git a/mm/Makefile b/mm/Makefile index 2ed43191fc3b..cfdd481d27a5 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -48,7 +48,7 @@ obj-$(CONFIG_SPARSEMEM_VMEMMAP) += sparse-vmemmap.o obj-$(CONFIG_SLOB) += slob.o obj-$(CONFIG_MMU_NOTIFIER) += mmu_notifier.o obj-$(CONFIG_KSM) += ksm.o -obj-$(CONFIG_PAGE_POISONING) += debug-pagealloc.o +obj-$(CONFIG_PAGE_POISONING) += page_poison.o obj-$(CONFIG_SLAB) += slab.o obj-$(CONFIG_SLUB) += slub.o obj-$(CONFIG_KMEMCHECK) += kmemcheck.o diff --git a/mm/debug-pagealloc.c b/mm/debug-pagealloc.c deleted file mode 100644 index 5bf5906ce13b..000000000000 --- a/mm/debug-pagealloc.c +++ /dev/null @@ -1,137 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include - -static bool page_poisoning_enabled __read_mostly; - -static bool need_page_poisoning(void) -{ - if (!debug_pagealloc_enabled()) - return false; - - return true; -} - -static void init_page_poisoning(void) -{ - if (!debug_pagealloc_enabled()) - return; - - page_poisoning_enabled = true; -} - -struct page_ext_operations page_poisoning_ops = { - .need = need_page_poisoning, - .init = init_page_poisoning, -}; - -static inline void set_page_poison(struct page *page) -{ - struct page_ext *page_ext; - - page_ext = lookup_page_ext(page); - __set_bit(PAGE_EXT_DEBUG_POISON, &page_ext->flags); -} - -static inline void clear_page_poison(struct page *page) -{ - struct page_ext *page_ext; - - page_ext = lookup_page_ext(page); - __clear_bit(PAGE_EXT_DEBUG_POISON, &page_ext->flags); -} - -static inline bool page_poison(struct page *page) -{ - struct page_ext *page_ext; - - page_ext = lookup_page_ext(page); - return test_bit(PAGE_EXT_DEBUG_POISON, &page_ext->flags); -} - -static void poison_page(struct page *page) -{ - void *addr = kmap_atomic(page); - - set_page_poison(page); - memset(addr, PAGE_POISON, PAGE_SIZE); - kunmap_atomic(addr); -} - -static void poison_pages(struct page *page, int n) -{ - int i; - - for (i = 0; i < n; i++) - poison_page(page + i); -} - -static bool single_bit_flip(unsigned char a, unsigned char b) -{ - unsigned char error = a ^ b; - - return error && !(error & (error - 1)); -} - -static void check_poison_mem(unsigned char *mem, size_t bytes) -{ - static DEFINE_RATELIMIT_STATE(ratelimit, 5 * HZ, 10); - unsigned char *start; - unsigned char *end; - - start = memchr_inv(mem, PAGE_POISON, bytes); - if (!start) - return; - - for (end = mem + bytes - 1; end > start; end--) { - if (*end != PAGE_POISON) - break; - } - - if (!__ratelimit(&ratelimit)) - return; - else if (start == end && single_bit_flip(*start, PAGE_POISON)) - printk(KERN_ERR "pagealloc: single bit error\n"); - else - printk(KERN_ERR "pagealloc: memory corruption\n"); - - print_hex_dump(KERN_ERR, "", DUMP_PREFIX_ADDRESS, 16, 1, start, - end - start + 1, 1); - dump_stack(); -} - -static void unpoison_page(struct page *page) -{ - void *addr; - - if (!page_poison(page)) - return; - - addr = kmap_atomic(page); - check_poison_mem(addr, PAGE_SIZE); - clear_page_poison(page); - kunmap_atomic(addr); -} - -static void unpoison_pages(struct page *page, int n) -{ - int i; - - for (i = 0; i < n; i++) - unpoison_page(page + i); -} - -void __kernel_map_pages(struct page *page, int numpages, int enable) -{ - if (!page_poisoning_enabled) - return; - - if (enable) - unpoison_pages(page, numpages); - else - poison_pages(page, numpages); -} diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 0691403aed93..2a08349fbab2 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1025,6 +1025,7 @@ static bool 
free_pages_prepare(struct page *page, unsigned int order) PAGE_SIZE << order); } arch_free_page(page, order); + kernel_poison_pages(page, 1 << order, 0); kernel_map_pages(page, 1 << order, 0); return true; @@ -1420,6 +1421,7 @@ static int prep_new_page(struct page *page, unsigned int order, gfp_t gfp_flags, arch_alloc_page(page, order); kernel_map_pages(page, 1 << order, 1); + kernel_poison_pages(page, 1 << order, 1); kasan_alloc_pages(page, order); if (gfp_flags & __GFP_ZERO) diff --git a/mm/page_poison.c b/mm/page_poison.c new file mode 100644 index 000000000000..89d3bc773633 --- /dev/null +++ b/mm/page_poison.c @@ -0,0 +1,173 @@ +#include +#include +#include +#include +#include +#include +#include + +static bool __page_poisoning_enabled __read_mostly; +static bool want_page_poisoning __read_mostly; + +static int early_page_poison_param(char *buf) +{ + if (!buf) + return -EINVAL; + + if (strcmp(buf, "on") == 0) + want_page_poisoning = true; + else if (strcmp(buf, "off") == 0) + want_page_poisoning = false; + + return 0; +} +early_param("page_poison", early_page_poison_param); + +bool page_poisoning_enabled(void) +{ + return __page_poisoning_enabled; +} + +static bool need_page_poisoning(void) +{ + return want_page_poisoning; +} + +static void init_page_poisoning(void) +{ + /* + * page poisoning is debug page alloc for some arches. If either + * of those options are enabled, enable poisoning + */ + if (!IS_ENABLED(CONFIG_ARCH_SUPPORTS_DEBUG_PAGEALLOC)) { + if (!want_page_poisoning && !debug_pagealloc_enabled()) + return; + } else { + if (!want_page_poisoning) + return; + } + + __page_poisoning_enabled = true; +} + +struct page_ext_operations page_poisoning_ops = { + .need = need_page_poisoning, + .init = init_page_poisoning, +}; + +static inline void set_page_poison(struct page *page) +{ + struct page_ext *page_ext; + + page_ext = lookup_page_ext(page); + __set_bit(PAGE_EXT_DEBUG_POISON, &page_ext->flags); +} + +static inline void clear_page_poison(struct page *page) +{ + struct page_ext *page_ext; + + page_ext = lookup_page_ext(page); + __clear_bit(PAGE_EXT_DEBUG_POISON, &page_ext->flags); +} + +static inline bool page_poison(struct page *page) +{ + struct page_ext *page_ext; + + page_ext = lookup_page_ext(page); + return test_bit(PAGE_EXT_DEBUG_POISON, &page_ext->flags); +} + +static void poison_page(struct page *page) +{ + void *addr = kmap_atomic(page); + + set_page_poison(page); + memset(addr, PAGE_POISON, PAGE_SIZE); + kunmap_atomic(addr); +} + +static void poison_pages(struct page *page, int n) +{ + int i; + + for (i = 0; i < n; i++) + poison_page(page + i); +} + +static bool single_bit_flip(unsigned char a, unsigned char b) +{ + unsigned char error = a ^ b; + + return error && !(error & (error - 1)); +} + +static void check_poison_mem(unsigned char *mem, size_t bytes) +{ + static DEFINE_RATELIMIT_STATE(ratelimit, 5 * HZ, 10); + unsigned char *start; + unsigned char *end; + + if (IS_ENABLED(CONFIG_PAGE_POISONING_NO_SANITY)) + return; + + start = memchr_inv(mem, PAGE_POISON, bytes); + if (!start) + return; + + for (end = mem + bytes - 1; end > start; end--) { + if (*end != PAGE_POISON) + break; + } + + if (!__ratelimit(&ratelimit)) + return; + else if (start == end && single_bit_flip(*start, PAGE_POISON)) + pr_err("pagealloc: single bit error\n"); + else + pr_err("pagealloc: memory corruption\n"); + + print_hex_dump(KERN_ERR, "", DUMP_PREFIX_ADDRESS, 16, 1, start, + end - start + 1, 1); + dump_stack(); +} + +static void unpoison_page(struct page *page) +{ + void *addr; + + if 
(!page_poison(page)) + return; + + addr = kmap_atomic(page); + check_poison_mem(addr, PAGE_SIZE); + clear_page_poison(page); + kunmap_atomic(addr); +} + +static void unpoison_pages(struct page *page, int n) +{ + int i; + + for (i = 0; i < n; i++) + unpoison_page(page + i); +} + +void kernel_poison_pages(struct page *page, int numpages, int enable) +{ + if (!page_poisoning_enabled()) + return; + + if (enable) + unpoison_pages(page, numpages); + else + poison_pages(page, numpages); +} + +#ifndef CONFIG_ARCH_SUPPORTS_DEBUG_PAGEALLOC +void __kernel_map_pages(struct page *page, int numpages, int enable) +{ + /* This function does nothing, all work is done via poison pages */ +} +#endif -- cgit v1.2.3 From 1414c7f4f7d72d138fff35f00151d15749b5beda Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Tue, 15 Mar 2016 14:56:30 -0700 Subject: mm/page_poisoning.c: allow for zero poisoning By default, page poisoning uses a poison value (0xaa) on free. If this is changed to 0, the page is not only sanitized but zeroing on alloc with __GFP_ZERO can be skipped as well. The tradeoff is that detecting corruption from the poisoning is harder to detect. This feature also cannot be used with hibernation since pages are not guaranteed to be zeroed after hibernation. Credit to Grsecurity/PaX team for inspiring this work Signed-off-by: Laura Abbott Acked-by: Rafael J. Wysocki Cc: "Kirill A. Shutemov" Cc: Vlastimil Babka Cc: Michal Hocko Cc: Kees Cook Cc: Mathias Krause Cc: Dave Hansen Cc: Jianyu Zhan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 2 ++ include/linux/poison.h | 4 ++++ kernel/power/hibernate.c | 17 +++++++++++++++++ mm/Kconfig.debug | 14 ++++++++++++++ mm/page_alloc.c | 11 ++++++++++- mm/page_ext.c | 10 ++++++++-- mm/page_poison.c | 7 +++++-- 7 files changed, 60 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 99dcc8f36e28..b97243d6aa49 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2179,10 +2179,12 @@ extern int apply_to_page_range(struct mm_struct *mm, unsigned long address, #ifdef CONFIG_PAGE_POISONING extern bool page_poisoning_enabled(void); extern void kernel_poison_pages(struct page *page, int numpages, int enable); +extern bool page_is_poisoned(struct page *page); #else static inline bool page_poisoning_enabled(void) { return false; } static inline void kernel_poison_pages(struct page *page, int numpages, int enable) { } +static inline bool page_is_poisoned(struct page *page) { return false; } #endif #ifdef CONFIG_DEBUG_PAGEALLOC diff --git a/include/linux/poison.h b/include/linux/poison.h index 4a27153574e2..51334edec506 100644 --- a/include/linux/poison.h +++ b/include/linux/poison.h @@ -30,7 +30,11 @@ #define TIMER_ENTRY_STATIC ((void *) 0x300 + POISON_POINTER_DELTA) /********** mm/debug-pagealloc.c **********/ +#ifdef CONFIG_PAGE_POISONING_ZERO +#define PAGE_POISON 0x00 +#else #define PAGE_POISON 0xaa +#endif /********** mm/page_alloc.c ************/ diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index b7342a24f559..aa0f26b58426 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -1158,6 +1158,22 @@ static int __init kaslr_nohibernate_setup(char *str) return nohibernate_setup(str); } +static int __init page_poison_nohibernate_setup(char *str) +{ +#ifdef CONFIG_PAGE_POISONING_ZERO + /* + * The zeroing option for page poison skips the checks on alloc. 
+ * since hibernation doesn't save free pages there's no way to + * guarantee the pages will still be zeroed. + */ + if (!strcmp(str, "on")) { + pr_info("Disabling hibernation due to page poisoning\n"); + return nohibernate_setup(str); + } +#endif + return 1; +} + __setup("noresume", noresume_setup); __setup("resume_offset=", resume_offset_setup); __setup("resume=", resume_setup); @@ -1166,3 +1182,4 @@ __setup("resumewait", resumewait_setup); __setup("resumedelay=", resumedelay_setup); __setup("nohibernate", nohibernate_setup); __setup("kaslr", kaslr_nohibernate_setup); +__setup("page_poison=", page_poison_nohibernate_setup); diff --git a/mm/Kconfig.debug b/mm/Kconfig.debug index 1f99f9a0deae..5c50b238b770 100644 --- a/mm/Kconfig.debug +++ b/mm/Kconfig.debug @@ -65,3 +65,17 @@ config PAGE_POISONING_NO_SANITY If you are only interested in sanitization, say Y. Otherwise say N. + +config PAGE_POISONING_ZERO + bool "Use zero for poisoning instead of random data" + depends on PAGE_POISONING + ---help--- + Instead of using the existing poison value, fill the pages with + zeros. This makes it harder to detect when errors are occurring + due to sanitization but the zeroing at free means that it is + no longer necessary to write zeros when GFP_ZERO is used on + allocation. + + Enabling page poisoning with this option will disable hibernation + + If unsure, say N diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 2a08349fbab2..50897dcaefdb 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1405,15 +1405,24 @@ static inline int check_new_page(struct page *page) return 0; } +static inline bool free_pages_prezeroed(bool poisoned) +{ + return IS_ENABLED(CONFIG_PAGE_POISONING_ZERO) && + page_poisoning_enabled() && poisoned; +} + static int prep_new_page(struct page *page, unsigned int order, gfp_t gfp_flags, int alloc_flags) { int i; + bool poisoned = true; for (i = 0; i < (1 << order); i++) { struct page *p = page + i; if (unlikely(check_new_page(p))) return 1; + if (poisoned) + poisoned &= page_is_poisoned(p); } set_page_private(page, 0); @@ -1424,7 +1433,7 @@ static int prep_new_page(struct page *page, unsigned int order, gfp_t gfp_flags, kernel_poison_pages(page, 1 << order, 1); kasan_alloc_pages(page, order); - if (gfp_flags & __GFP_ZERO) + if (!free_pages_prezeroed(poisoned) && (gfp_flags & __GFP_ZERO)) for (i = 0; i < (1 << order); i++) clear_highpage(page + i); diff --git a/mm/page_ext.c b/mm/page_ext.c index 292ca7b8debd..2d864e64f7fe 100644 --- a/mm/page_ext.c +++ b/mm/page_ext.c @@ -106,12 +106,15 @@ struct page_ext *lookup_page_ext(struct page *page) struct page_ext *base; base = NODE_DATA(page_to_nid(page))->node_page_ext; -#ifdef CONFIG_DEBUG_VM +#if defined(CONFIG_DEBUG_VM) || defined(CONFIG_PAGE_POISONING) /* * The sanity checks the page allocator does upon freeing a * page can reach here before the page_ext arrays are * allocated when feeding a range of pages to the allocator * for the first time during bootup or memory hotplug. 
+ * + * This check is also necessary for ensuring page poisoning + * works as expected when enabled */ if (unlikely(!base)) return NULL; @@ -180,12 +183,15 @@ struct page_ext *lookup_page_ext(struct page *page) { unsigned long pfn = page_to_pfn(page); struct mem_section *section = __pfn_to_section(pfn); -#ifdef CONFIG_DEBUG_VM +#if defined(CONFIG_DEBUG_VM) || defined(CONFIG_PAGE_POISONING) /* * The sanity checks the page allocator does upon freeing a * page can reach here before the page_ext arrays are * allocated when feeding a range of pages to the allocator * for the first time during bootup or memory hotplug. + * + * This check is also necessary for ensuring page poisoning + * works as expected when enabled */ if (!section->page_ext) return NULL; diff --git a/mm/page_poison.c b/mm/page_poison.c index 89d3bc773633..479e7ea2bea6 100644 --- a/mm/page_poison.c +++ b/mm/page_poison.c @@ -71,11 +71,14 @@ static inline void clear_page_poison(struct page *page) __clear_bit(PAGE_EXT_DEBUG_POISON, &page_ext->flags); } -static inline bool page_poison(struct page *page) +bool page_is_poisoned(struct page *page) { struct page_ext *page_ext; page_ext = lookup_page_ext(page); + if (!page_ext) + return false; + return test_bit(PAGE_EXT_DEBUG_POISON, &page_ext->flags); } @@ -137,7 +140,7 @@ static void unpoison_page(struct page *page) { void *addr; - if (!page_poison(page)) + if (!page_is_poisoned(page)) return; addr = kmap_atomic(page); -- cgit v1.2.3 From 31bc3858ea3ebcc3157b3f5f0e624c5962f5a7a6 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Tue, 15 Mar 2016 14:56:48 -0700 Subject: memory-hotplug: add automatic onlining policy for the newly added memory Currently, all newly added memory blocks remain in 'offline' state unless someone onlines them; some Linux distributions carry special udev rules like: SUBSYSTEM=="memory", ACTION=="add", ATTR{state}=="offline", ATTR{state}="online" to make this happen automatically. This is not a great solution for virtual machines where memory hotplug is used to address high memory pressure, as such onlining is slow and the userspace process doing it (udev) has a chance of being killed by the OOM killer, since it will probably need to allocate some memory. Introduce a default policy for the newly added memory blocks in the /sys/devices/system/memory/auto_online_blocks file with two possible values: "offline", which preserves the current behavior, and "online", which causes all newly added memory blocks to go online as soon as they're added. The default is "offline". Signed-off-by: Vitaly Kuznetsov Reviewed-by: Daniel Kiper Cc: Jonathan Corbet Cc: Greg Kroah-Hartman Cc: Daniel Kiper Cc: Dan Williams Cc: Tang Chen Cc: David Vrabel Acked-by: David Rientjes Cc: Naoya Horiguchi Cc: Xishi Qiu Cc: Mel Gorman Cc: "K. Y.
Srinivasan" Cc: Igor Mammedov Cc: Kay Sievers Cc: Konrad Rzeszutek Wilk Cc: Boris Ostrovsky Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/memory-hotplug.txt | 23 ++++++++++++++++++++--- drivers/base/memory.c | 34 +++++++++++++++++++++++++++++++++- drivers/xen/balloon.c | 2 +- include/linux/memory.h | 3 +++ include/linux/memory_hotplug.h | 4 +++- mm/memory_hotplug.c | 17 +++++++++++++++-- 6 files changed, 75 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/Documentation/memory-hotplug.txt b/Documentation/memory-hotplug.txt index ce2cfcf35c27..443f4b44ad97 100644 --- a/Documentation/memory-hotplug.txt +++ b/Documentation/memory-hotplug.txt @@ -256,10 +256,27 @@ If the memory block is offline, you'll read "offline". 5.2. How to online memory ------------ -Even if the memory is hot-added, it is not at ready-to-use state. -For using newly added memory, you have to "online" the memory block. +When the memory is hot-added, the kernel decides whether or not to "online" +it according to the policy which can be read from "auto_online_blocks" file: -For onlining, you have to write "online" to the memory block's state file as: +% cat /sys/devices/system/memory/auto_online_blocks + +The default is "offline" which means the newly added memory is not in a +ready-to-use state and you have to "online" the newly added memory blocks +manually. Automatic onlining can be requested by writing "online" to +"auto_online_blocks" file: + +% echo online > /sys/devices/system/memory/auto_online_blocks + +This sets a global policy and impacts all memory blocks that will subsequently +be hotplugged. Currently offline blocks keep their state. It is possible, under +certain circumstances, that some memory blocks will be added but will fail to +online. User space tools can check their "state" files +(/sys/devices/system/memory/memoryXXX/state) and try to online them manually. + +If the automatic onlining wasn't requested, failed, or some memory block was +offlined it is possible to change the individual block's state by writing to the +"state" file: % echo online > /sys/devices/system/memory/memoryXXX/state diff --git a/drivers/base/memory.c b/drivers/base/memory.c index 213456c2b123..f46dba8b7092 100644 --- a/drivers/base/memory.c +++ b/drivers/base/memory.c @@ -251,7 +251,7 @@ memory_block_action(unsigned long phys_index, unsigned long action, int online_t return ret; } -static int memory_block_change_state(struct memory_block *mem, +int memory_block_change_state(struct memory_block *mem, unsigned long to_state, unsigned long from_state_req) { int ret = 0; @@ -438,6 +438,37 @@ print_block_size(struct device *dev, struct device_attribute *attr, static DEVICE_ATTR(block_size_bytes, 0444, print_block_size, NULL); +/* + * Memory auto online policy. 
+ */ + +static ssize_t +show_auto_online_blocks(struct device *dev, struct device_attribute *attr, + char *buf) +{ + if (memhp_auto_online) + return sprintf(buf, "online\n"); + else + return sprintf(buf, "offline\n"); +} + +static ssize_t +store_auto_online_blocks(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + if (sysfs_streq(buf, "online")) + memhp_auto_online = true; + else if (sysfs_streq(buf, "offline")) + memhp_auto_online = false; + else + return -EINVAL; + + return count; +} + +static DEVICE_ATTR(auto_online_blocks, 0644, show_auto_online_blocks, + store_auto_online_blocks); + /* * Some architectures will have custom drivers to do this, and * will not need to do it from userspace. The fake hot-add code @@ -746,6 +777,7 @@ static struct attribute *memory_root_attrs[] = { #endif &dev_attr_block_size_bytes.attr, + &dev_attr_auto_online_blocks.attr, NULL }; diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c index dc4305b407bf..e6058debd01b 100644 --- a/drivers/xen/balloon.c +++ b/drivers/xen/balloon.c @@ -338,7 +338,7 @@ static enum bp_state reserve_additional_memory(void) } #endif - rc = add_memory_resource(nid, resource); + rc = add_memory_resource(nid, resource, false); if (rc) { pr_warn("Cannot add additional memory (%i)\n", rc); goto err; diff --git a/include/linux/memory.h b/include/linux/memory.h index 8b8d8d12348e..82730adba950 100644 --- a/include/linux/memory.h +++ b/include/linux/memory.h @@ -109,6 +109,9 @@ extern void unregister_memory_notifier(struct notifier_block *nb); extern int register_memory_isolate_notifier(struct notifier_block *nb); extern void unregister_memory_isolate_notifier(struct notifier_block *nb); extern int register_new_memory(int, struct mem_section *); +extern int memory_block_change_state(struct memory_block *mem, + unsigned long to_state, + unsigned long from_state_req); #ifdef CONFIG_MEMORY_HOTREMOVE extern int unregister_memory_section(struct mem_section *); #endif diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index 43405992d027..769d76870550 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -99,6 +99,8 @@ extern void __online_page_free(struct page *page); extern int try_online_node(int nid); +extern bool memhp_auto_online; + #ifdef CONFIG_MEMORY_HOTREMOVE extern bool is_pageblock_removable_nolock(struct page *page); extern int arch_remove_memory(u64 start, u64 size); @@ -267,7 +269,7 @@ static inline void remove_memory(int nid, u64 start, u64 size) {} extern int walk_memory_range(unsigned long start_pfn, unsigned long end_pfn, void *arg, int (*func)(struct memory_block *, void *)); extern int add_memory(int nid, u64 start, u64 size); -extern int add_memory_resource(int nid, struct resource *resource); +extern int add_memory_resource(int nid, struct resource *resource, bool online); extern int zone_for_memory(int nid, u64 start, u64 size, int zone_default, bool for_device); extern int arch_add_memory(int nid, u64 start, u64 size, bool for_device); diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 979b18cbd343..484e86761b3e 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -77,6 +77,9 @@ static struct { #define memhp_lock_acquire() lock_map_acquire(&mem_hotplug.dep_map) #define memhp_lock_release() lock_map_release(&mem_hotplug.dep_map) +bool memhp_auto_online; +EXPORT_SYMBOL_GPL(memhp_auto_online); + void get_online_mems(void) { might_sleep(); @@ -1261,8 +1264,13 @@ int zone_for_memory(int nid, u64 start, u64 
size, int zone_default, return zone_default; } +static int online_memory_block(struct memory_block *mem, void *arg) +{ + return memory_block_change_state(mem, MEM_ONLINE, MEM_OFFLINE); +} + /* we are OK calling __meminit stuff here - we have CONFIG_MEMORY_HOTPLUG */ -int __ref add_memory_resource(int nid, struct resource *res) +int __ref add_memory_resource(int nid, struct resource *res, bool online) { u64 start, size; pg_data_t *pgdat = NULL; @@ -1322,6 +1330,11 @@ int __ref add_memory_resource(int nid, struct resource *res) /* create new memmap entry */ firmware_map_add_hotplug(start, start + size, "System RAM"); + /* online pages if requested */ + if (online) + walk_memory_range(PFN_DOWN(start), PFN_UP(start + size - 1), + NULL, online_memory_block); + goto out; error: @@ -1345,7 +1358,7 @@ int __ref add_memory(int nid, u64 start, u64 size) if (IS_ERR(res)) return PTR_ERR(res); - ret = add_memory_resource(nid, res); + ret = add_memory_resource(nid, res, memhp_auto_online); if (ret < 0) release_memory_resource(res); return ret; -- cgit v1.2.3 From 81f8c3a461d16f0355ced3d56d6d1bb5923207a1 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Tue, 15 Mar 2016 14:57:04 -0700 Subject: mm: memcontrol: generalize locking for the page->mem_cgroup binding These patches tag the page cache radix tree eviction entries with the memcg an evicted page belonged to, thus making per-cgroup LRU reclaim work properly and be as adaptive to new cache workingsets as global reclaim already is. This should have been part of the original thrash detection patch series, but was deferred due to the complexity of those patches. This patch (of 5): So far the only sites that needed to exclude charge migration to stabilize page->mem_cgroup have been per-cgroup page statistics, hence the name mem_cgroup_begin_page_stat(). But per-cgroup thrash detection will add another site that needs to ensure page->mem_cgroup lifetime. Rename these locking functions to the more generic lock_page_memcg() and unlock_page_memcg(). Since charge migration is a cgroup1 feature only, we might be able to delete it at some point, and these now easy to identify locking sites along with it. Signed-off-by: Johannes Weiner Suggested-by: Vladimir Davydov Acked-by: Vladimir Davydov Cc: Michal Hocko Cc: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/buffer.c | 14 +++++++------- fs/xfs/xfs_aops.c | 8 ++++---- include/linux/memcontrol.h | 16 +++++++++++----- mm/filemap.c | 12 ++++++------ mm/memcontrol.c | 34 ++++++++++++++-------------------- mm/page-writeback.c | 28 ++++++++++++++-------------- mm/rmap.c | 8 ++++---- mm/truncate.c | 6 +++--- mm/vmscan.c | 8 ++++---- 9 files changed, 67 insertions(+), 67 deletions(-) (limited to 'include/linux') diff --git a/fs/buffer.c b/fs/buffer.c index e1632abb4ca9..dc991510bb06 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -621,7 +621,7 @@ EXPORT_SYMBOL(mark_buffer_dirty_inode); * If warn is true, then emit a warning if the page is not uptodate and has * not been truncated. * - * The caller must hold mem_cgroup_begin_page_stat() lock. + * The caller must hold lock_page_memcg(). */ static void __set_page_dirty(struct page *page, struct address_space *mapping, struct mem_cgroup *memcg, int warn) @@ -683,17 +683,17 @@ int __set_page_dirty_buffers(struct page *page) } while (bh != head); } /* - * Use mem_group_begin_page_stat() to keep PageDirty synchronized with - * per-memcg dirty page counters. 
+ * Lock out page->mem_cgroup migration to keep PageDirty + * synchronized with per-memcg dirty page counters. */ - memcg = mem_cgroup_begin_page_stat(page); + memcg = lock_page_memcg(page); newly_dirty = !TestSetPageDirty(page); spin_unlock(&mapping->private_lock); if (newly_dirty) __set_page_dirty(page, mapping, memcg, 1); - mem_cgroup_end_page_stat(memcg); + unlock_page_memcg(memcg); if (newly_dirty) __mark_inode_dirty(mapping->host, I_DIRTY_PAGES); @@ -1169,13 +1169,13 @@ void mark_buffer_dirty(struct buffer_head *bh) struct address_space *mapping = NULL; struct mem_cgroup *memcg; - memcg = mem_cgroup_begin_page_stat(page); + memcg = lock_page_memcg(page); if (!TestSetPageDirty(page)) { mapping = page_mapping(page); if (mapping) __set_page_dirty(page, mapping, memcg, 0); } - mem_cgroup_end_page_stat(memcg); + unlock_page_memcg(memcg); if (mapping) __mark_inode_dirty(mapping->host, I_DIRTY_PAGES); } diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index a9ebabfe7587..5f85ebc52a98 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -1978,10 +1978,10 @@ xfs_vm_set_page_dirty( } while (bh != head); } /* - * Use mem_group_begin_page_stat() to keep PageDirty synchronized with - * per-memcg dirty page counters. + * Lock out page->mem_cgroup migration to keep PageDirty + * synchronized with per-memcg dirty page counters. */ - memcg = mem_cgroup_begin_page_stat(page); + memcg = lock_page_memcg(page); newly_dirty = !TestSetPageDirty(page); spin_unlock(&mapping->private_lock); @@ -1998,7 +1998,7 @@ xfs_vm_set_page_dirty( } spin_unlock_irqrestore(&mapping->tree_lock, flags); } - mem_cgroup_end_page_stat(memcg); + unlock_page_memcg(memcg); if (newly_dirty) __mark_inode_dirty(mapping->host, I_DIRTY_PAGES); return newly_dirty; diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 30b02e79610e..8502fd4144eb 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -429,8 +429,8 @@ bool mem_cgroup_oom_synchronize(bool wait); extern int do_swap_account; #endif -struct mem_cgroup *mem_cgroup_begin_page_stat(struct page *page); -void mem_cgroup_end_page_stat(struct mem_cgroup *memcg); +struct mem_cgroup *lock_page_memcg(struct page *page); +void unlock_page_memcg(struct mem_cgroup *memcg); /** * mem_cgroup_update_page_stat - update page state statistics @@ -438,7 +438,13 @@ void mem_cgroup_end_page_stat(struct mem_cgroup *memcg); * @idx: page state item to account * @val: number of pages (positive or negative) * - * See mem_cgroup_begin_page_stat() for locking requirements. 
+ * Callers must use lock_page_memcg() to prevent double accounting + * when the page is concurrently being moved to another memcg: + * + * memcg = lock_page_memcg(page); + * if (TestClearPageState(page)) + * mem_cgroup_update_page_stat(memcg, state, -1); + * unlock_page_memcg(memcg); */ static inline void mem_cgroup_update_page_stat(struct mem_cgroup *memcg, enum mem_cgroup_stat_index idx, int val) @@ -613,12 +619,12 @@ mem_cgroup_print_oom_info(struct mem_cgroup *memcg, struct task_struct *p) { } -static inline struct mem_cgroup *mem_cgroup_begin_page_stat(struct page *page) +static inline struct mem_cgroup *lock_page_memcg(struct page *page) { return NULL; } -static inline void mem_cgroup_end_page_stat(struct mem_cgroup *memcg) +static inline void unlock_page_memcg(struct mem_cgroup *memcg) { } diff --git a/mm/filemap.c b/mm/filemap.c index 4a0f5fa79dbd..ee8140cf935d 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -101,7 +101,7 @@ * ->tree_lock (page_remove_rmap->set_page_dirty) * bdi.wb->list_lock (page_remove_rmap->set_page_dirty) * ->inode->i_lock (page_remove_rmap->set_page_dirty) - * ->memcg->move_lock (page_remove_rmap->mem_cgroup_begin_page_stat) + * ->memcg->move_lock (page_remove_rmap->lock_page_memcg) * bdi.wb->list_lock (zap_pte_range->set_page_dirty) * ->inode->i_lock (zap_pte_range->set_page_dirty) * ->private_lock (zap_pte_range->__set_page_dirty_buffers) @@ -177,7 +177,7 @@ static void page_cache_tree_delete(struct address_space *mapping, * Delete a page from the page cache and free it. Caller has to make * sure the page is locked and that nobody else uses it - or that usage * is safe. The caller must hold the mapping's tree_lock and - * mem_cgroup_begin_page_stat(). + * lock_page_memcg(). */ void __delete_from_page_cache(struct page *page, void *shadow, struct mem_cgroup *memcg) @@ -263,11 +263,11 @@ void delete_from_page_cache(struct page *page) freepage = mapping->a_ops->freepage; - memcg = mem_cgroup_begin_page_stat(page); + memcg = lock_page_memcg(page); spin_lock_irqsave(&mapping->tree_lock, flags); __delete_from_page_cache(page, NULL, memcg); spin_unlock_irqrestore(&mapping->tree_lock, flags); - mem_cgroup_end_page_stat(memcg); + unlock_page_memcg(memcg); if (freepage) freepage(page); @@ -561,7 +561,7 @@ int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask) new->mapping = mapping; new->index = offset; - memcg = mem_cgroup_begin_page_stat(old); + memcg = lock_page_memcg(old); spin_lock_irqsave(&mapping->tree_lock, flags); __delete_from_page_cache(old, NULL, memcg); error = radix_tree_insert(&mapping->page_tree, offset, new); @@ -576,7 +576,7 @@ int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask) if (PageSwapBacked(new)) __inc_zone_page_state(new, NR_SHMEM); spin_unlock_irqrestore(&mapping->tree_lock, flags); - mem_cgroup_end_page_stat(memcg); + unlock_page_memcg(memcg); mem_cgroup_replace_page(old, new); radix_tree_preload_end(); if (freepage) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index d06cae2de783..953f0f984392 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -1709,19 +1709,13 @@ cleanup: } /** - * mem_cgroup_begin_page_stat - begin a page state statistics transaction - * @page: page that is going to change accounted state - * - * This function must mark the beginning of an accounted page state - * change to prevent double accounting when the page is concurrently - * being moved to another memcg: + * lock_page_memcg - lock a page->mem_cgroup binding + * @page: the page * - * memcg = 
mem_cgroup_begin_page_stat(page); - * if (TestClearPageState(page)) - * mem_cgroup_update_page_stat(memcg, state, -1); - * mem_cgroup_end_page_stat(memcg); + * This function protects unlocked LRU pages from being moved to + * another cgroup and stabilizes their page->mem_cgroup binding. */ -struct mem_cgroup *mem_cgroup_begin_page_stat(struct page *page) +struct mem_cgroup *lock_page_memcg(struct page *page) { struct mem_cgroup *memcg; unsigned long flags; @@ -1759,20 +1753,20 @@ again: /* * When charge migration first begins, we can have locked and * unlocked page stat updates happening concurrently. Track - * the task who has the lock for mem_cgroup_end_page_stat(). + * the task who has the lock for unlock_page_memcg(). */ memcg->move_lock_task = current; memcg->move_lock_flags = flags; return memcg; } -EXPORT_SYMBOL(mem_cgroup_begin_page_stat); +EXPORT_SYMBOL(lock_page_memcg); /** - * mem_cgroup_end_page_stat - finish a page state statistics transaction - * @memcg: the memcg that was accounted against + * unlock_page_memcg - unlock a page->mem_cgroup binding + * @memcg: the memcg returned by lock_page_memcg() */ -void mem_cgroup_end_page_stat(struct mem_cgroup *memcg) +void unlock_page_memcg(struct mem_cgroup *memcg) { if (memcg && memcg->move_lock_task == current) { unsigned long flags = memcg->move_lock_flags; @@ -1785,7 +1779,7 @@ void mem_cgroup_end_page_stat(struct mem_cgroup *memcg) rcu_read_unlock(); } -EXPORT_SYMBOL(mem_cgroup_end_page_stat); +EXPORT_SYMBOL(unlock_page_memcg); /* * size of first charge trial. "32" comes from vmscan.c's magic value. @@ -4923,9 +4917,9 @@ static void mem_cgroup_move_charge(struct mm_struct *mm) lru_add_drain_all(); /* - * Signal mem_cgroup_begin_page_stat() to take the memcg's - * move_lock while we're moving its pages to another memcg. - * Then wait for already started RCU-only updates to finish. + * Signal lock_page_memcg() to take the memcg's move_lock + * while we're moving its pages to another memcg. Then wait + * for already started RCU-only updates to finish. */ atomic_inc(&mc.from->moving_account); synchronize_rcu(); diff --git a/mm/page-writeback.c b/mm/page-writeback.c index d782cbab735a..2b5ea1271e32 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -2410,7 +2410,7 @@ int __set_page_dirty_no_writeback(struct page *page) /* * Helper function for set_page_dirty family. * - * Caller must hold mem_cgroup_begin_page_stat(). + * Caller must hold lock_page_memcg(). * * NOTE: This relies on being atomic wrt interrupts. */ @@ -2442,7 +2442,7 @@ EXPORT_SYMBOL(account_page_dirtied); /* * Helper function for deaccounting dirty page without writeback. * - * Caller must hold mem_cgroup_begin_page_stat(). + * Caller must hold lock_page_memcg(). 
*/ void account_page_cleaned(struct page *page, struct address_space *mapping, struct mem_cgroup *memcg, struct bdi_writeback *wb) @@ -2471,13 +2471,13 @@ int __set_page_dirty_nobuffers(struct page *page) { struct mem_cgroup *memcg; - memcg = mem_cgroup_begin_page_stat(page); + memcg = lock_page_memcg(page); if (!TestSetPageDirty(page)) { struct address_space *mapping = page_mapping(page); unsigned long flags; if (!mapping) { - mem_cgroup_end_page_stat(memcg); + unlock_page_memcg(memcg); return 1; } @@ -2488,7 +2488,7 @@ int __set_page_dirty_nobuffers(struct page *page) radix_tree_tag_set(&mapping->page_tree, page_index(page), PAGECACHE_TAG_DIRTY); spin_unlock_irqrestore(&mapping->tree_lock, flags); - mem_cgroup_end_page_stat(memcg); + unlock_page_memcg(memcg); if (mapping->host) { /* !PageAnon && !swapper_space */ @@ -2496,7 +2496,7 @@ int __set_page_dirty_nobuffers(struct page *page) } return 1; } - mem_cgroup_end_page_stat(memcg); + unlock_page_memcg(memcg); return 0; } EXPORT_SYMBOL(__set_page_dirty_nobuffers); @@ -2629,14 +2629,14 @@ void cancel_dirty_page(struct page *page) struct mem_cgroup *memcg; bool locked; - memcg = mem_cgroup_begin_page_stat(page); + memcg = lock_page_memcg(page); wb = unlocked_inode_to_wb_begin(inode, &locked); if (TestClearPageDirty(page)) account_page_cleaned(page, mapping, memcg, wb); unlocked_inode_to_wb_end(inode, locked); - mem_cgroup_end_page_stat(memcg); + unlock_page_memcg(memcg); } else { ClearPageDirty(page); } @@ -2705,7 +2705,7 @@ int clear_page_dirty_for_io(struct page *page) * always locked coming in here, so we get the desired * exclusion. */ - memcg = mem_cgroup_begin_page_stat(page); + memcg = lock_page_memcg(page); wb = unlocked_inode_to_wb_begin(inode, &locked); if (TestClearPageDirty(page)) { mem_cgroup_dec_page_stat(memcg, MEM_CGROUP_STAT_DIRTY); @@ -2714,7 +2714,7 @@ int clear_page_dirty_for_io(struct page *page) ret = 1; } unlocked_inode_to_wb_end(inode, locked); - mem_cgroup_end_page_stat(memcg); + unlock_page_memcg(memcg); return ret; } return TestClearPageDirty(page); @@ -2727,7 +2727,7 @@ int test_clear_page_writeback(struct page *page) struct mem_cgroup *memcg; int ret; - memcg = mem_cgroup_begin_page_stat(page); + memcg = lock_page_memcg(page); if (mapping) { struct inode *inode = mapping->host; struct backing_dev_info *bdi = inode_to_bdi(inode); @@ -2755,7 +2755,7 @@ int test_clear_page_writeback(struct page *page) dec_zone_page_state(page, NR_WRITEBACK); inc_zone_page_state(page, NR_WRITTEN); } - mem_cgroup_end_page_stat(memcg); + unlock_page_memcg(memcg); return ret; } @@ -2765,7 +2765,7 @@ int __test_set_page_writeback(struct page *page, bool keep_write) struct mem_cgroup *memcg; int ret; - memcg = mem_cgroup_begin_page_stat(page); + memcg = lock_page_memcg(page); if (mapping) { struct inode *inode = mapping->host; struct backing_dev_info *bdi = inode_to_bdi(inode); @@ -2796,7 +2796,7 @@ int __test_set_page_writeback(struct page *page, bool keep_write) mem_cgroup_inc_page_stat(memcg, MEM_CGROUP_STAT_WRITEBACK); inc_zone_page_state(page, NR_WRITEBACK); } - mem_cgroup_end_page_stat(memcg); + unlock_page_memcg(memcg); return ret; } diff --git a/mm/rmap.c b/mm/rmap.c index 79f3bf047f38..2871e7d3cced 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -1289,19 +1289,19 @@ void page_add_file_rmap(struct page *page) { struct mem_cgroup *memcg; - memcg = mem_cgroup_begin_page_stat(page); + memcg = lock_page_memcg(page); if (atomic_inc_and_test(&page->_mapcount)) { __inc_zone_page_state(page, NR_FILE_MAPPED); mem_cgroup_inc_page_stat(memcg, 
MEM_CGROUP_STAT_FILE_MAPPED); } - mem_cgroup_end_page_stat(memcg); + unlock_page_memcg(memcg); } static void page_remove_file_rmap(struct page *page) { struct mem_cgroup *memcg; - memcg = mem_cgroup_begin_page_stat(page); + memcg = lock_page_memcg(page); /* Hugepages are not counted in NR_FILE_MAPPED for now. */ if (unlikely(PageHuge(page))) { @@ -1325,7 +1325,7 @@ static void page_remove_file_rmap(struct page *page) if (unlikely(PageMlocked(page))) clear_page_mlock(page); out: - mem_cgroup_end_page_stat(memcg); + unlock_page_memcg(memcg); } static void page_remove_anon_compound_rmap(struct page *page) diff --git a/mm/truncate.c b/mm/truncate.c index e3ee0e27cd17..51a24f6a555d 100644 --- a/mm/truncate.c +++ b/mm/truncate.c @@ -528,7 +528,7 @@ invalidate_complete_page2(struct address_space *mapping, struct page *page) if (page_has_private(page) && !try_to_release_page(page, GFP_KERNEL)) return 0; - memcg = mem_cgroup_begin_page_stat(page); + memcg = lock_page_memcg(page); spin_lock_irqsave(&mapping->tree_lock, flags); if (PageDirty(page)) goto failed; @@ -536,7 +536,7 @@ invalidate_complete_page2(struct address_space *mapping, struct page *page) BUG_ON(page_has_private(page)); __delete_from_page_cache(page, NULL, memcg); spin_unlock_irqrestore(&mapping->tree_lock, flags); - mem_cgroup_end_page_stat(memcg); + unlock_page_memcg(memcg); if (mapping->a_ops->freepage) mapping->a_ops->freepage(page); @@ -545,7 +545,7 @@ invalidate_complete_page2(struct address_space *mapping, struct page *page) return 1; failed: spin_unlock_irqrestore(&mapping->tree_lock, flags); - mem_cgroup_end_page_stat(memcg); + unlock_page_memcg(memcg); return 0; } diff --git a/mm/vmscan.c b/mm/vmscan.c index 039f08d369a5..08547a7136d3 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -608,7 +608,7 @@ static int __remove_mapping(struct address_space *mapping, struct page *page, BUG_ON(!PageLocked(page)); BUG_ON(mapping != page_mapping(page)); - memcg = mem_cgroup_begin_page_stat(page); + memcg = lock_page_memcg(page); spin_lock_irqsave(&mapping->tree_lock, flags); /* * The non racy check for a busy page. @@ -648,7 +648,7 @@ static int __remove_mapping(struct address_space *mapping, struct page *page, mem_cgroup_swapout(page, swap); __delete_from_swap_cache(page); spin_unlock_irqrestore(&mapping->tree_lock, flags); - mem_cgroup_end_page_stat(memcg); + unlock_page_memcg(memcg); swapcache_free(swap); } else { void (*freepage)(struct page *); @@ -676,7 +676,7 @@ static int __remove_mapping(struct address_space *mapping, struct page *page, shadow = workingset_eviction(mapping, page); __delete_from_page_cache(page, shadow, memcg); spin_unlock_irqrestore(&mapping->tree_lock, flags); - mem_cgroup_end_page_stat(memcg); + unlock_page_memcg(memcg); if (freepage != NULL) freepage(page); @@ -686,7 +686,7 @@ static int __remove_mapping(struct address_space *mapping, struct page *page, cannot_free: spin_unlock_irqrestore(&mapping->tree_lock, flags); - mem_cgroup_end_page_stat(memcg); + unlock_page_memcg(memcg); return 0; } -- cgit v1.2.3 From 23047a96d7cfcfca1a6d026ecaec526ea4803e9e Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Tue, 15 Mar 2016 14:57:16 -0700 Subject: mm: workingset: per-cgroup cache thrash detection Cache thrash detection (see a528910e12ec "mm: thrash detection-based file cache sizing" for details) currently only works on the system level, not inside cgroups. 
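The core trick of the fix, implemented by pack_shadow() and unpack_shadow() in the mm/workingset.c hunk below, is to bit-pack the memcg ID alongside the node, zone, and eviction counter inside the single unsigned long that a radix tree exceptional entry provides. The following userspace model shows the round trip; the field widths are invented for illustration (the kernel derives the real ones from MEM_CGROUP_ID_SHIFT, NODES_SHIFT, and ZONES_SHIFT), and a 64-bit unsigned long is assumed:

#include <assert.h>
#include <stdio.h>

/* Illustrative widths only, not the kernel's configured values. */
#define CG_BITS   16
#define NODE_BITS  6
#define ZONE_BITS  2

/* Pack widest-first, matching pack_shadow(): later fields end up in
 * the low bits and come back out first. */
static unsigned long pack(unsigned long eviction, int cg, int nid, int zid)
{
	eviction = (eviction << CG_BITS) | cg;
	eviction = (eviction << NODE_BITS) | nid;
	eviction = (eviction << ZONE_BITS) | zid;
	return eviction;
}

static void unpack(unsigned long entry, unsigned long *eviction,
		   int *cg, int *nid, int *zid)
{
	*zid = entry & ((1UL << ZONE_BITS) - 1);
	entry >>= ZONE_BITS;
	*nid = entry & ((1UL << NODE_BITS) - 1);
	entry >>= NODE_BITS;
	*cg = entry & ((1UL << CG_BITS) - 1);
	entry >>= CG_BITS;
	*eviction = entry;
}

int main(void)
{
	unsigned long ev;
	int cg, nid, zid;

	unpack(pack(123456, 42, 3, 1), &ev, &cg, &nid, &zid);
	assert(ev == 123456 && cg == 42 && nid == 3 && zid == 1);
	printf("shadow entry round-trip ok\n");
	return 0;
}

With the memcg ID recoverable from the shadow entry, the eviction bookkeeping can move from the zone to the per-memcg lruvec, which also addresses the second problem the changelog turns to next.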
Worse, as the refaults are compared to the global number of active cache, cgroups might wrongfully get all their refaults activated when their pages are hotter than those of others. Move the refault machinery from the zone to the lruvec, and then tag eviction entries with the memcg ID. This makes the thrash detection work correctly inside cgroups. [sergey.senozhatsky@gmail.com: do not return from workingset_activation() with locked rcu and page] Signed-off-by: Johannes Weiner Signed-off-by: Sergey Senozhatsky Reviewed-by: Vladimir Davydov Cc: Michal Hocko Cc: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 56 ++++++++++++++++++++++++++++---- include/linux/mmzone.h | 13 ++++---- mm/memcontrol.c | 25 --------------- mm/vmscan.c | 18 +++++------ mm/workingset.c | 79 ++++++++++++++++++++++++++++++++++++++++------ 5 files changed, 134 insertions(+), 57 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 8502fd4144eb..09b449849369 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -89,6 +89,10 @@ enum mem_cgroup_events_target { }; #ifdef CONFIG_MEMCG + +#define MEM_CGROUP_ID_SHIFT 16 +#define MEM_CGROUP_ID_MAX USHRT_MAX + struct mem_cgroup_stat_cpu { long count[MEMCG_NR_STAT]; unsigned long events[MEMCG_NR_EVENTS]; @@ -265,6 +269,11 @@ struct mem_cgroup { extern struct mem_cgroup *root_mem_cgroup; +static inline bool mem_cgroup_disabled(void) +{ + return !cgroup_subsys_enabled(memory_cgrp_subsys); +} + /** * mem_cgroup_events - count memory events against a cgroup * @memcg: the memory cgroup @@ -312,6 +321,28 @@ struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *, struct mem_cgroup_reclaim_cookie *); void mem_cgroup_iter_break(struct mem_cgroup *, struct mem_cgroup *); +static inline unsigned short mem_cgroup_id(struct mem_cgroup *memcg) +{ + if (mem_cgroup_disabled()) + return 0; + + return memcg->css.id; +} + +/** + * mem_cgroup_from_id - look up a memcg from an id + * @id: the id to look up + * + * Caller must hold rcu_read_lock() and use css_tryget() as necessary. 
+ */ +static inline struct mem_cgroup *mem_cgroup_from_id(unsigned short id) +{ + struct cgroup_subsys_state *css; + + css = css_from_id(id, &memory_cgrp_subsys); + return mem_cgroup_from_css(css); +} + /** * parent_mem_cgroup - find the accounting parent of a memcg * @memcg: memcg whose parent to find @@ -353,11 +384,6 @@ static inline bool mm_match_cgroup(struct mm_struct *mm, struct cgroup_subsys_state *mem_cgroup_css_from_page(struct page *page); ino_t page_cgroup_ino(struct page *page); -static inline bool mem_cgroup_disabled(void) -{ - return !cgroup_subsys_enabled(memory_cgrp_subsys); -} - static inline bool mem_cgroup_online(struct mem_cgroup *memcg) { if (mem_cgroup_disabled()) @@ -502,8 +528,17 @@ void mem_cgroup_split_huge_fixup(struct page *head); #endif #else /* CONFIG_MEMCG */ + +#define MEM_CGROUP_ID_SHIFT 0 +#define MEM_CGROUP_ID_MAX 0 + struct mem_cgroup; +static inline bool mem_cgroup_disabled(void) +{ + return true; +} + static inline void mem_cgroup_events(struct mem_cgroup *memcg, enum mem_cgroup_events_index idx, unsigned int nr) @@ -586,9 +621,16 @@ static inline void mem_cgroup_iter_break(struct mem_cgroup *root, { } -static inline bool mem_cgroup_disabled(void) +static inline unsigned short mem_cgroup_id(struct mem_cgroup *memcg) { - return true; + return 0; +} + +static inline struct mem_cgroup *mem_cgroup_from_id(unsigned short id) +{ + WARN_ON_ONCE(id); + /* XXX: This should always return root_mem_cgroup */ + return NULL; } static inline bool mem_cgroup_online(struct mem_cgroup *memcg) diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 9fc23ab550a7..03cbdd906f55 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -212,10 +212,12 @@ struct zone_reclaim_stat { }; struct lruvec { - struct list_head lists[NR_LRU_LISTS]; - struct zone_reclaim_stat reclaim_stat; + struct list_head lists[NR_LRU_LISTS]; + struct zone_reclaim_stat reclaim_stat; + /* Evictions & activations on the inactive file list */ + atomic_long_t inactive_age; #ifdef CONFIG_MEMCG - struct zone *zone; + struct zone *zone; #endif }; @@ -490,9 +492,6 @@ struct zone { spinlock_t lru_lock; struct lruvec lruvec; - /* Evictions & activations on the inactive file list */ - atomic_long_t inactive_age; - /* * When free pages are below this point, additional steps are taken * when reading the number of free pages to avoid per-cpu counter @@ -761,6 +760,8 @@ static inline struct zone *lruvec_zone(struct lruvec *lruvec) #endif } +extern unsigned long lruvec_lru_size(struct lruvec *lruvec, enum lru_list lru); + #ifdef CONFIG_HAVE_MEMORY_PRESENT void memory_present(int nid, unsigned long start, unsigned long end); #else diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 953f0f984392..864e237f32d9 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -268,31 +268,6 @@ static inline bool mem_cgroup_is_root(struct mem_cgroup *memcg) return (memcg == root_mem_cgroup); } -/* - * We restrict the id in the range of [1, 65535], so it can fit into - * an unsigned short. - */ -#define MEM_CGROUP_ID_MAX USHRT_MAX - -static inline unsigned short mem_cgroup_id(struct mem_cgroup *memcg) -{ - return memcg->css.id; -} - -/* - * A helper function to get mem_cgroup from ID. must be called under - * rcu_read_lock(). The caller is responsible for calling - * css_tryget_online() if the mem_cgroup is used for charging. (dropping - * refcnt from swap can be called against removed memcg.) 
- */ -static inline struct mem_cgroup *mem_cgroup_from_id(unsigned short id) -{ - struct cgroup_subsys_state *css; - - css = css_from_id(id, &memory_cgrp_subsys); - return mem_cgroup_from_css(css); -} - #ifndef CONFIG_SLOB /* * This will be the memcg's index in each cache's ->memcg_params.memcg_caches. diff --git a/mm/vmscan.c b/mm/vmscan.c index 08547a7136d3..fd434cc89bea 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -213,7 +213,7 @@ bool zone_reclaimable(struct zone *zone) zone_reclaimable_pages(zone) * 6; } -static unsigned long get_lru_size(struct lruvec *lruvec, enum lru_list lru) +unsigned long lruvec_lru_size(struct lruvec *lruvec, enum lru_list lru) { if (!mem_cgroup_disabled()) return mem_cgroup_get_lru_size(lruvec, lru); @@ -1923,8 +1923,8 @@ static bool inactive_file_is_low(struct lruvec *lruvec) unsigned long inactive; unsigned long active; - inactive = get_lru_size(lruvec, LRU_INACTIVE_FILE); - active = get_lru_size(lruvec, LRU_ACTIVE_FILE); + inactive = lruvec_lru_size(lruvec, LRU_INACTIVE_FILE); + active = lruvec_lru_size(lruvec, LRU_ACTIVE_FILE); return active > inactive; } @@ -2063,7 +2063,7 @@ static void get_scan_count(struct lruvec *lruvec, struct mem_cgroup *memcg, * system is under heavy pressure. */ if (!inactive_file_is_low(lruvec) && - get_lru_size(lruvec, LRU_INACTIVE_FILE) >> sc->priority) { + lruvec_lru_size(lruvec, LRU_INACTIVE_FILE) >> sc->priority) { scan_balance = SCAN_FILE; goto out; } @@ -2089,10 +2089,10 @@ static void get_scan_count(struct lruvec *lruvec, struct mem_cgroup *memcg, * anon in [0], file in [1] */ - anon = get_lru_size(lruvec, LRU_ACTIVE_ANON) + - get_lru_size(lruvec, LRU_INACTIVE_ANON); - file = get_lru_size(lruvec, LRU_ACTIVE_FILE) + - get_lru_size(lruvec, LRU_INACTIVE_FILE); + anon = lruvec_lru_size(lruvec, LRU_ACTIVE_ANON) + + lruvec_lru_size(lruvec, LRU_INACTIVE_ANON); + file = lruvec_lru_size(lruvec, LRU_ACTIVE_FILE) + + lruvec_lru_size(lruvec, LRU_INACTIVE_FILE); spin_lock_irq(&zone->lru_lock); if (unlikely(reclaim_stat->recent_scanned[0] > anon / 4)) { @@ -2130,7 +2130,7 @@ out: unsigned long size; unsigned long scan; - size = get_lru_size(lruvec, lru); + size = lruvec_lru_size(lruvec, lru); scan = size >> sc->priority; if (!scan && pass && force_scan) diff --git a/mm/workingset.c b/mm/workingset.c index 9a26a60368d2..14bc23a7779b 100644 --- a/mm/workingset.c +++ b/mm/workingset.c @@ -153,7 +153,8 @@ */ #define EVICTION_SHIFT (RADIX_TREE_EXCEPTIONAL_ENTRY + \ - ZONES_SHIFT + NODES_SHIFT) + ZONES_SHIFT + NODES_SHIFT + \ + MEM_CGROUP_ID_SHIFT) #define EVICTION_MASK (~0UL >> EVICTION_SHIFT) /* @@ -166,9 +167,10 @@ */ static unsigned int bucket_order __read_mostly; -static void *pack_shadow(unsigned long eviction, struct zone *zone) +static void *pack_shadow(int memcgid, struct zone *zone, unsigned long eviction) { eviction >>= bucket_order; + eviction = (eviction << MEM_CGROUP_ID_SHIFT) | memcgid; eviction = (eviction << NODES_SHIFT) | zone_to_nid(zone); eviction = (eviction << ZONES_SHIFT) | zone_idx(zone); eviction = (eviction << RADIX_TREE_EXCEPTIONAL_SHIFT); @@ -176,18 +178,21 @@ static void *pack_shadow(unsigned long eviction, struct zone *zone) return (void *)(eviction | RADIX_TREE_EXCEPTIONAL_ENTRY); } -static void unpack_shadow(void *shadow, struct zone **zonep, +static void unpack_shadow(void *shadow, int *memcgidp, struct zone **zonep, unsigned long *evictionp) { unsigned long entry = (unsigned long)shadow; - int zid, nid; + int memcgid, nid, zid; entry >>= RADIX_TREE_EXCEPTIONAL_SHIFT; zid = entry & ((1UL << ZONES_SHIFT) - 
1); entry >>= ZONES_SHIFT; nid = entry & ((1UL << NODES_SHIFT) - 1); entry >>= NODES_SHIFT; + memcgid = entry & ((1UL << MEM_CGROUP_ID_SHIFT) - 1); + entry >>= MEM_CGROUP_ID_SHIFT; + *memcgidp = memcgid; *zonep = NODE_DATA(nid)->node_zones + zid; *evictionp = entry << bucket_order; } @@ -202,11 +207,20 @@ static void unpack_shadow(void *shadow, struct zone **zonep, */ void *workingset_eviction(struct address_space *mapping, struct page *page) { + struct mem_cgroup *memcg = page_memcg(page); struct zone *zone = page_zone(page); + int memcgid = mem_cgroup_id(memcg); unsigned long eviction; + struct lruvec *lruvec; - eviction = atomic_long_inc_return(&zone->inactive_age); - return pack_shadow(eviction, zone); + /* Page is fully exclusive and pins page->mem_cgroup */ + VM_BUG_ON_PAGE(PageLRU(page), page); + VM_BUG_ON_PAGE(page_count(page), page); + VM_BUG_ON_PAGE(!PageLocked(page), page); + + lruvec = mem_cgroup_zone_lruvec(zone, memcg); + eviction = atomic_long_inc_return(&lruvec->inactive_age); + return pack_shadow(memcgid, zone, eviction); } /** @@ -221,13 +235,42 @@ void *workingset_eviction(struct address_space *mapping, struct page *page) bool workingset_refault(void *shadow) { unsigned long refault_distance; + unsigned long active_file; + struct mem_cgroup *memcg; unsigned long eviction; + struct lruvec *lruvec; unsigned long refault; struct zone *zone; + int memcgid; - unpack_shadow(shadow, &zone, &eviction); + unpack_shadow(shadow, &memcgid, &zone, &eviction); - refault = atomic_long_read(&zone->inactive_age); + rcu_read_lock(); + /* + * Look up the memcg associated with the stored ID. It might + * have been deleted since the page's eviction. + * + * Note that in rare events the ID could have been recycled + * for a new cgroup that refaults a shared page. This is + * impossible to tell from the available data. However, this + * should be a rare and limited disturbance, and activations + * are always speculative anyway. Ultimately, it's the aging + * algorithm's job to shake out the minimum access frequency + * for the active cache. + * + * XXX: On !CONFIG_MEMCG, this will always return NULL; it + * would be better if the root_mem_cgroup existed in all + * configurations instead. + */ + memcg = mem_cgroup_from_id(memcgid); + if (!mem_cgroup_disabled() && !memcg) { + rcu_read_unlock(); + return false; + } + lruvec = mem_cgroup_zone_lruvec(zone, memcg); + refault = atomic_long_read(&lruvec->inactive_age); + active_file = lruvec_lru_size(lruvec, LRU_ACTIVE_FILE); + rcu_read_unlock(); /* * The unsigned subtraction here gives an accurate distance @@ -249,7 +292,7 @@ bool workingset_refault(void *shadow) inc_zone_state(zone, WORKINGSET_REFAULT); - if (refault_distance <= zone_page_state(zone, NR_ACTIVE_FILE)) { + if (refault_distance <= active_file) { inc_zone_state(zone, WORKINGSET_ACTIVATE); return true; } @@ -262,7 +305,23 @@ bool workingset_refault(void *shadow) */ void workingset_activation(struct page *page) { - atomic_long_inc(&page_zone(page)->inactive_age); + struct mem_cgroup *memcg; + struct lruvec *lruvec; + + memcg = lock_page_memcg(page); + /* + * Filter non-memcg pages here, e.g. unmap can call + * mark_page_accessed() on VDSO pages. + * + * XXX: See workingset_refault() - this should return + * root_mem_cgroup even for !CONFIG_MEMCG. 
+ */ + if (!mem_cgroup_disabled() && !memcg) + goto out; + lruvec = mem_cgroup_zone_lruvec(page_zone(page), memcg); + atomic_long_inc(&lruvec->inactive_age); +out: + unlock_page_memcg(memcg); } /* -- cgit v1.2.3 From 6a93ca8fde3cfce0f00f02281139a377c83e8d8c Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Tue, 15 Mar 2016 14:57:19 -0700 Subject: mm: migrate: do not touch page->mem_cgroup of live pages Changing a page's memcg association complicates dealing with the page, so we want to limit this as much as possible. Page migration, for example, does not have to do that. Just like page cache replacement, it can forcibly charge a replacement page, and then uncharge the old page when it gets freed. Temporarily overcharging the cgroup by a single page is not an issue in practice, and charging is so cheap nowadays that this is much preferable to the headache of messing with live pages. The only place that still changes the page->mem_cgroup binding of live pages is when pages move along with a task to another cgroup. But that path isolates the page from the LRU, takes the page lock, and the move lock (lock_page_memcg()). That means page->mem_cgroup is always stable in callers that have the page isolated from the LRU or locked. Lighter unlocked paths, like writeback accounting, can use lock_page_memcg(). [akpm@linux-foundation.org: fix build] [vdavydov@virtuozzo.com: fix lockdep splat] Signed-off-by: Johannes Weiner Acked-by: Vladimir Davydov Cc: Johannes Weiner Cc: Michal Hocko Cc: Greg Thelen Cc: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 4 ++-- include/linux/mm.h | 9 --------- mm/filemap.c | 2 +- mm/memcontrol.c | 13 +++++++------ mm/migrate.c | 15 +++++++++------ mm/shmem.c | 2 +- 6 files changed, 20 insertions(+), 25 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 09b449849369..c45ab3fb6e04 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -300,7 +300,7 @@ void mem_cgroup_cancel_charge(struct page *page, struct mem_cgroup *memcg, void mem_cgroup_uncharge(struct page *page); void mem_cgroup_uncharge_list(struct list_head *page_list); -void mem_cgroup_replace_page(struct page *oldpage, struct page *newpage); +void mem_cgroup_migrate(struct page *oldpage, struct page *newpage); struct lruvec *mem_cgroup_zone_lruvec(struct zone *, struct mem_cgroup *); struct lruvec *mem_cgroup_page_lruvec(struct page *, struct zone *); @@ -580,7 +580,7 @@ static inline void mem_cgroup_uncharge_list(struct list_head *page_list) { } -static inline void mem_cgroup_replace_page(struct page *old, struct page *new) +static inline void mem_cgroup_migrate(struct page *old, struct page *new) { } diff --git a/include/linux/mm.h b/include/linux/mm.h index b97243d6aa49..6b471d1fc8df 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -905,20 +905,11 @@ static inline struct mem_cgroup *page_memcg(struct page *page) { return page->mem_cgroup; } - -static inline void set_page_memcg(struct page *page, struct mem_cgroup *memcg) -{ - page->mem_cgroup = memcg; -} #else static inline struct mem_cgroup *page_memcg(struct page *page) { return NULL; } - -static inline void set_page_memcg(struct page *page, struct mem_cgroup *memcg) -{ -} #endif /* diff --git a/mm/filemap.c b/mm/filemap.c index ee8140cf935d..d8317caffe85 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -577,7 +577,7 @@ int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask) 
__inc_zone_page_state(new, NR_SHMEM); spin_unlock_irqrestore(&mapping->tree_lock, flags); unlock_page_memcg(memcg); - mem_cgroup_replace_page(old, new); + mem_cgroup_migrate(old, new); radix_tree_preload_end(); if (freepage) freepage(old); diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 864e237f32d9..64506b2eef34 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -4457,7 +4457,7 @@ static int mem_cgroup_move_account(struct page *page, VM_BUG_ON(compound && !PageTransHuge(page)); /* - * Prevent mem_cgroup_replace_page() from looking at + * Prevent mem_cgroup_migrate() from looking at * page->mem_cgroup of its source page while we change it. */ ret = -EBUSY; @@ -5486,16 +5486,17 @@ void mem_cgroup_uncharge_list(struct list_head *page_list) } /** - * mem_cgroup_replace_page - migrate a charge to another page - * @oldpage: currently charged page - * @newpage: page to transfer the charge to + * mem_cgroup_migrate - charge a page's replacement + * @oldpage: currently circulating page + * @newpage: replacement page * - * Migrate the charge from @oldpage to @newpage. + * Charge @newpage as a replacement page for @oldpage. @oldpage will + * be uncharged upon free. * * Both pages must be locked, @newpage->mapping must be set up. * Either or both pages might be on the LRU already. */ -void mem_cgroup_replace_page(struct page *oldpage, struct page *newpage) +void mem_cgroup_migrate(struct page *oldpage, struct page *newpage) { struct mem_cgroup *memcg; unsigned int nr_pages; diff --git a/mm/migrate.c b/mm/migrate.c index 432ecd0172cd..848327d4a7ed 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -326,12 +326,13 @@ int migrate_page_move_mapping(struct address_space *mapping, return -EAGAIN; /* No turning back from here */ - set_page_memcg(newpage, page_memcg(page)); newpage->index = page->index; newpage->mapping = page->mapping; if (PageSwapBacked(page)) SetPageSwapBacked(newpage); + mem_cgroup_migrate(page, newpage); + return MIGRATEPAGE_SUCCESS; } @@ -373,7 +374,6 @@ int migrate_page_move_mapping(struct address_space *mapping, * Now we know that no one else is looking at the page: * no turning back from here. */ - set_page_memcg(newpage, page_memcg(page)); newpage->index = page->index; newpage->mapping = page->mapping; if (PageSwapBacked(page)) @@ -428,6 +428,8 @@ int migrate_page_move_mapping(struct address_space *mapping, } local_irq_enable(); + mem_cgroup_migrate(page, newpage); + return MIGRATEPAGE_SUCCESS; } @@ -458,9 +460,9 @@ int migrate_huge_page_move_mapping(struct address_space *mapping, return -EAGAIN; } - set_page_memcg(newpage, page_memcg(page)); newpage->index = page->index; newpage->mapping = page->mapping; + get_page(newpage); radix_tree_replace_slot(pslot, newpage); @@ -468,6 +470,9 @@ int migrate_huge_page_move_mapping(struct address_space *mapping, page_unfreeze_refs(page, expected_count - 1); spin_unlock_irq(&mapping->tree_lock); + + mem_cgroup_migrate(page, newpage); + return MIGRATEPAGE_SUCCESS; } @@ -775,7 +780,6 @@ static int move_to_new_page(struct page *newpage, struct page *page, * page is freed; but stats require that PageAnon be left as PageAnon. 
*/ if (rc == MIGRATEPAGE_SUCCESS) { - set_page_memcg(page, NULL); if (!PageAnon(page)) page->mapping = NULL; } @@ -1842,8 +1846,7 @@ fail_putback: } mlock_migrate_page(new_page, page); - set_page_memcg(new_page, page_memcg(page)); - set_page_memcg(page, NULL); + mem_cgroup_migrate(page, new_page); page_remove_rmap(page, true); set_page_owner_migrate_reason(new_page, MR_NUMA_MISPLACED); diff --git a/mm/shmem.c b/mm/shmem.c index 440e2a7e6c1c..1acfdbc4bd9e 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -1116,7 +1116,7 @@ static int shmem_replace_page(struct page **pagep, gfp_t gfp, */ oldpage = newpage; } else { - mem_cgroup_replace_page(oldpage, newpage); + mem_cgroup_migrate(oldpage, newpage); lru_cache_add_anon(newpage); *pagep = newpage; } -- cgit v1.2.3 From 62cccb8c8e7a3ca233f49d5e7dcb1557d25465cd Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Tue, 15 Mar 2016 14:57:22 -0700 Subject: mm: simplify lock_page_memcg() Now that migration doesn't clear page->mem_cgroup of live pages anymore, it's safe to make lock_page_memcg() and the memcg stat functions take pages, and spare the callers from memcg objects. [akpm@linux-foundation.org: fix warnings] Signed-off-by: Johannes Weiner Suggested-by: Vladimir Davydov Acked-by: Vladimir Davydov Cc: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/buffer.c | 18 ++++++++--------- fs/xfs/xfs_aops.c | 7 +++---- include/linux/memcontrol.h | 35 ++++++++++++++++----------------- include/linux/mm.h | 5 ++--- include/linux/pagemap.h | 3 +-- mm/filemap.c | 20 ++++++++----------- mm/memcontrol.c | 23 +++++++++------------- mm/page-writeback.c | 49 ++++++++++++++++++++-------------------------- mm/rmap.c | 16 ++++++--------- mm/truncate.c | 9 ++++----- mm/vmscan.c | 11 +++++------ mm/workingset.c | 9 ++++----- 12 files changed, 88 insertions(+), 117 deletions(-) (limited to 'include/linux') diff --git a/fs/buffer.c b/fs/buffer.c index dc991510bb06..33be29675358 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -624,14 +624,14 @@ EXPORT_SYMBOL(mark_buffer_dirty_inode); * The caller must hold lock_page_memcg(). */ static void __set_page_dirty(struct page *page, struct address_space *mapping, - struct mem_cgroup *memcg, int warn) + int warn) { unsigned long flags; spin_lock_irqsave(&mapping->tree_lock, flags); if (page->mapping) { /* Race with truncate? */ WARN_ON_ONCE(warn && !PageUptodate(page)); - account_page_dirtied(page, mapping, memcg); + account_page_dirtied(page, mapping); radix_tree_tag_set(&mapping->page_tree, page_index(page), PAGECACHE_TAG_DIRTY); } @@ -666,7 +666,6 @@ static void __set_page_dirty(struct page *page, struct address_space *mapping, int __set_page_dirty_buffers(struct page *page) { int newly_dirty; - struct mem_cgroup *memcg; struct address_space *mapping = page_mapping(page); if (unlikely(!mapping)) @@ -686,14 +685,14 @@ int __set_page_dirty_buffers(struct page *page) * Lock out page->mem_cgroup migration to keep PageDirty * synchronized with per-memcg dirty page counters. 
*/ - memcg = lock_page_memcg(page); + lock_page_memcg(page); newly_dirty = !TestSetPageDirty(page); spin_unlock(&mapping->private_lock); if (newly_dirty) - __set_page_dirty(page, mapping, memcg, 1); + __set_page_dirty(page, mapping, 1); - unlock_page_memcg(memcg); + unlock_page_memcg(page); if (newly_dirty) __mark_inode_dirty(mapping->host, I_DIRTY_PAGES); @@ -1167,15 +1166,14 @@ void mark_buffer_dirty(struct buffer_head *bh) if (!test_set_buffer_dirty(bh)) { struct page *page = bh->b_page; struct address_space *mapping = NULL; - struct mem_cgroup *memcg; - memcg = lock_page_memcg(page); + lock_page_memcg(page); if (!TestSetPageDirty(page)) { mapping = page_mapping(page); if (mapping) - __set_page_dirty(page, mapping, memcg, 0); + __set_page_dirty(page, mapping, 0); } - unlock_page_memcg(memcg); + unlock_page_memcg(page); if (mapping) __mark_inode_dirty(mapping->host, I_DIRTY_PAGES); } diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 5f85ebc52a98..5c57b7b40728 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -1957,7 +1957,6 @@ xfs_vm_set_page_dirty( loff_t end_offset; loff_t offset; int newly_dirty; - struct mem_cgroup *memcg; if (unlikely(!mapping)) return !TestSetPageDirty(page); @@ -1981,7 +1980,7 @@ xfs_vm_set_page_dirty( * Lock out page->mem_cgroup migration to keep PageDirty * synchronized with per-memcg dirty page counters. */ - memcg = lock_page_memcg(page); + lock_page_memcg(page); newly_dirty = !TestSetPageDirty(page); spin_unlock(&mapping->private_lock); @@ -1992,13 +1991,13 @@ xfs_vm_set_page_dirty( spin_lock_irqsave(&mapping->tree_lock, flags); if (page->mapping) { /* Race with truncate? */ WARN_ON_ONCE(!PageUptodate(page)); - account_page_dirtied(page, mapping, memcg); + account_page_dirtied(page, mapping); radix_tree_tag_set(&mapping->page_tree, page_index(page), PAGECACHE_TAG_DIRTY); } spin_unlock_irqrestore(&mapping->tree_lock, flags); } - unlock_page_memcg(memcg); + unlock_page_memcg(page); if (newly_dirty) __mark_inode_dirty(mapping->host, I_DIRTY_PAGES); return newly_dirty; diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index c45ab3fb6e04..d560c9a3cadf 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -455,42 +455,42 @@ bool mem_cgroup_oom_synchronize(bool wait); extern int do_swap_account; #endif -struct mem_cgroup *lock_page_memcg(struct page *page); -void unlock_page_memcg(struct mem_cgroup *memcg); +void lock_page_memcg(struct page *page); +void unlock_page_memcg(struct page *page); /** * mem_cgroup_update_page_stat - update page state statistics - * @memcg: memcg to account against + * @page: the page * @idx: page state item to account * @val: number of pages (positive or negative) * * Callers must use lock_page_memcg() to prevent double accounting * when the page is concurrently being moved to another memcg: * - * memcg = lock_page_memcg(page); + * lock_page_memcg(page); * if (TestClearPageState(page)) - * mem_cgroup_update_page_stat(memcg, state, -1); - * unlock_page_memcg(memcg); + * mem_cgroup_update_page_stat(page, state, -1); + * unlock_page_memcg(page); */ -static inline void mem_cgroup_update_page_stat(struct mem_cgroup *memcg, +static inline void mem_cgroup_update_page_stat(struct page *page, enum mem_cgroup_stat_index idx, int val) { VM_BUG_ON(!rcu_read_lock_held()); - if (memcg) - this_cpu_add(memcg->stat->count[idx], val); + if (page->mem_cgroup) + this_cpu_add(page->mem_cgroup->stat->count[idx], val); } -static inline void mem_cgroup_inc_page_stat(struct mem_cgroup *memcg, +static inline 
void mem_cgroup_inc_page_stat(struct page *page, enum mem_cgroup_stat_index idx) { - mem_cgroup_update_page_stat(memcg, idx, 1); + mem_cgroup_update_page_stat(page, idx, 1); } -static inline void mem_cgroup_dec_page_stat(struct mem_cgroup *memcg, +static inline void mem_cgroup_dec_page_stat(struct page *page, enum mem_cgroup_stat_index idx) { - mem_cgroup_update_page_stat(memcg, idx, -1); + mem_cgroup_update_page_stat(page, idx, -1); } unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order, @@ -661,12 +661,11 @@ mem_cgroup_print_oom_info(struct mem_cgroup *memcg, struct task_struct *p) { } -static inline struct mem_cgroup *lock_page_memcg(struct page *page) +static inline void lock_page_memcg(struct page *page) { - return NULL; } -static inline void unlock_page_memcg(struct mem_cgroup *memcg) +static inline void unlock_page_memcg(struct page *page) { } @@ -692,12 +691,12 @@ static inline bool mem_cgroup_oom_synchronize(bool wait) return false; } -static inline void mem_cgroup_inc_page_stat(struct mem_cgroup *memcg, +static inline void mem_cgroup_inc_page_stat(struct page *page, enum mem_cgroup_stat_index idx) { } -static inline void mem_cgroup_dec_page_stat(struct mem_cgroup *memcg, +static inline void mem_cgroup_dec_page_stat(struct page *page, enum mem_cgroup_stat_index idx) { } diff --git a/include/linux/mm.h b/include/linux/mm.h index 6b471d1fc8df..a862b4f0ac24 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1291,10 +1291,9 @@ int __set_page_dirty_nobuffers(struct page *page); int __set_page_dirty_no_writeback(struct page *page); int redirty_page_for_writepage(struct writeback_control *wbc, struct page *page); -void account_page_dirtied(struct page *page, struct address_space *mapping, - struct mem_cgroup *memcg); +void account_page_dirtied(struct page *page, struct address_space *mapping); void account_page_cleaned(struct page *page, struct address_space *mapping, - struct mem_cgroup *memcg, struct bdi_writeback *wb); + struct bdi_writeback *wb); int set_page_dirty(struct page *page); int set_page_dirty_lock(struct page *page); void cancel_dirty_page(struct page *page); diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 92395a0a7dc5..183b15ea052b 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -663,8 +663,7 @@ int add_to_page_cache_locked(struct page *page, struct address_space *mapping, int add_to_page_cache_lru(struct page *page, struct address_space *mapping, pgoff_t index, gfp_t gfp_mask); extern void delete_from_page_cache(struct page *page); -extern void __delete_from_page_cache(struct page *page, void *shadow, - struct mem_cgroup *memcg); +extern void __delete_from_page_cache(struct page *page, void *shadow); int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask); /* diff --git a/mm/filemap.c b/mm/filemap.c index d8317caffe85..8e629c4ef0c8 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -179,8 +179,7 @@ static void page_cache_tree_delete(struct address_space *mapping, * is safe. The caller must hold the mapping's tree_lock and * lock_page_memcg(). */ -void __delete_from_page_cache(struct page *page, void *shadow, - struct mem_cgroup *memcg) +void __delete_from_page_cache(struct page *page, void *shadow) { struct address_space *mapping = page->mapping; @@ -239,8 +238,7 @@ void __delete_from_page_cache(struct page *page, void *shadow, * anyway will be cleared before returning page into buddy allocator. 
*/ if (WARN_ON_ONCE(PageDirty(page))) - account_page_cleaned(page, mapping, memcg, - inode_to_wb(mapping->host)); + account_page_cleaned(page, mapping, inode_to_wb(mapping->host)); } /** @@ -254,7 +252,6 @@ void __delete_from_page_cache(struct page *page, void *shadow, void delete_from_page_cache(struct page *page) { struct address_space *mapping = page->mapping; - struct mem_cgroup *memcg; unsigned long flags; void (*freepage)(struct page *); @@ -263,11 +260,11 @@ void delete_from_page_cache(struct page *page) freepage = mapping->a_ops->freepage; - memcg = lock_page_memcg(page); + lock_page_memcg(page); spin_lock_irqsave(&mapping->tree_lock, flags); - __delete_from_page_cache(page, NULL, memcg); + __delete_from_page_cache(page, NULL); spin_unlock_irqrestore(&mapping->tree_lock, flags); - unlock_page_memcg(memcg); + unlock_page_memcg(page); if (freepage) freepage(page); @@ -551,7 +548,6 @@ int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask) if (!error) { struct address_space *mapping = old->mapping; void (*freepage)(struct page *); - struct mem_cgroup *memcg; unsigned long flags; pgoff_t offset = old->index; @@ -561,9 +557,9 @@ int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask) new->mapping = mapping; new->index = offset; - memcg = lock_page_memcg(old); + lock_page_memcg(old); spin_lock_irqsave(&mapping->tree_lock, flags); - __delete_from_page_cache(old, NULL, memcg); + __delete_from_page_cache(old, NULL); error = radix_tree_insert(&mapping->page_tree, offset, new); BUG_ON(error); mapping->nrpages++; @@ -576,7 +572,7 @@ int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask) if (PageSwapBacked(new)) __inc_zone_page_state(new, NR_SHMEM); spin_unlock_irqrestore(&mapping->tree_lock, flags); - unlock_page_memcg(memcg); + unlock_page_memcg(old); mem_cgroup_migrate(old, new); radix_tree_preload_end(); if (freepage) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 64506b2eef34..3e4199830456 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -1690,7 +1690,7 @@ cleanup: * This function protects unlocked LRU pages from being moved to * another cgroup and stabilizes their page->mem_cgroup binding. */ -struct mem_cgroup *lock_page_memcg(struct page *page) +void lock_page_memcg(struct page *page) { struct mem_cgroup *memcg; unsigned long flags; @@ -1699,25 +1699,18 @@ struct mem_cgroup *lock_page_memcg(struct page *page) * The RCU lock is held throughout the transaction. The fast * path can get away without acquiring the memcg->move_lock * because page moving starts with an RCU grace period. - * - * The RCU lock also protects the memcg from being freed when - * the page state that is going to change is the only thing - * preventing the page from being uncharged. - * E.g. end-writeback clearing PageWriteback(), which allows - * migration to go ahead and uncharge the page before the - * account transaction might be complete. 
*/ rcu_read_lock(); if (mem_cgroup_disabled()) - return NULL; + return; again: memcg = page->mem_cgroup; if (unlikely(!memcg)) - return NULL; + return; if (atomic_read(&memcg->moving_account) <= 0) - return memcg; + return; spin_lock_irqsave(&memcg->move_lock, flags); if (memcg != page->mem_cgroup) { @@ -1733,16 +1726,18 @@ again: memcg->move_lock_task = current; memcg->move_lock_flags = flags; - return memcg; + return; } EXPORT_SYMBOL(lock_page_memcg); /** * unlock_page_memcg - unlock a page->mem_cgroup binding - * @memcg: the memcg returned by lock_page_memcg() + * @page: the page */ -void unlock_page_memcg(struct mem_cgroup *memcg) +void unlock_page_memcg(struct page *page) { + struct mem_cgroup *memcg = page->mem_cgroup; + if (memcg && memcg->move_lock_task == current) { unsigned long flags = memcg->move_lock_flags; diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 2b5ea1271e32..d7cf2c53d125 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -2414,8 +2414,7 @@ int __set_page_dirty_no_writeback(struct page *page) * * NOTE: This relies on being atomic wrt interrupts. */ -void account_page_dirtied(struct page *page, struct address_space *mapping, - struct mem_cgroup *memcg) +void account_page_dirtied(struct page *page, struct address_space *mapping) { struct inode *inode = mapping->host; @@ -2427,7 +2426,7 @@ void account_page_dirtied(struct page *page, struct address_space *mapping, inode_attach_wb(inode, page); wb = inode_to_wb(inode); - mem_cgroup_inc_page_stat(memcg, MEM_CGROUP_STAT_DIRTY); + mem_cgroup_inc_page_stat(page, MEM_CGROUP_STAT_DIRTY); __inc_zone_page_state(page, NR_FILE_DIRTY); __inc_zone_page_state(page, NR_DIRTIED); __inc_wb_stat(wb, WB_RECLAIMABLE); @@ -2445,10 +2444,10 @@ EXPORT_SYMBOL(account_page_dirtied); * Caller must hold lock_page_memcg(). 
*/ void account_page_cleaned(struct page *page, struct address_space *mapping, - struct mem_cgroup *memcg, struct bdi_writeback *wb) + struct bdi_writeback *wb) { if (mapping_cap_account_dirty(mapping)) { - mem_cgroup_dec_page_stat(memcg, MEM_CGROUP_STAT_DIRTY); + mem_cgroup_dec_page_stat(page, MEM_CGROUP_STAT_DIRTY); dec_zone_page_state(page, NR_FILE_DIRTY); dec_wb_stat(wb, WB_RECLAIMABLE); task_io_account_cancelled_write(PAGE_CACHE_SIZE); @@ -2469,26 +2468,24 @@ void account_page_cleaned(struct page *page, struct address_space *mapping, */ int __set_page_dirty_nobuffers(struct page *page) { - struct mem_cgroup *memcg; - - memcg = lock_page_memcg(page); + lock_page_memcg(page); if (!TestSetPageDirty(page)) { struct address_space *mapping = page_mapping(page); unsigned long flags; if (!mapping) { - unlock_page_memcg(memcg); + unlock_page_memcg(page); return 1; } spin_lock_irqsave(&mapping->tree_lock, flags); BUG_ON(page_mapping(page) != mapping); WARN_ON_ONCE(!PagePrivate(page) && !PageUptodate(page)); - account_page_dirtied(page, mapping, memcg); + account_page_dirtied(page, mapping); radix_tree_tag_set(&mapping->page_tree, page_index(page), PAGECACHE_TAG_DIRTY); spin_unlock_irqrestore(&mapping->tree_lock, flags); - unlock_page_memcg(memcg); + unlock_page_memcg(page); if (mapping->host) { /* !PageAnon && !swapper_space */ @@ -2496,7 +2493,7 @@ int __set_page_dirty_nobuffers(struct page *page) } return 1; } - unlock_page_memcg(memcg); + unlock_page_memcg(page); return 0; } EXPORT_SYMBOL(__set_page_dirty_nobuffers); @@ -2626,17 +2623,16 @@ void cancel_dirty_page(struct page *page) if (mapping_cap_account_dirty(mapping)) { struct inode *inode = mapping->host; struct bdi_writeback *wb; - struct mem_cgroup *memcg; bool locked; - memcg = lock_page_memcg(page); + lock_page_memcg(page); wb = unlocked_inode_to_wb_begin(inode, &locked); if (TestClearPageDirty(page)) - account_page_cleaned(page, mapping, memcg, wb); + account_page_cleaned(page, mapping, wb); unlocked_inode_to_wb_end(inode, locked); - unlock_page_memcg(memcg); + unlock_page_memcg(page); } else { ClearPageDirty(page); } @@ -2667,7 +2663,6 @@ int clear_page_dirty_for_io(struct page *page) if (mapping && mapping_cap_account_dirty(mapping)) { struct inode *inode = mapping->host; struct bdi_writeback *wb; - struct mem_cgroup *memcg; bool locked; /* @@ -2705,16 +2700,16 @@ int clear_page_dirty_for_io(struct page *page) * always locked coming in here, so we get the desired * exclusion. 
*/ - memcg = lock_page_memcg(page); + lock_page_memcg(page); wb = unlocked_inode_to_wb_begin(inode, &locked); if (TestClearPageDirty(page)) { - mem_cgroup_dec_page_stat(memcg, MEM_CGROUP_STAT_DIRTY); + mem_cgroup_dec_page_stat(page, MEM_CGROUP_STAT_DIRTY); dec_zone_page_state(page, NR_FILE_DIRTY); dec_wb_stat(wb, WB_RECLAIMABLE); ret = 1; } unlocked_inode_to_wb_end(inode, locked); - unlock_page_memcg(memcg); + unlock_page_memcg(page); return ret; } return TestClearPageDirty(page); @@ -2724,10 +2719,9 @@ EXPORT_SYMBOL(clear_page_dirty_for_io); int test_clear_page_writeback(struct page *page) { struct address_space *mapping = page_mapping(page); - struct mem_cgroup *memcg; int ret; - memcg = lock_page_memcg(page); + lock_page_memcg(page); if (mapping) { struct inode *inode = mapping->host; struct backing_dev_info *bdi = inode_to_bdi(inode); @@ -2751,21 +2745,20 @@ int test_clear_page_writeback(struct page *page) ret = TestClearPageWriteback(page); } if (ret) { - mem_cgroup_dec_page_stat(memcg, MEM_CGROUP_STAT_WRITEBACK); + mem_cgroup_dec_page_stat(page, MEM_CGROUP_STAT_WRITEBACK); dec_zone_page_state(page, NR_WRITEBACK); inc_zone_page_state(page, NR_WRITTEN); } - unlock_page_memcg(memcg); + unlock_page_memcg(page); return ret; } int __test_set_page_writeback(struct page *page, bool keep_write) { struct address_space *mapping = page_mapping(page); - struct mem_cgroup *memcg; int ret; - memcg = lock_page_memcg(page); + lock_page_memcg(page); if (mapping) { struct inode *inode = mapping->host; struct backing_dev_info *bdi = inode_to_bdi(inode); @@ -2793,10 +2786,10 @@ int __test_set_page_writeback(struct page *page, bool keep_write) ret = TestSetPageWriteback(page); } if (!ret) { - mem_cgroup_inc_page_stat(memcg, MEM_CGROUP_STAT_WRITEBACK); + mem_cgroup_inc_page_stat(page, MEM_CGROUP_STAT_WRITEBACK); inc_zone_page_state(page, NR_WRITEBACK); } - unlock_page_memcg(memcg); + unlock_page_memcg(page); return ret; } diff --git a/mm/rmap.c b/mm/rmap.c index 2871e7d3cced..02f0bfc3c80a 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -1287,21 +1287,17 @@ void page_add_new_anon_rmap(struct page *page, */ void page_add_file_rmap(struct page *page) { - struct mem_cgroup *memcg; - - memcg = lock_page_memcg(page); + lock_page_memcg(page); if (atomic_inc_and_test(&page->_mapcount)) { __inc_zone_page_state(page, NR_FILE_MAPPED); - mem_cgroup_inc_page_stat(memcg, MEM_CGROUP_STAT_FILE_MAPPED); + mem_cgroup_inc_page_stat(page, MEM_CGROUP_STAT_FILE_MAPPED); } - unlock_page_memcg(memcg); + unlock_page_memcg(page); } static void page_remove_file_rmap(struct page *page) { - struct mem_cgroup *memcg; - - memcg = lock_page_memcg(page); + lock_page_memcg(page); /* Hugepages are not counted in NR_FILE_MAPPED for now. */ if (unlikely(PageHuge(page))) { @@ -1320,12 +1316,12 @@ static void page_remove_file_rmap(struct page *page) * pte lock(a spinlock) is held, which implies preemption disabled. 
*/ __dec_zone_page_state(page, NR_FILE_MAPPED); - mem_cgroup_dec_page_stat(memcg, MEM_CGROUP_STAT_FILE_MAPPED); + mem_cgroup_dec_page_stat(page, MEM_CGROUP_STAT_FILE_MAPPED); if (unlikely(PageMlocked(page))) clear_page_mlock(page); out: - unlock_page_memcg(memcg); + unlock_page_memcg(page); } static void page_remove_anon_compound_rmap(struct page *page) diff --git a/mm/truncate.c b/mm/truncate.c index 51a24f6a555d..87311af936f2 100644 --- a/mm/truncate.c +++ b/mm/truncate.c @@ -519,7 +519,6 @@ EXPORT_SYMBOL(invalidate_mapping_pages); static int invalidate_complete_page2(struct address_space *mapping, struct page *page) { - struct mem_cgroup *memcg; unsigned long flags; if (page->mapping != mapping) @@ -528,15 +527,15 @@ invalidate_complete_page2(struct address_space *mapping, struct page *page) if (page_has_private(page) && !try_to_release_page(page, GFP_KERNEL)) return 0; - memcg = lock_page_memcg(page); + lock_page_memcg(page); spin_lock_irqsave(&mapping->tree_lock, flags); if (PageDirty(page)) goto failed; BUG_ON(page_has_private(page)); - __delete_from_page_cache(page, NULL, memcg); + __delete_from_page_cache(page, NULL); spin_unlock_irqrestore(&mapping->tree_lock, flags); - unlock_page_memcg(memcg); + unlock_page_memcg(page); if (mapping->a_ops->freepage) mapping->a_ops->freepage(page); @@ -545,7 +544,7 @@ invalidate_complete_page2(struct address_space *mapping, struct page *page) return 1; failed: spin_unlock_irqrestore(&mapping->tree_lock, flags); - unlock_page_memcg(memcg); + unlock_page_memcg(page); return 0; } diff --git a/mm/vmscan.c b/mm/vmscan.c index fd434cc89bea..34f7e2dae0a0 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -603,12 +603,11 @@ static int __remove_mapping(struct address_space *mapping, struct page *page, bool reclaimed) { unsigned long flags; - struct mem_cgroup *memcg; BUG_ON(!PageLocked(page)); BUG_ON(mapping != page_mapping(page)); - memcg = lock_page_memcg(page); + lock_page_memcg(page); spin_lock_irqsave(&mapping->tree_lock, flags); /* * The non racy check for a busy page. @@ -648,7 +647,7 @@ static int __remove_mapping(struct address_space *mapping, struct page *page, mem_cgroup_swapout(page, swap); __delete_from_swap_cache(page); spin_unlock_irqrestore(&mapping->tree_lock, flags); - unlock_page_memcg(memcg); + unlock_page_memcg(page); swapcache_free(swap); } else { void (*freepage)(struct page *); @@ -674,9 +673,9 @@ static int __remove_mapping(struct address_space *mapping, struct page *page, if (reclaimed && page_is_file_cache(page) && !mapping_exiting(mapping) && !dax_mapping(mapping)) shadow = workingset_eviction(mapping, page); - __delete_from_page_cache(page, shadow, memcg); + __delete_from_page_cache(page, shadow); spin_unlock_irqrestore(&mapping->tree_lock, flags); - unlock_page_memcg(memcg); + unlock_page_memcg(page); if (freepage != NULL) freepage(page); @@ -686,7 +685,7 @@ static int __remove_mapping(struct address_space *mapping, struct page *page, cannot_free: spin_unlock_irqrestore(&mapping->tree_lock, flags); - unlock_page_memcg(memcg); + unlock_page_memcg(page); return 0; } diff --git a/mm/workingset.c b/mm/workingset.c index 14bc23a7779b..6130ba0b2641 100644 --- a/mm/workingset.c +++ b/mm/workingset.c @@ -305,10 +305,9 @@ bool workingset_refault(void *shadow) */ void workingset_activation(struct page *page) { - struct mem_cgroup *memcg; struct lruvec *lruvec; - memcg = lock_page_memcg(page); + lock_page_memcg(page); /* * Filter non-memcg pages here, e.g. unmap can call * mark_page_accessed() on VDSO pages. 
@@ -316,12 +315,12 @@ void workingset_activation(struct page *page) * XXX: See workingset_refault() - this should return * root_mem_cgroup even for !CONFIG_MEMCG. */ - if (!mem_cgroup_disabled() && !memcg) + if (!mem_cgroup_disabled() && !page_memcg(page)) goto out; - lruvec = mem_cgroup_zone_lruvec(page_zone(page), memcg); + lruvec = mem_cgroup_zone_lruvec(page_zone(page), page_memcg(page)); atomic_long_inc(&lruvec->inactive_age); out: - unlock_page_memcg(memcg); + unlock_page_memcg(page); } /* -- cgit v1.2.3 From fdf1cdb91b6ab7a8a91df68c384f36b8a0909cab Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Tue, 15 Mar 2016 14:57:25 -0700 Subject: mm: remove unnecessary uses of lock_page_memcg() There are several users that nest lock_page_memcg() inside lock_page() to prevent page->mem_cgroup from changing. But the page lock prevents pages from moving between cgroups, so that is unnecessary overhead. Remove lock_page_memcg() in contexts with locked pages and fix the debug code in the page stat functions to be okay with the page lock. Signed-off-by: Johannes Weiner Acked-by: Vladimir Davydov Cc: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 12 +++++++----- mm/filemap.c | 7 +------ mm/page-writeback.c | 2 -- mm/truncate.c | 3 --- mm/vmscan.c | 4 ---- 5 files changed, 8 insertions(+), 20 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index d560c9a3cadf..f0c4bec6565b 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -28,6 +28,7 @@ #include #include #include +#include struct mem_cgroup; struct page; @@ -464,18 +465,19 @@ void unlock_page_memcg(struct page *page); * @idx: page state item to account * @val: number of pages (positive or negative) * - * Callers must use lock_page_memcg() to prevent double accounting - * when the page is concurrently being moved to another memcg: + * The @page must be locked or the caller must use lock_page_memcg() + * to prevent double accounting when the page is concurrently being + * moved to another memcg: * - * lock_page_memcg(page); + * lock_page(page) or lock_page_memcg(page) * if (TestClearPageState(page)) * mem_cgroup_update_page_stat(page, state, -1); - * unlock_page_memcg(page); + * unlock_page(page) or unlock_page_memcg(page) */ static inline void mem_cgroup_update_page_stat(struct page *page, enum mem_cgroup_stat_index idx, int val) { - VM_BUG_ON(!rcu_read_lock_held()); + VM_BUG_ON(!(rcu_read_lock_held() || PageLocked(page))); if (page->mem_cgroup) this_cpu_add(page->mem_cgroup->stat->count[idx], val); diff --git a/mm/filemap.c b/mm/filemap.c index 8e629c4ef0c8..61b441b191ad 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -176,8 +176,7 @@ static void page_cache_tree_delete(struct address_space *mapping, /* * Delete a page from the page cache and free it. Caller has to make * sure the page is locked and that nobody else uses it - or that usage - * is safe. The caller must hold the mapping's tree_lock and - * lock_page_memcg(). + * is safe. The caller must hold the mapping's tree_lock. 
*/ void __delete_from_page_cache(struct page *page, void *shadow) { @@ -260,11 +259,9 @@ void delete_from_page_cache(struct page *page) freepage = mapping->a_ops->freepage; - lock_page_memcg(page); spin_lock_irqsave(&mapping->tree_lock, flags); __delete_from_page_cache(page, NULL); spin_unlock_irqrestore(&mapping->tree_lock, flags); - unlock_page_memcg(page); if (freepage) freepage(page); @@ -557,7 +554,6 @@ int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask) new->mapping = mapping; new->index = offset; - lock_page_memcg(old); spin_lock_irqsave(&mapping->tree_lock, flags); __delete_from_page_cache(old, NULL); error = radix_tree_insert(&mapping->page_tree, offset, new); @@ -572,7 +568,6 @@ int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask) if (PageSwapBacked(new)) __inc_zone_page_state(new, NR_SHMEM); spin_unlock_irqrestore(&mapping->tree_lock, flags); - unlock_page_memcg(old); mem_cgroup_migrate(old, new); radix_tree_preload_end(); if (freepage) diff --git a/mm/page-writeback.c b/mm/page-writeback.c index d7cf2c53d125..11ff8f758631 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -2700,7 +2700,6 @@ int clear_page_dirty_for_io(struct page *page) * always locked coming in here, so we get the desired * exclusion. */ - lock_page_memcg(page); wb = unlocked_inode_to_wb_begin(inode, &locked); if (TestClearPageDirty(page)) { mem_cgroup_dec_page_stat(page, MEM_CGROUP_STAT_DIRTY); @@ -2709,7 +2708,6 @@ int clear_page_dirty_for_io(struct page *page) ret = 1; } unlocked_inode_to_wb_end(inode, locked); - unlock_page_memcg(page); return ret; } return TestClearPageDirty(page); diff --git a/mm/truncate.c b/mm/truncate.c index 87311af936f2..7598b552ae03 100644 --- a/mm/truncate.c +++ b/mm/truncate.c @@ -527,7 +527,6 @@ invalidate_complete_page2(struct address_space *mapping, struct page *page) if (page_has_private(page) && !try_to_release_page(page, GFP_KERNEL)) return 0; - lock_page_memcg(page); spin_lock_irqsave(&mapping->tree_lock, flags); if (PageDirty(page)) goto failed; @@ -535,7 +534,6 @@ invalidate_complete_page2(struct address_space *mapping, struct page *page) BUG_ON(page_has_private(page)); __delete_from_page_cache(page, NULL); spin_unlock_irqrestore(&mapping->tree_lock, flags); - unlock_page_memcg(page); if (mapping->a_ops->freepage) mapping->a_ops->freepage(page); @@ -544,7 +542,6 @@ invalidate_complete_page2(struct address_space *mapping, struct page *page) return 1; failed: spin_unlock_irqrestore(&mapping->tree_lock, flags); - unlock_page_memcg(page); return 0; } diff --git a/mm/vmscan.c b/mm/vmscan.c index 34f7e2dae0a0..dd984470248f 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -607,7 +607,6 @@ static int __remove_mapping(struct address_space *mapping, struct page *page, BUG_ON(!PageLocked(page)); BUG_ON(mapping != page_mapping(page)); - lock_page_memcg(page); spin_lock_irqsave(&mapping->tree_lock, flags); /* * The non racy check for a busy page. 
@@ -647,7 +646,6 @@ static int __remove_mapping(struct address_space *mapping, struct page *page, mem_cgroup_swapout(page, swap); __delete_from_swap_cache(page); spin_unlock_irqrestore(&mapping->tree_lock, flags); - unlock_page_memcg(page); swapcache_free(swap); } else { void (*freepage)(struct page *); @@ -675,7 +673,6 @@ static int __remove_mapping(struct address_space *mapping, struct page *page, shadow = workingset_eviction(mapping, page); __delete_from_page_cache(page, shadow); spin_unlock_irqrestore(&mapping->tree_lock, flags); - unlock_page_memcg(page); if (freepage != NULL) freepage(page); @@ -685,7 +682,6 @@ static int __remove_mapping(struct address_space *mapping, struct page *page, cannot_free: spin_unlock_irqrestore(&mapping->tree_lock, flags); - unlock_page_memcg(page); return 0; } -- cgit v1.2.3 From 7cf91a98e607c2f935dbcc177d70011e95b8faff Mon Sep 17 00:00:00 2001 From: Joonsoo Kim Date: Tue, 15 Mar 2016 14:57:51 -0700 Subject: mm/compaction: speed up pageblock_pfn_to_page() when zone is contiguous There is a performance drop report due to hugepage allocation, and in it half of the cpu time is spent on pageblock_pfn_to_page() in compaction [1]. In that workload, compaction is triggered to make hugepages, but most pageblocks are unavailable for compaction due to the pageblock type and skip bit, so compaction usually fails. The most costly operation in this case is finding a valid pageblock while scanning the whole zone range. To check whether a pageblock is valid to compact, a valid pfn within the pageblock is required, and we can obtain it by calling pageblock_pfn_to_page(). This function checks whether the pageblock is in a single zone and returns a valid pfn if possible. The problem is that we need to run this check every time before scanning a pageblock, even when we re-visit it, and that turns out to be very expensive in this workload. Although we have no way to skip this pageblock check on systems where holes exist at arbitrary positions, we can cache the zone's continuity and just do pfn_to_page() on systems where no hole exists. This optimization considerably speeds up the above workload. Before vs After Max: 1096 MB/s vs 1325 MB/s Min: 635 MB/s vs 1015 MB/s Avg: 899 MB/s vs 1194 MB/s Avg is improved by roughly 30% [2]. 
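The core of the fix is a cached per-zone flag that is consulted before the expensive per-pageblock validation. What follows is a standalone userspace model of that fast path, a sketch only, not the kernel code: pfn_valid() is faked with simple bounds checks, and the 512-page pageblock size is an assumption (typical of x86 with 2MB pageblocks).

#include <stdbool.h>
#include <stdio.h>

#define PAGEBLOCK_NR_PAGES 512UL	/* assumption: typical x86 value */

struct zone {
	unsigned long start_pfn, end_pfn;
	bool contiguous;	/* cached "this zone has no holes" flag */
};

/* stand-in for the kernel's memmap hole check */
static bool pfn_valid(const struct zone *z, unsigned long pfn)
{
	return pfn >= z->start_pfn && pfn < z->end_pfn;
}

/* slow path: probe only the first and last pfn of the block */
static bool block_valid_slow(const struct zone *z, unsigned long start,
			     unsigned long end)
{
	return pfn_valid(z, start) && pfn_valid(z, end - 1);
}

/* fast path: a contiguous zone needs no per-block revalidation */
static bool block_valid(const struct zone *z, unsigned long start,
			unsigned long end)
{
	return z->contiguous || block_valid_slow(z, start, end);
}

/* one-time scan, mirroring what set_zone_contiguous() does in the patch */
static void zone_check_contiguous(struct zone *z)
{
	unsigned long pfn, end;

	for (pfn = z->start_pfn; pfn < z->end_pfn; pfn = end) {
		end = pfn + PAGEBLOCK_NR_PAGES;
		if (end > z->end_pfn)
			end = z->end_pfn;
		if (!block_valid_slow(z, pfn, end))
			return;	/* found a hole: leave the flag clear */
	}
	z->contiguous = true;
}

int main(void)
{
	struct zone z = { .start_pfn = 0, .end_pfn = 1UL << 20 };
	unsigned long pfn, ok = 0;

	zone_check_contiguous(&z);
	for (pfn = z.start_pfn; pfn < z.end_pfn; pfn += PAGEBLOCK_NR_PAGES)
		ok += block_valid(&z, pfn, pfn + PAGEBLOCK_NR_PAGES);
	printf("contiguous=%d, %lu valid pageblocks\n", z.contiguous, ok);
	return 0;
}

The one-time scan pays the slow-path cost once; every later pass reduces to a single flag test. This is also why the patch below clears the flag around memory hotplug (see the __add_pages()/__remove_pages() hunks) and recomputes it afterwards.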
[1]: http://www.spinics.net/lists/linux-mm/msg97378.html [2]: https://lkml.org/lkml/2015/12/9/23 [akpm@linux-foundation.org: don't forget to restore zone->contiguous on error path, per Vlastimil] Signed-off-by: Joonsoo Kim Reported-by: Aaron Lu Acked-by: Vlastimil Babka Tested-by: Aaron Lu Cc: Mel Gorman Cc: Rik van Riel Cc: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/gfp.h | 6 ---- include/linux/memory_hotplug.h | 3 ++ include/linux/mmzone.h | 2 ++ mm/compaction.c | 43 ----------------------- mm/internal.h | 12 +++++++ mm/memory_hotplug.c | 13 +++++-- mm/page_alloc.c | 78 +++++++++++++++++++++++++++++++++++++++++- 7 files changed, 105 insertions(+), 52 deletions(-) (limited to 'include/linux') diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 06546b36eb6a..bb16dfeb917e 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -519,13 +519,7 @@ void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp); void drain_all_pages(struct zone *zone); void drain_local_pages(struct zone *zone); -#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT void page_alloc_init_late(void); -#else -static inline void page_alloc_init_late(void) -{ -} -#endif /* * gfp_allowed_mask is set to GFP_BOOT_MASK during early boot to restrict what diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index 769d76870550..adbef586e696 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -198,6 +198,9 @@ void put_online_mems(void); void mem_hotplug_begin(void); void mem_hotplug_done(void); +extern void set_zone_contiguous(struct zone *zone); +extern void clear_zone_contiguous(struct zone *zone); + #else /* ! CONFIG_MEMORY_HOTPLUG */ /* * Stub functions for when hotplug is off diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 03cbdd906f55..6de02ac378a0 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -522,6 +522,8 @@ struct zone { bool compact_blockskip_flush; #endif + bool contiguous; + ZONE_PADDING(_pad3_) /* Zone statistics */ atomic_long_t vm_stat[NR_VM_ZONE_STAT_ITEMS]; diff --git a/mm/compaction.c b/mm/compaction.c index 8ce36ebc8d15..93f71d968098 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -71,49 +71,6 @@ static inline bool migrate_async_suitable(int migratetype) return is_migrate_cma(migratetype) || migratetype == MIGRATE_MOVABLE; } -/* - * Check that the whole (or subset of) a pageblock given by the interval of - * [start_pfn, end_pfn) is valid and within the same zone, before scanning it - * with the migration of free compaction scanner. The scanners then need to - * use only pfn_valid_within() check for arches that allow holes within - * pageblocks. - * - * Return struct page pointer of start_pfn, or NULL if checks were not passed. - * - * It's possible on some configurations to have a setup like node0 node1 node0 - * i.e. it's possible that all pages within a zones range of pages do not - * belong to a single zone. We assume that a border between node0 and node1 - * can occur within a single pageblock, but not a node0 node1 node0 - * interleaving within a single pageblock. It is therefore sufficient to check - * the first and last page of a pageblock and avoid checking each individual - * page in a pageblock. 
- */ -static struct page *pageblock_pfn_to_page(unsigned long start_pfn, - unsigned long end_pfn, struct zone *zone) -{ - struct page *start_page; - struct page *end_page; - - /* end_pfn is one past the range we are checking */ - end_pfn--; - - if (!pfn_valid(start_pfn) || !pfn_valid(end_pfn)) - return NULL; - - start_page = pfn_to_page(start_pfn); - - if (page_zone(start_page) != zone) - return NULL; - - end_page = pfn_to_page(end_pfn); - - /* This gives a shorter code than deriving page_zone(end_page) */ - if (page_zone_id(start_page) != page_zone_id(end_page)) - return NULL; - - return start_page; -} - #ifdef CONFIG_COMPACTION /* Do not skip compaction more than 64 times */ diff --git a/mm/internal.h b/mm/internal.h index 6636e1d3ecf0..ad9400d759c8 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -132,6 +132,18 @@ __find_buddy_index(unsigned long page_idx, unsigned int order) return page_idx ^ (1 << order); } +extern struct page *__pageblock_pfn_to_page(unsigned long start_pfn, + unsigned long end_pfn, struct zone *zone); + +static inline struct page *pageblock_pfn_to_page(unsigned long start_pfn, + unsigned long end_pfn, struct zone *zone) +{ + if (zone->contiguous) + return pfn_to_page(start_pfn); + + return __pageblock_pfn_to_page(start_pfn, end_pfn, zone); +} + extern int __isolate_free_page(struct page *page, unsigned int order); extern void __free_pages_bootmem(struct page *page, unsigned long pfn, unsigned int order); diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 484e86761b3e..24ea06393816 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -512,6 +512,8 @@ int __ref __add_pages(int nid, struct zone *zone, unsigned long phys_start_pfn, int start_sec, end_sec; struct vmem_altmap *altmap; + clear_zone_contiguous(zone); + /* during initialize mem_map, align hot-added range to section */ start_sec = pfn_to_section_nr(phys_start_pfn); end_sec = pfn_to_section_nr(phys_start_pfn + nr_pages - 1); @@ -524,7 +526,8 @@ int __ref __add_pages(int nid, struct zone *zone, unsigned long phys_start_pfn, if (altmap->base_pfn != phys_start_pfn || vmem_altmap_offset(altmap) > nr_pages) { pr_warn_once("memory add fail, invalid altmap\n"); - return -EINVAL; + err = -EINVAL; + goto out; } altmap->alloc = 0; } @@ -542,7 +545,8 @@ int __ref __add_pages(int nid, struct zone *zone, unsigned long phys_start_pfn, err = 0; } vmemmap_populate_print_last(); - +out: + set_zone_contiguous(zone); return err; } EXPORT_SYMBOL_GPL(__add_pages); @@ -814,6 +818,8 @@ int __remove_pages(struct zone *zone, unsigned long phys_start_pfn, } } + clear_zone_contiguous(zone); + /* * We can only remove entire sections */ @@ -829,6 +835,9 @@ int __remove_pages(struct zone *zone, unsigned long phys_start_pfn, if (ret) break; } + + set_zone_contiguous(zone); + return ret; } EXPORT_SYMBOL_GPL(__remove_pages); diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 50897dcaefdb..c46b75d14b6f 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1128,6 +1128,75 @@ void __init __free_pages_bootmem(struct page *page, unsigned long pfn, return __free_pages_boot_core(page, pfn, order); } +/* + * Check that the whole (or subset of) a pageblock given by the interval of + * [start_pfn, end_pfn) is valid and within the same zone, before scanning it + * with the migration of free compaction scanner. The scanners then need to + * use only pfn_valid_within() check for arches that allow holes within + * pageblocks. + * + * Return struct page pointer of start_pfn, or NULL if checks were not passed. 
+ * + * It's possible on some configurations to have a setup like node0 node1 node0 + * i.e. it's possible that all pages within a zones range of pages do not + * belong to a single zone. We assume that a border between node0 and node1 + * can occur within a single pageblock, but not a node0 node1 node0 + * interleaving within a single pageblock. It is therefore sufficient to check + * the first and last page of a pageblock and avoid checking each individual + * page in a pageblock. + */ +struct page *__pageblock_pfn_to_page(unsigned long start_pfn, + unsigned long end_pfn, struct zone *zone) +{ + struct page *start_page; + struct page *end_page; + + /* end_pfn is one past the range we are checking */ + end_pfn--; + + if (!pfn_valid(start_pfn) || !pfn_valid(end_pfn)) + return NULL; + + start_page = pfn_to_page(start_pfn); + + if (page_zone(start_page) != zone) + return NULL; + + end_page = pfn_to_page(end_pfn); + + /* This gives a shorter code than deriving page_zone(end_page) */ + if (page_zone_id(start_page) != page_zone_id(end_page)) + return NULL; + + return start_page; +} + +void set_zone_contiguous(struct zone *zone) +{ + unsigned long block_start_pfn = zone->zone_start_pfn; + unsigned long block_end_pfn; + + block_end_pfn = ALIGN(block_start_pfn + 1, pageblock_nr_pages); + for (; block_start_pfn < zone_end_pfn(zone); + block_start_pfn = block_end_pfn, + block_end_pfn += pageblock_nr_pages) { + + block_end_pfn = min(block_end_pfn, zone_end_pfn(zone)); + + if (!__pageblock_pfn_to_page(block_start_pfn, + block_end_pfn, zone)) + return; + } + + /* We confirm that there is no hole */ + zone->contiguous = true; +} + +void clear_zone_contiguous(struct zone *zone) +{ + zone->contiguous = false; +} + #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT static void __init deferred_free_range(struct page *page, unsigned long pfn, int nr_pages) @@ -1278,9 +1347,13 @@ free_range: pgdat_init_report_one_done(); return 0; } +#endif /* CONFIG_DEFERRED_STRUCT_PAGE_INIT */ void __init page_alloc_init_late(void) { + struct zone *zone; + +#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT int nid; /* There will be num_node_state(N_MEMORY) threads */ @@ -1294,8 +1367,11 @@ void __init page_alloc_init_late(void) /* Reinit limits that are based on free pages after the kernel is up */ files_maxfiles_init(); +#endif + + for_each_populated_zone(zone) + set_zone_contiguous(zone); } -#endif /* CONFIG_DEFERRED_STRUCT_PAGE_INIT */ #ifdef CONFIG_CMA /* Free whole pageblock and set its migration type to MIGRATE_CMA. */ -- cgit v1.2.3 From e9a7c2f1a548f34bcaa7640094201e8b29247940 Mon Sep 17 00:00:00 2001 From: Ian Kent Date: Tue, 15 Mar 2016 14:58:25 -0700 Subject: autofs4: coding style fixes Try to make the coding style completely consistent throughout the autofs module and in line with kernel coding style recommendations. 
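The conversions are mechanical checkpatch-style cleanups. One representative pattern, taken from the expire.c hunk further down, is unpicking an assignment buried inside a condition; shown here as an isolated before/after fragment:

/* before: assignment hidden in the test */
if ((dentry = autofs4_expire_indirect(sb, mnt, sbi, 0)) == NULL)
	return -EAGAIN;

/* after: assign first, then test, per kernel style */
dentry = autofs4_expire_indirect(sb, mnt, sbi, 0);
if (!dentry)
	return -EAGAIN;

The same spirit drives the rest of the diff: the old "-*- c -*-" banner headers shrink to plain copyright comments, sizeof pkt becomes sizeof(pkt), multi-line comments get the leading /* on its own line, and bare return; statements at the end of void functions are dropped.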
Signed-off-by: Ian Kent Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/autofs4/autofs_i.h | 55 ++++++++++++------------ fs/autofs4/dev-ioctl.c | 28 ++++++++----- fs/autofs4/expire.c | 60 ++++++++++++++------------ fs/autofs4/init.c | 10 ++--- fs/autofs4/inode.c | 25 ++++++----- fs/autofs4/root.c | 95 +++++++++++++++++++++++------------------- fs/autofs4/symlink.c | 11 ++--- fs/autofs4/waitq.c | 49 ++++++++++++---------- include/linux/auto_dev-ioctl.h | 1 - include/linux/auto_fs.h | 10 ++--- include/uapi/linux/auto_fs.h | 19 ++++----- include/uapi/linux/auto_fs4.h | 15 ++----- 12 files changed, 190 insertions(+), 188 deletions(-) (limited to 'include/linux') diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h index c37149b929be..e50cfae487b2 100644 --- a/fs/autofs4/autofs_i.h +++ b/fs/autofs4/autofs_i.h @@ -1,15 +1,11 @@ -/* -*- c -*- ------------------------------------------------------------- * - * - * linux/fs/autofs/autofs_i.h - * - * Copyright 1997-1998 Transmeta Corporation - All Rights Reserved - * Copyright 2005-2006 Ian Kent +/* + * Copyright 1997-1998 Transmeta Corporation - All Rights Reserved + * Copyright 2005-2006 Ian Kent * * This file is part of the Linux kernel and is made available under * the terms of the GNU General Public License, version 2, or at your * option, any later version, incorporated herein by reference. - * - * ----------------------------------------------------------------------- */ + */ /* Internal header file for autofs */ @@ -35,7 +31,7 @@ #include #include #include -#include +#include /* #define DEBUG */ @@ -51,12 +47,14 @@ printk(KERN_ERR "pid %d: %s: " fmt "\n", \ current->pid, __func__, ##__VA_ARGS__) -/* Unified info structure. This is pointed to by both the dentry and - inode structures. Each file in the filesystem has an instance of this - structure. It holds a reference to the dentry, so dentries are never - flushed while the file exists. All name lookups are dealt with at the - dentry level, although the filesystem can interfere in the validation - process. Readdir is implemented by traversing the dentry lists. */ +/* + * Unified info structure. This is pointed to by both the dentry and + * inode structures. Each file in the filesystem has an instance of this + * structure. It holds a reference to the dentry, so dentries are never + * flushed while the file exists. All name lookups are dealt with at the + * dentry level, although the filesystem can interfere in the validation + * process. Readdir is implemented by traversing the dentry lists. + */ struct autofs_info { struct dentry *dentry; struct inode *inode; @@ -78,7 +76,7 @@ struct autofs_info { kgid_t gid; }; -#define AUTOFS_INF_EXPIRING (1<<0) /* dentry is in the process of expiring */ +#define AUTOFS_INF_EXPIRING (1<<0) /* dentry in the process of expiring */ #define AUTOFS_INF_NO_RCU (1<<1) /* the dentry is being considered * for expiry, so RCU_walk is * not permitted @@ -140,10 +138,11 @@ static inline struct autofs_info *autofs4_dentry_ino(struct dentry *dentry) } /* autofs4_oz_mode(): do we see the man behind the curtain? (The - processes which do manipulations for us in user space sees the raw - filesystem without "magic".) */ - -static inline int autofs4_oz_mode(struct autofs_sb_info *sbi) { + * processes which do manipulations for us in user space sees the raw + * filesystem without "magic".) 
+ */ +static inline int autofs4_oz_mode(struct autofs_sb_info *sbi) +{ return sbi->catatonic || task_pgrp(current) == sbi->oz_pgrp; } @@ -154,12 +153,12 @@ void autofs4_free_ino(struct autofs_info *); int is_autofs4_dentry(struct dentry *); int autofs4_expire_wait(struct dentry *dentry, int rcu_walk); int autofs4_expire_run(struct super_block *, struct vfsmount *, - struct autofs_sb_info *, - struct autofs_packet_expire __user *); + struct autofs_sb_info *, + struct autofs_packet_expire __user *); int autofs4_do_expire_multi(struct super_block *sb, struct vfsmount *mnt, struct autofs_sb_info *sbi, int when); int autofs4_expire_multi(struct super_block *, struct vfsmount *, - struct autofs_sb_info *, int __user *); + struct autofs_sb_info *, int __user *); struct dentry *autofs4_expire_direct(struct super_block *sb, struct vfsmount *mnt, struct autofs_sb_info *sbi, int how); @@ -224,8 +223,8 @@ static inline int autofs_prepare_pipe(struct file *pipe) /* Queue management functions */ -int autofs4_wait(struct autofs_sb_info *,struct dentry *, enum autofs_notify); -int autofs4_wait_release(struct autofs_sb_info *,autofs_wqt_t,int); +int autofs4_wait(struct autofs_sb_info *, struct dentry *, enum autofs_notify); +int autofs4_wait_release(struct autofs_sb_info *, autofs_wqt_t, int); void autofs4_catatonic_mode(struct autofs_sb_info *); static inline u32 autofs4_get_dev(struct autofs_sb_info *sbi) @@ -242,37 +241,37 @@ static inline void __autofs4_add_expiring(struct dentry *dentry) { struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb); struct autofs_info *ino = autofs4_dentry_ino(dentry); + if (ino) { if (list_empty(&ino->expiring)) list_add(&ino->expiring, &sbi->expiring_list); } - return; } static inline void autofs4_add_expiring(struct dentry *dentry) { struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb); struct autofs_info *ino = autofs4_dentry_ino(dentry); + if (ino) { spin_lock(&sbi->lookup_lock); if (list_empty(&ino->expiring)) list_add(&ino->expiring, &sbi->expiring_list); spin_unlock(&sbi->lookup_lock); } - return; } static inline void autofs4_del_expiring(struct dentry *dentry) { struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb); struct autofs_info *ino = autofs4_dentry_ino(dentry); + if (ino) { spin_lock(&sbi->lookup_lock); if (!list_empty(&ino->expiring)) list_del_init(&ino->expiring); spin_unlock(&sbi->lookup_lock); } - return; } extern void autofs4_kill_sb(struct super_block *); diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c index ac7d921ed984..c64b9fa839c5 100644 --- a/fs/autofs4/dev-ioctl.c +++ b/fs/autofs4/dev-ioctl.c @@ -72,8 +72,8 @@ static int check_dev_ioctl_version(int cmd, struct autofs_dev_ioctl *param) { int err = 0; - if ((AUTOFS_DEV_IOCTL_VERSION_MAJOR != param->ver_major) || - (AUTOFS_DEV_IOCTL_VERSION_MINOR < param->ver_minor)) { + if ((param->ver_major != AUTOFS_DEV_IOCTL_VERSION_MAJOR) || + (param->ver_minor > AUTOFS_DEV_IOCTL_VERSION_MINOR)) { AUTOFS_WARN("ioctl control interface version mismatch: " "kernel(%u.%u), user(%u.%u), cmd(%d)", AUTOFS_DEV_IOCTL_VERSION_MAJOR, @@ -93,7 +93,8 @@ static int check_dev_ioctl_version(int cmd, struct autofs_dev_ioctl *param) * Copy parameter control struct, including a possible path allocated * at the end of the struct. 
*/ -static struct autofs_dev_ioctl *copy_dev_ioctl(struct autofs_dev_ioctl __user *in) +static struct autofs_dev_ioctl * + copy_dev_ioctl(struct autofs_dev_ioctl __user *in) { struct autofs_dev_ioctl tmp, *res; @@ -116,7 +117,6 @@ static struct autofs_dev_ioctl *copy_dev_ioctl(struct autofs_dev_ioctl __user *i static inline void free_dev_ioctl(struct autofs_dev_ioctl *param) { kfree(param); - return; } /* @@ -197,7 +197,9 @@ static int find_autofs_mount(const char *pathname, void *data) { struct path path; - int err = kern_path_mountpoint(AT_FDCWD, pathname, &path, 0); + int err; + + err = kern_path_mountpoint(AT_FDCWD, pathname, &path, 0); if (err) return err; err = -ENOENT; @@ -225,6 +227,7 @@ static int test_by_dev(struct path *path, void *p) static int test_by_type(struct path *path, void *p) { struct autofs_info *ino = autofs4_dentry_ino(path->dentry); + return ino && ino->sbi->type & *(unsigned *)p; } @@ -456,8 +459,10 @@ static int autofs_dev_ioctl_requester(struct file *fp, err = 0; autofs4_expire_wait(path.dentry, 0); spin_lock(&sbi->fs_lock); - param->requester.uid = from_kuid_munged(current_user_ns(), ino->uid); - param->requester.gid = from_kgid_munged(current_user_ns(), ino->gid); + param->requester.uid = + from_kuid_munged(current_user_ns(), ino->uid); + param->requester.gid = + from_kgid_munged(current_user_ns(), ino->gid); spin_unlock(&sbi->fs_lock); } path_put(&path); @@ -619,7 +624,8 @@ static ioctl_fn lookup_dev_ioctl(unsigned int cmd) } /* ioctl dispatcher */ -static int _autofs_dev_ioctl(unsigned int command, struct autofs_dev_ioctl __user *user) +static int _autofs_dev_ioctl(unsigned int command, + struct autofs_dev_ioctl __user *user) { struct autofs_dev_ioctl *param; struct file *fp; @@ -711,6 +717,7 @@ out: static long autofs_dev_ioctl(struct file *file, uint command, ulong u) { int err; + err = _autofs_dev_ioctl(command, (struct autofs_dev_ioctl __user *) u); return (long) err; } @@ -733,8 +740,8 @@ static const struct file_operations _dev_ioctl_fops = { static struct miscdevice _autofs_dev_ioctl_misc = { .minor = AUTOFS_MINOR, - .name = AUTOFS_DEVICE_NAME, - .fops = &_dev_ioctl_fops + .name = AUTOFS_DEVICE_NAME, + .fops = &_dev_ioctl_fops }; MODULE_ALIAS_MISCDEV(AUTOFS_MINOR); @@ -757,6 +764,5 @@ int __init autofs_dev_ioctl_init(void) void autofs_dev_ioctl_exit(void) { misc_deregister(&_autofs_dev_ioctl_misc); - return; } diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c index 1cebc3c52fa5..8ba37c73b372 100644 --- a/fs/autofs4/expire.c +++ b/fs/autofs4/expire.c @@ -1,16 +1,12 @@ -/* -*- c -*- --------------------------------------------------------------- * - * - * linux/fs/autofs/expire.c - * - * Copyright 1997-1998 Transmeta Corporation -- All Rights Reserved - * Copyright 1999-2000 Jeremy Fitzhardinge - * Copyright 2001-2006 Ian Kent +/* + * Copyright 1997-1998 Transmeta Corporation -- All Rights Reserved + * Copyright 1999-2000 Jeremy Fitzhardinge + * Copyright 2001-2006 Ian Kent * * This file is part of the Linux kernel and is made available under * the terms of the GNU General Public License, version 2, or at your * option, any later version, incorporated herein by reference. 
- * - * ------------------------------------------------------------------------- */ + */ #include "autofs_i.h" @@ -18,7 +14,7 @@ static unsigned long now; /* Check if a dentry can be expired */ static inline int autofs4_can_expire(struct dentry *dentry, - unsigned long timeout, int do_now) + unsigned long timeout, int do_now) { struct autofs_info *ino = autofs4_dentry_ino(dentry); @@ -58,7 +54,9 @@ static int autofs4_mount_busy(struct vfsmount *mnt, struct dentry *dentry) /* Update the expiry counter if fs is busy */ if (!may_umount_tree(path.mnt)) { - struct autofs_info *ino = autofs4_dentry_ino(top); + struct autofs_info *ino; + + ino = autofs4_dentry_ino(top); ino->last_used = jiffies; goto done; } @@ -74,7 +72,7 @@ done: * Calculate and dget next entry in the subdirs list under root. */ static struct dentry *get_next_positive_subdir(struct dentry *prev, - struct dentry *root) + struct dentry *root) { struct autofs_sb_info *sbi = autofs4_sbi(root->d_sb); struct list_head *next; @@ -121,7 +119,7 @@ cont: * Calculate and dget next entry in top down tree traversal. */ static struct dentry *get_next_positive_dentry(struct dentry *prev, - struct dentry *root) + struct dentry *root) { struct autofs_sb_info *sbi = autofs4_sbi(root->d_sb); struct list_head *next; @@ -187,15 +185,17 @@ again: * autofs submounts. */ static int autofs4_direct_busy(struct vfsmount *mnt, - struct dentry *top, - unsigned long timeout, - int do_now) + struct dentry *top, + unsigned long timeout, + int do_now) { DPRINTK("top %p %pd", top, top); /* If it's busy update the expiry counters */ if (!may_umount_tree(mnt)) { - struct autofs_info *ino = autofs4_dentry_ino(top); + struct autofs_info *ino; + + ino = autofs4_dentry_ino(top); if (ino) ino->last_used = jiffies; return 1; @@ -208,7 +208,8 @@ static int autofs4_direct_busy(struct vfsmount *mnt, return 0; } -/* Check a directory tree of mount points for busyness +/* + * Check a directory tree of mount points for busyness * The tree is not busy iff no mountpoints are busy */ static int autofs4_tree_busy(struct vfsmount *mnt, @@ -404,6 +405,7 @@ static struct dentry *should_expire(struct dentry *dentry, } else { /* Path walk currently on this dentry? 
*/ struct dentry *expired; + ino_count = atomic_read(&ino->count) + 1; if (d_count(dentry) > ino_count) return NULL; @@ -522,21 +524,22 @@ int autofs4_expire_wait(struct dentry *dentry, int rcu_walk) /* Perform an expiry operation */ int autofs4_expire_run(struct super_block *sb, - struct vfsmount *mnt, - struct autofs_sb_info *sbi, - struct autofs_packet_expire __user *pkt_p) + struct vfsmount *mnt, + struct autofs_sb_info *sbi, + struct autofs_packet_expire __user *pkt_p) { struct autofs_packet_expire pkt; struct autofs_info *ino; struct dentry *dentry; int ret = 0; - memset(&pkt,0,sizeof pkt); + memset(&pkt, 0, sizeof(pkt)); pkt.hdr.proto_version = sbi->version; pkt.hdr.type = autofs_ptype_expire; - if ((dentry = autofs4_expire_indirect(sb, mnt, sbi, 0)) == NULL) + dentry = autofs4_expire_indirect(sb, mnt, sbi, 0); + if (!dentry) return -EAGAIN; pkt.len = dentry->d_name.len; @@ -544,7 +547,7 @@ int autofs4_expire_run(struct super_block *sb, pkt.name[pkt.len] = '\0'; dput(dentry); - if ( copy_to_user(pkt_p, &pkt, sizeof(struct autofs_packet_expire)) ) + if (copy_to_user(pkt_p, &pkt, sizeof(struct autofs_packet_expire))) ret = -EFAULT; spin_lock(&sbi->fs_lock); @@ -573,7 +576,8 @@ int autofs4_do_expire_multi(struct super_block *sb, struct vfsmount *mnt, struct autofs_info *ino = autofs4_dentry_ino(dentry); /* This is synchronous because it makes the daemon a - little easier */ + * little easier + */ ret = autofs4_wait(sbi, dentry, NFY_EXPIRE); spin_lock(&sbi->fs_lock); @@ -588,8 +592,10 @@ int autofs4_do_expire_multi(struct super_block *sb, struct vfsmount *mnt, return ret; } -/* Call repeatedly until it returns -EAGAIN, meaning there's nothing - more to be done */ +/* + * Call repeatedly until it returns -EAGAIN, meaning there's nothing + * more to be done. + */ int autofs4_expire_multi(struct super_block *sb, struct vfsmount *mnt, struct autofs_sb_info *sbi, int __user *arg) { diff --git a/fs/autofs4/init.c b/fs/autofs4/init.c index b3db517e89ec..8cf0e63389ae 100644 --- a/fs/autofs4/init.c +++ b/fs/autofs4/init.c @@ -1,14 +1,10 @@ -/* -*- c -*- --------------------------------------------------------------- * - * - * linux/fs/autofs/init.c - * - * Copyright 1997-1998 Transmeta Corporation -- All Rights Reserved +/* + * Copyright 1997-1998 Transmeta Corporation -- All Rights Reserved * * This file is part of the Linux kernel and is made available under * the terms of the GNU General Public License, version 2, or at your * option, any later version, incorporated herein by reference. - * - * ------------------------------------------------------------------------- */ + */ #include #include diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c index 4320faa2d2dc..ad03705aac43 100644 --- a/fs/autofs4/inode.c +++ b/fs/autofs4/inode.c @@ -1,15 +1,11 @@ -/* -*- c -*- --------------------------------------------------------------- * - * - * linux/fs/autofs/inode.c - * - * Copyright 1997-1998 Transmeta Corporation -- All Rights Reserved - * Copyright 2005-2006 Ian Kent +/* + * Copyright 1997-1998 Transmeta Corporation -- All Rights Reserved + * Copyright 2005-2006 Ian Kent * * This file is part of the Linux kernel and is made available under * the terms of the GNU General Public License, version 2, or at your * option, any later version, incorporated herein by reference. 
- * - * ------------------------------------------------------------------------- */ + */ #include #include @@ -24,7 +20,9 @@ struct autofs_info *autofs4_new_ino(struct autofs_sb_info *sbi) { - struct autofs_info *ino = kzalloc(sizeof(*ino), GFP_KERNEL); + struct autofs_info *ino; + + ino = kzalloc(sizeof(*ino), GFP_KERNEL); if (ino) { INIT_LIST_HEAD(&ino->active); INIT_LIST_HEAD(&ino->expiring); @@ -152,6 +150,7 @@ static int parse_options(char *options, int *pipefd, kuid_t *uid, kgid_t *gid, while ((p = strsep(&options, ",")) != NULL) { int token; + if (!*p) continue; @@ -209,9 +208,9 @@ static int parse_options(char *options, int *pipefd, kuid_t *uid, kgid_t *gid, int autofs4_fill_super(struct super_block *s, void *data, int silent) { - struct inode * root_inode; - struct dentry * root; - struct file * pipe; + struct inode *root_inode; + struct dentry *root; + struct file *pipe; int pipefd; struct autofs_sb_info *sbi; struct autofs_info *ino; @@ -222,7 +221,7 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent) sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); if (!sbi) return -ENOMEM; - DPRINTK("starting up, sbi = %p",sbi); + DPRINTK("starting up, sbi = %p", sbi); s->s_fs_info = sbi; sbi->magic = AUTOFS_SBI_MAGIC; diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c index c6d7d3dbd52a..bdeb8838a901 100644 --- a/fs/autofs4/root.c +++ b/fs/autofs4/root.c @@ -1,16 +1,12 @@ -/* -*- c -*- --------------------------------------------------------------- * - * - * linux/fs/autofs/root.c - * - * Copyright 1997-1998 Transmeta Corporation -- All Rights Reserved - * Copyright 1999-2000 Jeremy Fitzhardinge - * Copyright 2001-2006 Ian Kent +/* + * Copyright 1997-1998 Transmeta Corporation -- All Rights Reserved + * Copyright 1999-2000 Jeremy Fitzhardinge + * Copyright 2001-2006 Ian Kent * * This file is part of the Linux kernel and is made available under * the terms of the GNU General Public License, version 2, or at your * option, any later version, incorporated herein by reference. 
- * - * ------------------------------------------------------------------------- */ + */ #include #include @@ -23,16 +19,18 @@ #include "autofs_i.h" -static int autofs4_dir_symlink(struct inode *,struct dentry *,const char *); -static int autofs4_dir_unlink(struct inode *,struct dentry *); -static int autofs4_dir_rmdir(struct inode *,struct dentry *); -static int autofs4_dir_mkdir(struct inode *,struct dentry *,umode_t); -static long autofs4_root_ioctl(struct file *,unsigned int,unsigned long); +static int autofs4_dir_symlink(struct inode *, struct dentry *, const char *); +static int autofs4_dir_unlink(struct inode *, struct dentry *); +static int autofs4_dir_rmdir(struct inode *, struct dentry *); +static int autofs4_dir_mkdir(struct inode *, struct dentry *, umode_t); +static long autofs4_root_ioctl(struct file *, unsigned int, unsigned long); #ifdef CONFIG_COMPAT -static long autofs4_root_compat_ioctl(struct file *,unsigned int,unsigned long); +static long autofs4_root_compat_ioctl(struct file *, + unsigned int, unsigned long); #endif static int autofs4_dir_open(struct inode *inode, struct file *file); -static struct dentry *autofs4_lookup(struct inode *,struct dentry *, unsigned int); +static struct dentry *autofs4_lookup(struct inode *, + struct dentry *, unsigned int); static struct vfsmount *autofs4_d_automount(struct path *); static int autofs4_d_manage(struct dentry *, bool); static void autofs4_dentry_release(struct dentry *); @@ -74,7 +72,9 @@ const struct dentry_operations autofs4_dentry_operations = { static void autofs4_add_active(struct dentry *dentry) { struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb); - struct autofs_info *ino = autofs4_dentry_ino(dentry); + struct autofs_info *ino; + + ino = autofs4_dentry_ino(dentry); if (ino) { spin_lock(&sbi->lookup_lock); if (!ino->active_count) { @@ -84,13 +84,14 @@ static void autofs4_add_active(struct dentry *dentry) ino->active_count++; spin_unlock(&sbi->lookup_lock); } - return; } static void autofs4_del_active(struct dentry *dentry) { struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb); - struct autofs_info *ino = autofs4_dentry_ino(dentry); + struct autofs_info *ino; + + ino = autofs4_dentry_ino(dentry); if (ino) { spin_lock(&sbi->lookup_lock); ino->active_count--; @@ -100,7 +101,6 @@ static void autofs4_del_active(struct dentry *dentry) } spin_unlock(&sbi->lookup_lock); } - return; } static int autofs4_dir_open(struct inode *inode, struct file *file) @@ -320,7 +320,9 @@ static struct dentry *autofs4_mountpoint_changed(struct path *path) if (autofs_type_indirect(sbi->type) && d_unhashed(dentry)) { struct dentry *parent = dentry->d_parent; struct autofs_info *ino; - struct dentry *new = d_lookup(parent, &dentry->d_name); + struct dentry *new; + + new = d_lookup(parent, &dentry->d_name); if (!new) return NULL; ino = autofs4_dentry_ino(new); @@ -455,6 +457,7 @@ static int autofs4_d_manage(struct dentry *dentry, bool rcu_walk) * a mount-trap. 
*/ struct inode *inode; + if (ino->flags & (AUTOFS_INF_EXPIRING | AUTOFS_INF_NO_RCU)) return 0; if (d_mountpoint(dentry)) @@ -494,7 +497,8 @@ static int autofs4_d_manage(struct dentry *dentry, bool rcu_walk) } /* Lookups in the root directory */ -static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) +static struct dentry *autofs4_lookup(struct inode *dir, + struct dentry *dentry, unsigned int flags) { struct autofs_sb_info *sbi; struct autofs_info *ino; @@ -513,9 +517,9 @@ static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, u autofs4_oz_mode(sbi)); active = autofs4_lookup_active(dentry); - if (active) { + if (active) return active; - } else { + else { /* * A dentry that is not within the root can never trigger a * mount operation, unless the directory already exists, so we @@ -526,7 +530,8 @@ static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, u return ERR_PTR(-ENOENT); /* Mark entries in the root as mount triggers */ - if (autofs_type_indirect(sbi->type) && IS_ROOT(dentry->d_parent)) + if (IS_ROOT(dentry->d_parent) && + autofs_type_indirect(sbi->type)) __managed_dentry_set_managed(dentry); ino = autofs4_new_ino(sbi); @@ -664,7 +669,6 @@ static void autofs_set_leaf_automount_flags(struct dentry *dentry) if (IS_ROOT(parent->d_parent)) return; managed_dentry_clear_managed(parent); - return; } static void autofs_clear_leaf_automount_flags(struct dentry *dentry) @@ -687,7 +691,6 @@ static void autofs_clear_leaf_automount_flags(struct dentry *dentry) if (d_child->next == &parent->d_subdirs && d_child->prev == &parent->d_subdirs) managed_dentry_set_managed(parent); - return; } static int autofs4_dir_rmdir(struct inode *dir, struct dentry *dentry) @@ -728,7 +731,8 @@ static int autofs4_dir_rmdir(struct inode *dir, struct dentry *dentry) return 0; } -static int autofs4_dir_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) +static int autofs4_dir_mkdir(struct inode *dir, + struct dentry *dentry, umode_t mode) { struct autofs_sb_info *sbi = autofs4_sbi(dir->i_sb); struct autofs_info *ino = autofs4_dentry_ino(dentry); @@ -768,7 +772,7 @@ static int autofs4_dir_mkdir(struct inode *dir, struct dentry *dentry, umode_t m /* Get/set timeout ioctl() operation */ #ifdef CONFIG_COMPAT static inline int autofs4_compat_get_set_timeout(struct autofs_sb_info *sbi, - compat_ulong_t __user *p) + compat_ulong_t __user *p) { int rv; unsigned long ntimeout; @@ -787,7 +791,7 @@ static inline int autofs4_compat_get_set_timeout(struct autofs_sb_info *sbi, #endif static inline int autofs4_get_set_timeout(struct autofs_sb_info *sbi, - unsigned long __user *p) + unsigned long __user *p) { int rv; unsigned long ntimeout; @@ -805,13 +809,15 @@ static inline int autofs4_get_set_timeout(struct autofs_sb_info *sbi, } /* Return protocol version */ -static inline int autofs4_get_protover(struct autofs_sb_info *sbi, int __user *p) +static inline int autofs4_get_protover(struct autofs_sb_info *sbi, + int __user *p) { return put_user(sbi->version, p); } /* Return protocol sub version */ -static inline int autofs4_get_protosubver(struct autofs_sb_info *sbi, int __user *p) +static inline int autofs4_get_protosubver(struct autofs_sb_info *sbi, + int __user *p) { return put_user(sbi->sub_version, p); } @@ -834,9 +840,9 @@ static inline int autofs4_ask_umount(struct vfsmount *mnt, int __user *p) } /* Identify autofs4_dentries - this is so we can tell if there's - an extra dentry refcount or not. 
We only hold a refcount on the - dentry if its non-negative (ie, d_inode != NULL) -*/ + * an extra dentry refcount or not. We only hold a refcount on the + * dentry if its non-negative (ie, d_inode != NULL) + */ int is_autofs4_dentry(struct dentry *dentry) { return dentry && d_really_is_positive(dentry) && @@ -855,7 +861,7 @@ static int autofs4_root_ioctl_unlocked(struct inode *inode, struct file *filp, void __user *p = (void __user *)arg; DPRINTK("cmd = 0x%08x, arg = 0x%08lx, sbi = %p, pgrp = %u", - cmd,arg,sbi,task_pgrp_nr(current)); + cmd, arg, sbi, task_pgrp_nr(current)); if (_IOC_TYPE(cmd) != _IOC_TYPE(AUTOFS_IOC_FIRST) || _IOC_NR(cmd) - _IOC_NR(AUTOFS_IOC_FIRST) >= AUTOFS_IOC_COUNT) @@ -864,11 +870,11 @@ static int autofs4_root_ioctl_unlocked(struct inode *inode, struct file *filp, if (!autofs4_oz_mode(sbi) && !capable(CAP_SYS_ADMIN)) return -EPERM; - switch(cmd) { + switch (cmd) { case AUTOFS_IOC_READY: /* Wait queue: go ahead and retry */ - return autofs4_wait_release(sbi,(autofs_wqt_t)arg,0); + return autofs4_wait_release(sbi, (autofs_wqt_t) arg, 0); case AUTOFS_IOC_FAIL: /* Wait queue: fail with ENOENT */ - return autofs4_wait_release(sbi,(autofs_wqt_t)arg,-ENOENT); + return autofs4_wait_release(sbi, (autofs_wqt_t) arg, -ENOENT); case AUTOFS_IOC_CATATONIC: /* Enter catatonic mode (daemon shutdown) */ autofs4_catatonic_mode(sbi); return 0; @@ -888,10 +894,12 @@ static int autofs4_root_ioctl_unlocked(struct inode *inode, struct file *filp, /* return a single thing to expire */ case AUTOFS_IOC_EXPIRE: - return autofs4_expire_run(inode->i_sb,filp->f_path.mnt,sbi, p); + return autofs4_expire_run(inode->i_sb, + filp->f_path.mnt, sbi, p); /* same as above, but can send multiple expires through pipe */ case AUTOFS_IOC_EXPIRE_MULTI: - return autofs4_expire_multi(inode->i_sb,filp->f_path.mnt,sbi, p); + return autofs4_expire_multi(inode->i_sb, + filp->f_path.mnt, sbi, p); default: return -ENOSYS; @@ -902,12 +910,13 @@ static long autofs4_root_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { struct inode *inode = file_inode(filp); + return autofs4_root_ioctl_unlocked(inode, filp, cmd, arg); } #ifdef CONFIG_COMPAT static long autofs4_root_compat_ioctl(struct file *filp, - unsigned int cmd, unsigned long arg) + unsigned int cmd, unsigned long arg) { struct inode *inode = file_inode(filp); int ret; @@ -916,7 +925,7 @@ static long autofs4_root_compat_ioctl(struct file *filp, ret = autofs4_root_ioctl_unlocked(inode, filp, cmd, arg); else ret = autofs4_root_ioctl_unlocked(inode, filp, cmd, - (unsigned long)compat_ptr(arg)); + (unsigned long) compat_ptr(arg)); return ret; } diff --git a/fs/autofs4/symlink.c b/fs/autofs4/symlink.c index 84e037d1d129..99aab00dc217 100644 --- a/fs/autofs4/symlink.c +++ b/fs/autofs4/symlink.c @@ -1,14 +1,10 @@ -/* -*- c -*- --------------------------------------------------------------- * - * - * linux/fs/autofs/symlink.c - * - * Copyright 1997-1998 Transmeta Corporation -- All Rights Reserved +/* + * Copyright 1997-1998 Transmeta Corporation -- All Rights Reserved * * This file is part of the Linux kernel and is made available under * the terms of the GNU General Public License, version 2, or at your * option, any later version, incorporated herein by reference. 
- * - * ------------------------------------------------------------------------- */ + */ #include "autofs_i.h" @@ -18,6 +14,7 @@ static const char *autofs4_get_link(struct dentry *dentry, { struct autofs_sb_info *sbi; struct autofs_info *ino; + if (!dentry) return ERR_PTR(-ECHILD); sbi = autofs4_sbi(dentry->d_sb); diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c index 35b755e79c2d..4e0c8d62dc1f 100644 --- a/fs/autofs4/waitq.c +++ b/fs/autofs4/waitq.c @@ -1,15 +1,11 @@ -/* -*- c -*- --------------------------------------------------------------- * - * - * linux/fs/autofs/waitq.c - * - * Copyright 1997-1998 Transmeta Corporation -- All Rights Reserved - * Copyright 2001-2006 Ian Kent +/* + * Copyright 1997-1998 Transmeta Corporation -- All Rights Reserved + * Copyright 2001-2006 Ian Kent * * This file is part of the Linux kernel and is made available under * the terms of the GNU General Public License, version 2, or at your * option, any later version, incorporated herein by reference. - * - * ------------------------------------------------------------------------- */ + */ #include #include @@ -18,7 +14,8 @@ #include "autofs_i.h" /* We make this a static variable rather than a part of the superblock; it - is better if we don't reassign numbers easily even across filesystems */ + * is better if we don't reassign numbers easily even across filesystems + */ static autofs_wqt_t autofs4_next_wait_queue = 1; /* These are the signals we allow interrupting a pending mount */ @@ -69,17 +66,19 @@ static int autofs4_write(struct autofs_sb_info *sbi, set_fs(KERNEL_DS); mutex_lock(&sbi->pipe_mutex); - while (bytes && - (wr = __vfs_write(file,data,bytes,&file->f_pos)) > 0) { + wr = __vfs_write(file, data, bytes, &file->f_pos); + while (bytes && wr) { data += wr; bytes -= wr; + wr = __vfs_write(file, data, bytes, &file->f_pos); } mutex_unlock(&sbi->pipe_mutex); set_fs(fs); /* Keep the currently executing process from receiving a - SIGPIPE unless it was already supposed to get one */ + * SIGPIPE unless it was already supposed to get one + */ if (wr == -EPIPE && !sigpipe) { spin_lock_irqsave(&current->sighand->siglock, flags); sigdelset(&current->pending.signal, SIGPIPE); @@ -103,9 +102,10 @@ static void autofs4_notify_daemon(struct autofs_sb_info *sbi, size_t pktsz; DPRINTK("wait id = 0x%08lx, name = %.*s, type=%d", - (unsigned long) wq->wait_queue_token, wq->name.len, wq->name.name, type); + (unsigned long) wq->wait_queue_token, + wq->name.len, wq->name.name, type); - memset(&pkt,0,sizeof pkt); /* For security reasons */ + memset(&pkt, 0, sizeof(pkt)); /* For security reasons */ pkt.hdr.proto_version = sbi->version; pkt.hdr.type = type; @@ -126,7 +126,8 @@ static void autofs4_notify_daemon(struct autofs_sb_info *sbi, } case autofs_ptype_expire_multi: { - struct autofs_packet_expire_multi *ep = &pkt.v4_pkt.expire_multi; + struct autofs_packet_expire_multi *ep = + &pkt.v4_pkt.expire_multi; pktsz = sizeof(*ep); @@ -231,7 +232,7 @@ autofs4_find_wait(struct autofs_sb_info *sbi, struct qstr *qstr) if (wq->name.hash == qstr->hash && wq->name.len == qstr->len && wq->name.name && - !memcmp(wq->name.name, qstr->name, qstr->len)) + !memcmp(wq->name.name, qstr->name, qstr->len)) break; } return wq; @@ -248,7 +249,7 @@ autofs4_find_wait(struct autofs_sb_info *sbi, struct qstr *qstr) static int validate_request(struct autofs_wait_queue **wait, struct autofs_sb_info *sbi, struct qstr *qstr, - struct dentry*dentry, enum autofs_notify notify) + struct dentry *dentry, enum autofs_notify notify) { struct autofs_wait_queue *wq;
struct autofs_info *ino; @@ -322,8 +323,10 @@ static int validate_request(struct autofs_wait_queue **wait, * continue on and create a new request. */ if (!IS_ROOT(dentry)) { - if (d_really_is_positive(dentry) && d_unhashed(dentry)) { + if (d_unhashed(dentry) && + d_really_is_positive(dentry)) { struct dentry *parent = dentry->d_parent; + new = d_lookup(parent, &dentry->d_name); if (new) dentry = new; @@ -340,8 +343,8 @@ static int validate_request(struct autofs_wait_queue **wait, return 1; } -int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry, - enum autofs_notify notify) +int autofs4_wait(struct autofs_sb_info *sbi, + struct dentry *dentry, enum autofs_notify notify) { struct autofs_wait_queue *wq; struct qstr qstr; @@ -411,7 +414,7 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry, if (!wq) { /* Create a new wait queue */ - wq = kmalloc(sizeof(struct autofs_wait_queue),GFP_KERNEL); + wq = kmalloc(sizeof(struct autofs_wait_queue), GFP_KERNEL); if (!wq) { kfree(qstr.name); mutex_unlock(&sbi->wq_mutex); @@ -454,7 +457,9 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry, (unsigned long) wq->wait_queue_token, wq->name.len, wq->name.name, notify); - /* autofs4_notify_daemon() may block; it will unlock ->wq_mutex */ + /* + * autofs4_notify_daemon() may block; it will unlock ->wq_mutex + */ autofs4_notify_daemon(sbi, wq, type); } else { wq->wait_ctr++; diff --git a/include/linux/auto_dev-ioctl.h b/include/linux/auto_dev-ioctl.h index 850f39b33e74..642781612062 100644 --- a/include/linux/auto_dev-ioctl.h +++ b/include/linux/auto_dev-ioctl.h @@ -125,7 +125,6 @@ static inline void init_autofs_dev_ioctl(struct autofs_dev_ioctl *in) in->ver_minor = AUTOFS_DEV_IOCTL_VERSION_MINOR; in->size = sizeof(struct autofs_dev_ioctl); in->ioctlfd = -1; - return; } /* diff --git a/include/linux/auto_fs.h b/include/linux/auto_fs.h index fcd704d354c4..b4066bb89083 100644 --- a/include/linux/auto_fs.h +++ b/include/linux/auto_fs.h @@ -1,14 +1,10 @@ -/* -*- linux-c -*- ------------------------------------------------------- * - * - * linux/include/linux/auto_fs.h - * - * Copyright 1997 Transmeta Corporation - All Rights Reserved +/* + * Copyright 1997 Transmeta Corporation - All Rights Reserved * * This file is part of the Linux kernel and is made available under * the terms of the GNU General Public License, version 2, or at your * option, any later version, incorporated herein by reference. 
- * - * ----------------------------------------------------------------------- */ + */ #ifndef _LINUX_AUTO_FS_H #define _LINUX_AUTO_FS_H diff --git a/include/uapi/linux/auto_fs.h b/include/uapi/linux/auto_fs.h index bb991dfe134f..5fe176aa61d1 100644 --- a/include/uapi/linux/auto_fs.h +++ b/include/uapi/linux/auto_fs.h @@ -1,7 +1,4 @@ -/* -*- linux-c -*- ------------------------------------------------------- * - * - * linux/include/linux/auto_fs.h - * +/* * Copyright 1997 Transmeta Corporation - All Rights Reserved * * This file is part of the Linux kernel and is made available under @@ -63,12 +60,12 @@ struct autofs_packet_expire { char name[NAME_MAX+1]; }; -#define AUTOFS_IOC_READY _IO(0x93,0x60) -#define AUTOFS_IOC_FAIL _IO(0x93,0x61) -#define AUTOFS_IOC_CATATONIC _IO(0x93,0x62) -#define AUTOFS_IOC_PROTOVER _IOR(0x93,0x63,int) -#define AUTOFS_IOC_SETTIMEOUT32 _IOWR(0x93,0x64,compat_ulong_t) -#define AUTOFS_IOC_SETTIMEOUT _IOWR(0x93,0x64,unsigned long) -#define AUTOFS_IOC_EXPIRE _IOR(0x93,0x65,struct autofs_packet_expire) +#define AUTOFS_IOC_READY _IO(0x93, 0x60) +#define AUTOFS_IOC_FAIL _IO(0x93, 0x61) +#define AUTOFS_IOC_CATATONIC _IO(0x93, 0x62) +#define AUTOFS_IOC_PROTOVER _IOR(0x93, 0x63, int) +#define AUTOFS_IOC_SETTIMEOUT32 _IOWR(0x93, 0x64, compat_ulong_t) +#define AUTOFS_IOC_SETTIMEOUT _IOWR(0x93, 0x64, unsigned long) +#define AUTOFS_IOC_EXPIRE _IOR(0x93, 0x65, struct autofs_packet_expire) #endif /* _UAPI_LINUX_AUTO_FS_H */ diff --git a/include/uapi/linux/auto_fs4.h b/include/uapi/linux/auto_fs4.h index e02982fa2953..924fb1adab0b 100644 --- a/include/uapi/linux/auto_fs4.h +++ b/include/uapi/linux/auto_fs4.h @@ -1,6 +1,4 @@ -/* -*- c -*- - * linux/include/linux/auto_fs4.h - * +/* * Copyright 1999-2000 Jeremy Fitzhardinge * * This file is part of the Linux kernel and is made available under @@ -38,7 +36,6 @@ static inline void set_autofs_type_indirect(unsigned int *type) { *type = AUTOFS_TYPE_INDIRECT; - return; } static inline unsigned int autofs_type_indirect(unsigned int type) @@ -49,7 +46,6 @@ static inline unsigned int autofs_type_indirect(unsigned int type) static inline void set_autofs_type_direct(unsigned int *type) { *type = AUTOFS_TYPE_DIRECT; - return; } static inline unsigned int autofs_type_direct(unsigned int type) @@ -60,7 +56,6 @@ static inline unsigned int autofs_type_direct(unsigned int type) static inline void set_autofs_type_offset(unsigned int *type) { *type = AUTOFS_TYPE_OFFSET; - return; } static inline unsigned int autofs_type_offset(unsigned int type) @@ -81,7 +76,6 @@ static inline unsigned int autofs_type_trigger(unsigned int type) static inline void set_autofs_type_any(unsigned int *type) { *type = AUTOFS_TYPE_ANY; - return; } static inline unsigned int autofs_type_any(unsigned int type) @@ -154,11 +148,10 @@ union autofs_v5_packet_union { autofs_packet_expire_direct_t expire_direct; }; -#define AUTOFS_IOC_EXPIRE_MULTI _IOW(0x93,0x66,int) +#define AUTOFS_IOC_EXPIRE_MULTI _IOW(0x93, 0x66, int) #define AUTOFS_IOC_EXPIRE_INDIRECT AUTOFS_IOC_EXPIRE_MULTI #define AUTOFS_IOC_EXPIRE_DIRECT AUTOFS_IOC_EXPIRE_MULTI -#define AUTOFS_IOC_PROTOSUBVER _IOR(0x93,0x67,int) -#define AUTOFS_IOC_ASKUMOUNT _IOR(0x93,0x70,int) - +#define AUTOFS_IOC_PROTOSUBVER _IOR(0x93, 0x67, int) +#define AUTOFS_IOC_ASKUMOUNT _IOR(0x93, 0x70, int) #endif /* _LINUX_AUTO_FS4_H */ -- cgit v1.2.3 From 63c06227a22b098a3849c5c99e836aea161ca0d7 Mon Sep 17 00:00:00 2001 From: Ian Kent Date: Tue, 15 Mar 2016 14:58:47 -0700 Subject: autofs4: fix string.h include in auto_dev-ioctl.h Since including 
linux/string.h will now do the right thing, remove the conditional check. Signed-off-by: Ian Kent Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/auto_dev-ioctl.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/auto_dev-ioctl.h b/include/linux/auto_dev-ioctl.h index 642781612062..7caaf298f539 100644 --- a/include/linux/auto_dev-ioctl.h +++ b/include/linux/auto_dev-ioctl.h @@ -11,12 +11,7 @@ #define _LINUX_AUTO_DEV_IOCTL_H #include - -#ifdef __KERNEL__ #include -#else -#include -#endif /* __KERNEL__ */ #define AUTOFS_DEVICE_NAME "autofs" -- cgit v1.2.3 From 09c32427c9e9e861e5aeaf9e08ba0b4a3cfca917 Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Tue, 8 Mar 2016 09:42:07 +0900 Subject: clk: renesas: Rename header file renesas.h This is part of an ongoing process to migrate from ARCH_SHMOBILE to ARCH_RENESAS, the motivation being that RENESAS is a more appropriate name than SHMOBILE for the majority of Renesas ARM-based SoCs. Along with the above-mentioned Kconfig changes, it seems appropriate to also rename files. Signed-off-by: Simon Horman Acked-by: Geert Uytterhoeven Signed-off-by: Stephen Boyd --- arch/arm/mach-shmobile/pm-rmobile.c | 2 +- arch/arm/mach-shmobile/setup-r8a7778.c | 2 +- arch/arm/mach-shmobile/setup-r8a7779.c | 2 +- arch/arm/mach-shmobile/setup-rcar-gen2.c | 2 +- drivers/clk/renesas/clk-mstp.c | 2 +- drivers/clk/renesas/clk-r8a73a4.c | 2 +- drivers/clk/renesas/clk-r8a7740.c | 2 +- drivers/clk/renesas/clk-r8a7778.c | 2 +- drivers/clk/renesas/clk-r8a7779.c | 2 +- drivers/clk/renesas/clk-rcar-gen2.c | 2 +- drivers/clk/renesas/clk-rz.c | 2 +- drivers/clk/renesas/clk-sh73a0.c | 2 +- include/linux/clk/renesas.h | 35 ++++++++++++++++++++++++++++++++ include/linux/clk/shmobile.h | 35 -------------------------------- 14 files changed, 47 insertions(+), 47 deletions(-) create mode 100644 include/linux/clk/renesas.h delete mode 100644 include/linux/clk/shmobile.h (limited to 'include/linux') diff --git a/arch/arm/mach-shmobile/pm-rmobile.c b/arch/arm/mach-shmobile/pm-rmobile.c index 46d0a1ddce75..c0b05e9e6442 100644 --- a/arch/arm/mach-shmobile/pm-rmobile.c +++ b/arch/arm/mach-shmobile/pm-rmobile.c @@ -12,7 +12,7 @@ * License. See the file "COPYING" in the main directory of this archive * for more details. */ -#include +#include #include #include #include diff --git a/arch/arm/mach-shmobile/setup-r8a7778.c b/arch/arm/mach-shmobile/setup-r8a7778.c index fab95d1271bc..cf236db686a9 100644 --- a/arch/arm/mach-shmobile/setup-r8a7778.c +++ b/arch/arm/mach-shmobile/setup-r8a7778.c @@ -15,7 +15,7 @@ * GNU General Public License for more details. */ -#include +#include #include #include diff --git a/arch/arm/mach-shmobile/setup-r8a7779.c b/arch/arm/mach-shmobile/setup-r8a7779.c index 1e572a903f8e..0007ff51d180 100644 --- a/arch/arm/mach-shmobile/setup-r8a7779.c +++ b/arch/arm/mach-shmobile/setup-r8a7779.c @@ -14,7 +14,7 @@ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. */ -#include +#include #include #include #include diff --git a/arch/arm/mach-shmobile/setup-rcar-gen2.c b/arch/arm/mach-shmobile/setup-rcar-gen2.c index 9eccde3c7b13..949b402c0f19 100644 --- a/arch/arm/mach-shmobile/setup-rcar-gen2.c +++ b/arch/arm/mach-shmobile/setup-rcar-gen2.c @@ -15,7 +15,7 @@ * GNU General Public License for more details.
*/ -#include +#include #include #include #include diff --git a/drivers/clk/renesas/clk-mstp.c b/drivers/clk/renesas/clk-mstp.c index 3b09716ebda2..3d44e183aedd 100644 --- a/drivers/clk/renesas/clk-mstp.c +++ b/drivers/clk/renesas/clk-mstp.c @@ -14,7 +14,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/clk/renesas/clk-r8a73a4.c b/drivers/clk/renesas/clk-r8a73a4.c index 9326204bed9d..28d204bb659e 100644 --- a/drivers/clk/renesas/clk-r8a73a4.c +++ b/drivers/clk/renesas/clk-r8a73a4.c @@ -9,7 +9,7 @@ */ #include -#include +#include #include #include #include diff --git a/drivers/clk/renesas/clk-r8a7740.c b/drivers/clk/renesas/clk-r8a7740.c index 1e6b1da58065..2f7ce6696b6c 100644 --- a/drivers/clk/renesas/clk-r8a7740.c +++ b/drivers/clk/renesas/clk-r8a7740.c @@ -9,7 +9,7 @@ */ #include -#include +#include #include #include #include diff --git a/drivers/clk/renesas/clk-r8a7778.c b/drivers/clk/renesas/clk-r8a7778.c index b1741551fff2..40e3a501a50e 100644 --- a/drivers/clk/renesas/clk-r8a7778.c +++ b/drivers/clk/renesas/clk-r8a7778.c @@ -9,7 +9,7 @@ */ #include -#include +#include #include #include diff --git a/drivers/clk/renesas/clk-r8a7779.c b/drivers/clk/renesas/clk-r8a7779.c index 92275c5f2c60..cf2a37df03b1 100644 --- a/drivers/clk/renesas/clk-r8a7779.c +++ b/drivers/clk/renesas/clk-r8a7779.c @@ -11,7 +11,7 @@ */ #include -#include +#include #include #include #include diff --git a/drivers/clk/renesas/clk-rcar-gen2.c b/drivers/clk/renesas/clk-rcar-gen2.c index 841977240305..00e6aba4b9c0 100644 --- a/drivers/clk/renesas/clk-rcar-gen2.c +++ b/drivers/clk/renesas/clk-rcar-gen2.c @@ -11,7 +11,7 @@ */ #include -#include +#include #include #include #include diff --git a/drivers/clk/renesas/clk-rz.c b/drivers/clk/renesas/clk-rz.c index 9766e3cb595f..f6312c62f16b 100644 --- a/drivers/clk/renesas/clk-rz.c +++ b/drivers/clk/renesas/clk-rz.c @@ -10,7 +10,7 @@ */ #include -#include +#include #include #include #include diff --git a/drivers/clk/renesas/clk-sh73a0.c b/drivers/clk/renesas/clk-sh73a0.c index 8966f8bbfd72..eea38f6ea77e 100644 --- a/drivers/clk/renesas/clk-sh73a0.c +++ b/drivers/clk/renesas/clk-sh73a0.c @@ -9,7 +9,7 @@ */ #include -#include +#include #include #include #include diff --git a/include/linux/clk/renesas.h b/include/linux/clk/renesas.h new file mode 100644 index 000000000000..7adfd80fbf55 --- /dev/null +++ b/include/linux/clk/renesas.h @@ -0,0 +1,35 @@ +/* + * Copyright 2013 Ideas On Board SPRL + * Copyright 2013, 2014 Horms Solutions Ltd. + * + * Contact: Laurent Pinchart + * Contact: Simon Horman + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ */ + +#ifndef __LINUX_CLK_RENESAS_H_ +#define __LINUX_CLK_RENESAS_H_ + +#include + +struct device; +struct device_node; +struct generic_pm_domain; + +void r8a7778_clocks_init(u32 mode); +void r8a7779_clocks_init(u32 mode); +void rcar_gen2_clocks_init(u32 mode); + +#ifdef CONFIG_PM_GENERIC_DOMAINS_OF +void cpg_mstp_add_clk_domain(struct device_node *np); +int cpg_mstp_attach_dev(struct generic_pm_domain *domain, struct device *dev); +void cpg_mstp_detach_dev(struct generic_pm_domain *domain, struct device *dev); +#else +static inline void cpg_mstp_add_clk_domain(struct device_node *np) {} +#endif + +#endif diff --git a/include/linux/clk/shmobile.h b/include/linux/clk/shmobile.h deleted file mode 100644 index cb19cc1865ca..000000000000 --- a/include/linux/clk/shmobile.h +++ /dev/null @@ -1,35 +0,0 @@ -/* - * Copyright 2013 Ideas On Board SPRL - * Copyright 2013, 2014 Horms Solutions Ltd. - * - * Contact: Laurent Pinchart - * Contact: Simon Horman - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - */ - -#ifndef __LINUX_CLK_SHMOBILE_H_ -#define __LINUX_CLK_SHMOBILE_H_ - -#include - -struct device; -struct device_node; -struct generic_pm_domain; - -void r8a7778_clocks_init(u32 mode); -void r8a7779_clocks_init(u32 mode); -void rcar_gen2_clocks_init(u32 mode); - -#ifdef CONFIG_PM_GENERIC_DOMAINS_OF -void cpg_mstp_add_clk_domain(struct device_node *np); -int cpg_mstp_attach_dev(struct generic_pm_domain *domain, struct device *dev); -void cpg_mstp_detach_dev(struct generic_pm_domain *domain, struct device *dev); -#else -static inline void cpg_mstp_add_clk_domain(struct device_node *np) {} -#endif - -#endif -- cgit v1.2.3 From b45b719ee03162eb54772c30a6474d57b41b6b54 Mon Sep 17 00:00:00 2001 From: "Andrew F. Davis" Date: Mon, 25 Jan 2016 09:50:11 -0600 Subject: mfd: tps65086: Add driver for the TPS65086 PMIC Add support for the TPS65086 device. It provides communication through I2C and contains the following components: - Regulators - Load switches - GPO controller Signed-off-by: Andrew F. Davis Signed-off-by: Lee Jones --- drivers/mfd/Kconfig | 13 ++++ drivers/mfd/Makefile | 1 + drivers/mfd/tps65086.c | 149 +++++++++++++++++++++++++++++++++++++++++++ include/linux/mfd/tps65086.h | 117 +++++++++++++++++++++++++++++++++ 4 files changed, 280 insertions(+) create mode 100644 drivers/mfd/tps65086.c create mode 100644 include/linux/mfd/tps65086.h (limited to 'include/linux') diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig index f47965834a6b..97d94068cb4d 100644 --- a/drivers/mfd/Kconfig +++ b/drivers/mfd/Kconfig @@ -1130,6 +1130,19 @@ config TPS6507X This driver can also be built as a module. If so, the module will be called tps6507x. +config MFD_TPS65086 + tristate "TI TPS65086 Power Management Integrated Chips (PMICs)" + select REGMAP + select REGMAP_IRQ + select REGMAP_I2C + depends on I2C + help + If you say yes here you get support for the TPS65086 series of + Power Management chips. + This driver provides common support for accessing the device, + additional drivers must be enabled in order to use the + functionality of the device.
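
A note on how the new core is meant to be consumed: tps65086_probe() below stores its state on the I2C client with i2c_set_clientdata(), so the child cells registered through mfd_add_devices() ("tps65086-regulator", "tps65086-gpio") can reach the shared regmap via their parent device. The following is a minimal sketch of such a child driver under that assumption; the "tps65086-example" name and the choice of TPS65086_PMICSTAT are illustrative assumptions, not part of this patch.

/*
 * Illustrative sketch only: a hypothetical child of the TPS65086 MFD
 * core. It assumes the parent device's drvdata holds the shared
 * struct tps65086, as arranged by tps65086_probe() below.
 */
#include <linux/module.h>
#include <linux/platform_device.h>
#include <linux/regmap.h>
#include <linux/mfd/tps65086.h>

static int tps65086_example_probe(struct platform_device *pdev)
{
	/* The MFD core placed its state on our parent, the I2C client */
	struct tps65086 *tps = dev_get_drvdata(pdev->dev.parent);
	unsigned int status;
	int ret;

	/* All register access goes through the shared regmap */
	ret = regmap_read(tps->regmap, TPS65086_PMICSTAT, &status);
	if (ret)
		return ret;

	dev_info(&pdev->dev, "PMICSTAT = 0x%02x\n", status);
	return 0;
}

static struct platform_driver tps65086_example_driver = {
	.driver = { .name = "tps65086-example" },
	.probe = tps65086_example_probe,
};
module_platform_driver(tps65086_example_driver);
MODULE_LICENSE("GPL v2");

This mirrors the standard MFD pattern the driver relies on: one I2C regmap owned by the core, shared read-only by every cell.
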
+ config TPS65911_COMPARATOR tristate diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile index b7a3cd9adaee..d2ddcf45ccd1 100644 --- a/drivers/mfd/Makefile +++ b/drivers/mfd/Makefile @@ -70,6 +70,7 @@ obj-$(CONFIG_MFD_WM8994) += wm8994.o obj-$(CONFIG_TPS6105X) += tps6105x.o obj-$(CONFIG_TPS65010) += tps65010.o obj-$(CONFIG_TPS6507X) += tps6507x.o +obj-$(CONFIG_MFD_TPS65086) += tps65086.o obj-$(CONFIG_MFD_TPS65217) += tps65217.o obj-$(CONFIG_MFD_TPS65218) += tps65218.o obj-$(CONFIG_MFD_TPS65910) += tps65910.o diff --git a/drivers/mfd/tps65086.c b/drivers/mfd/tps65086.c new file mode 100644 index 000000000000..43119a6867fe --- /dev/null +++ b/drivers/mfd/tps65086.c @@ -0,0 +1,149 @@ +/* + * Copyright (C) 2015 Texas Instruments Incorporated - http://www.ti.com/ + * Andrew F. Davis + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed "as is" WITHOUT ANY WARRANTY of any + * kind, whether expressed or implied; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License version 2 for more details. + * + * Based on the TPS65912 driver + */ + +#include +#include +#include +#include + +#include + +static const struct mfd_cell tps65086_cells[] = { + { .name = "tps65086-regulator", }, + { .name = "tps65086-gpio", }, +}; + +static const struct regmap_range tps65086_yes_ranges[] = { + regmap_reg_range(TPS65086_IRQ, TPS65086_IRQ), + regmap_reg_range(TPS65086_PMICSTAT, TPS65086_SHUTDNSRC), + regmap_reg_range(TPS65086_GPOCTRL, TPS65086_GPOCTRL), + regmap_reg_range(TPS65086_PG_STATUS1, TPS65086_OC_STATUS), +}; + +static const struct regmap_access_table tps65086_volatile_table = { + .yes_ranges = tps65086_yes_ranges, + .n_yes_ranges = ARRAY_SIZE(tps65086_yes_ranges), +}; + +static const struct regmap_config tps65086_regmap_config = { + .reg_bits = 8, + .val_bits = 8, + .cache_type = REGCACHE_RBTREE, + .volatile_table = &tps65086_volatile_table, +}; + +static const struct regmap_irq tps65086_irqs[] = { + REGMAP_IRQ_REG(TPS65086_IRQ_DIETEMP, 0, TPS65086_IRQ_DIETEMP_MASK), + REGMAP_IRQ_REG(TPS65086_IRQ_SHUTDN, 0, TPS65086_IRQ_SHUTDN_MASK), + REGMAP_IRQ_REG(TPS65086_IRQ_FAULT, 0, TPS65086_IRQ_FAULT_MASK), +}; + +static struct regmap_irq_chip tps65086_irq_chip = { + .name = "tps65086", + .status_base = TPS65086_IRQ, + .mask_base = TPS65086_IRQ_MASK, + .ack_base = TPS65086_IRQ, + .init_ack_masked = true, + .num_regs = 1, + .irqs = tps65086_irqs, + .num_irqs = ARRAY_SIZE(tps65086_irqs), +}; + +static const struct of_device_id tps65086_of_match_table[] = { + { .compatible = "ti,tps65086", }, + { /* sentinel */ } +}; +MODULE_DEVICE_TABLE(of, tps65086_of_match_table); + +static int tps65086_probe(struct i2c_client *client, + const struct i2c_device_id *ids) +{ + struct tps65086 *tps; + unsigned int version; + int ret; + + tps = devm_kzalloc(&client->dev, sizeof(*tps), GFP_KERNEL); + if (!tps) + return -ENOMEM; + + i2c_set_clientdata(client, tps); + tps->dev = &client->dev; + tps->irq = client->irq; + + tps->regmap = devm_regmap_init_i2c(client, &tps65086_regmap_config); + if (IS_ERR(tps->regmap)) { + dev_err(tps->dev, "Failed to initialize register map\n"); + return PTR_ERR(tps->regmap); + } + + ret = regmap_read(tps->regmap, TPS65086_DEVICEID, &version); + if (ret) { + dev_err(tps->dev, "Failed to read revision register\n"); + return ret; + } + + dev_info(tps->dev, 
"Device: TPS65086%01lX, OTP: %c, Rev: %ld\n", + (version & TPS65086_DEVICEID_PART_MASK), + (char)((version & TPS65086_DEVICEID_OTP_MASK) >> 4) + 'A', + (version & TPS65086_DEVICEID_REV_MASK) >> 6); + + ret = regmap_add_irq_chip(tps->regmap, tps->irq, IRQF_ONESHOT, 0, + &tps65086_irq_chip, &tps->irq_data); + if (ret) { + dev_err(tps->dev, "Failed to register IRQ chip\n"); + return ret; + } + + ret = mfd_add_devices(tps->dev, PLATFORM_DEVID_AUTO, tps65086_cells, + ARRAY_SIZE(tps65086_cells), NULL, 0, + regmap_irq_get_domain(tps->irq_data)); + if (ret) { + regmap_del_irq_chip(tps->irq, tps->irq_data); + return ret; + } + + return 0; +} + +static int tps65086_remove(struct i2c_client *client) +{ + struct tps65086 *tps = i2c_get_clientdata(client); + + regmap_del_irq_chip(tps->irq, tps->irq_data); + + return 0; +} + +static const struct i2c_device_id tps65086_id_table[] = { + { "tps65086", 0 }, + { /* sentinel */ } +}; +MODULE_DEVICE_TABLE(i2c, tps65086_id_table); + +static struct i2c_driver tps65086_driver = { + .driver = { + .name = "tps65086", + .of_match_table = tps65086_of_match_table, + }, + .probe = tps65086_probe, + .remove = tps65086_remove, + .id_table = tps65086_id_table, +}; +module_i2c_driver(tps65086_driver); + +MODULE_AUTHOR("Andrew F. Davis "); +MODULE_DESCRIPTION("TPS65086 PMIC Driver"); +MODULE_LICENSE("GPL v2"); diff --git a/include/linux/mfd/tps65086.h b/include/linux/mfd/tps65086.h new file mode 100644 index 000000000000..a228ae4c88d9 --- /dev/null +++ b/include/linux/mfd/tps65086.h @@ -0,0 +1,117 @@ +/* + * Copyright (C) 2015 Texas Instruments Incorporated - http://www.ti.com/ + * Andrew F. Davis + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed "as is" WITHOUT ANY WARRANTY of any + * kind, whether expressed or implied; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License version 2 for more details. 
+ * + * Based on the TPS65912 driver + */ + +#ifndef __LINUX_MFD_TPS65086_H +#define __LINUX_MFD_TPS65086_H + +#include +#include + +/* List of registers for TPS65086 */ +#define TPS65086_DEVICEID 0x01 +#define TPS65086_IRQ 0x02 +#define TPS65086_IRQ_MASK 0x03 +#define TPS65086_PMICSTAT 0x04 +#define TPS65086_SHUTDNSRC 0x05 +#define TPS65086_BUCK1CTRL 0x20 +#define TPS65086_BUCK2CTRL 0x21 +#define TPS65086_BUCK3DECAY 0x22 +#define TPS65086_BUCK3VID 0x23 +#define TPS65086_BUCK3SLPCTRL 0x24 +#define TPS65086_BUCK4CTRL 0x25 +#define TPS65086_BUCK5CTRL 0x26 +#define TPS65086_BUCK6CTRL 0x27 +#define TPS65086_LDOA2CTRL 0x28 +#define TPS65086_LDOA3CTRL 0x29 +#define TPS65086_DISCHCTRL1 0x40 +#define TPS65086_DISCHCTRL2 0x41 +#define TPS65086_DISCHCTRL3 0x42 +#define TPS65086_PG_DELAY1 0x43 +#define TPS65086_FORCESHUTDN 0x91 +#define TPS65086_BUCK1SLPCTRL 0x92 +#define TPS65086_BUCK2SLPCTRL 0x93 +#define TPS65086_BUCK4VID 0x94 +#define TPS65086_BUCK4SLPVID 0x95 +#define TPS65086_BUCK5VID 0x96 +#define TPS65086_BUCK5SLPVID 0x97 +#define TPS65086_BUCK6VID 0x98 +#define TPS65086_BUCK6SLPVID 0x99 +#define TPS65086_LDOA2VID 0x9A +#define TPS65086_LDOA3VID 0x9B +#define TPS65086_BUCK123CTRL 0x9C +#define TPS65086_PG_DELAY2 0x9D +#define TPS65086_PIN_EN_MASK1 0x9E +#define TPS65086_PIN_EN_MASK2 0x9F +#define TPS65086_SWVTT_EN 0x9F +#define TPS65086_PIN_EN_OVR1 0xA0 +#define TPS65086_PIN_EN_OVR2 0xA1 +#define TPS65086_GPOCTRL 0xA1 +#define TPS65086_PWR_FAULT_MASK1 0xA2 +#define TPS65086_PWR_FAULT_MASK2 0xA3 +#define TPS65086_GPO1PG_CTRL1 0xA4 +#define TPS65086_GPO1PG_CTRL2 0xA5 +#define TPS65086_GPO4PG_CTRL1 0xA6 +#define TPS65086_GPO4PG_CTRL2 0xA7 +#define TPS65086_GPO2PG_CTRL1 0xA8 +#define TPS65086_GPO2PG_CTRL2 0xA9 +#define TPS65086_GPO3PG_CTRL1 0xAA +#define TPS65086_GPO3PG_CTRL2 0xAB +#define TPS65086_LDOA1CTRL 0xAE +#define TPS65086_PG_STATUS1 0xB0 +#define TPS65086_PG_STATUS2 0xB1 +#define TPS65086_PWR_FAULT_STATUS1 0xB2 +#define TPS65086_PWR_FAULT_STATUS2 0xB3 +#define TPS65086_TEMPCRIT 0xB4 +#define TPS65086_TEMPHOT 0xB5 +#define TPS65086_OC_STATUS 0xB6 + +/* IRQ Register field definitions */ +#define TPS65086_IRQ_DIETEMP_MASK BIT(0) +#define TPS65086_IRQ_SHUTDN_MASK BIT(3) +#define TPS65086_IRQ_FAULT_MASK BIT(7) + +/* DEVICEID Register field definitions */ +#define TPS65086_DEVICEID_PART_MASK GENMASK(3, 0) +#define TPS65086_DEVICEID_OTP_MASK GENMASK(5, 4) +#define TPS65086_DEVICEID_REV_MASK GENMASK(7, 6) + +/* VID Masks */ +#define BUCK_VID_MASK GENMASK(7, 1) +#define VDOA1_VID_MASK GENMASK(4, 1) +#define VDOA23_VID_MASK GENMASK(3, 0) + +/* Define the TPS65086 IRQ numbers */ +enum tps65086_irqs { + TPS65086_IRQ_DIETEMP, + TPS65086_IRQ_SHUTDN, + TPS65086_IRQ_FAULT, +}; + +/** + * struct tps65086 - state holder for the tps65086 driver + * + * Device data may be used to access the TPS65086 chip + */ +struct tps65086 { + struct device *dev; + struct regmap *regmap; + + /* IRQ Data */ + int irq; + struct regmap_irq_chip_data *irq_data; +}; + +#endif /* __LINUX_MFD_TPS65086_H */ -- cgit v1.2.3 From f4bcf5a29d57fe515b8eb6c612090376086b5f79 Mon Sep 17 00:00:00 2001 From: Tomeu Vizoso Date: Fri, 5 Feb 2016 14:32:56 +0100 Subject: mfd: cros_ec: Small kerneldoc fix s/cros_ec_register/cros_ec_query_all Signed-off-by: Tomeu Vizoso Signed-off-by: Lee Jones --- include/linux/mfd/cros_ec.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mfd/cros_ec.h b/include/linux/mfd/cros_ec.h index 494682ce4bf3..a677c2bd485c 100644 --- 
a/include/linux/mfd/cros_ec.h +++ b/include/linux/mfd/cros_ec.h @@ -245,7 +245,7 @@ int cros_ec_remove(struct cros_ec_device *ec_dev); int cros_ec_register(struct cros_ec_device *ec_dev); /** - * cros_ec_register - Query the protocol version supported by the ChromeOS EC + * cros_ec_query_all - Query the protocol version supported by the ChromeOS EC * * @ec_dev: Device to register * @return 0 if ok, -ve on error -- cgit v1.2.3 From feec4799ac4d214abd62e2bdfccb3ca9c5801d2f Mon Sep 17 00:00:00 2001 From: John Crispin Date: Wed, 27 Jan 2016 12:47:36 +0100 Subject: mfd: mt6397: int_con and int_status may vary in location MT6323 has the INT_CON and INT_STATUS registers located at different positions. Make the register locations configurable. Signed-off-by: John Crispin Signed-off-by: Lee Jones --- drivers/mfd/mt6397-core.c | 27 +++++++++++++++++---------- include/linux/mfd/mt6397/core.h | 2 ++ 2 files changed, 19 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/drivers/mfd/mt6397-core.c b/drivers/mfd/mt6397-core.c index 1749c1c9f405..75ad0fe656f7 100644 --- a/drivers/mfd/mt6397-core.c +++ b/drivers/mfd/mt6397-core.c @@ -69,8 +69,10 @@ static void mt6397_irq_sync_unlock(struct irq_data *data) { struct mt6397_chip *mt6397 = irq_data_get_irq_chip_data(data); - regmap_write(mt6397->regmap, MT6397_INT_CON0, mt6397->irq_masks_cur[0]); - regmap_write(mt6397->regmap, MT6397_INT_CON1, mt6397->irq_masks_cur[1]); + regmap_write(mt6397->regmap, mt6397->int_con[0], + mt6397->irq_masks_cur[0]); + regmap_write(mt6397->regmap, mt6397->int_con[1], + mt6397->irq_masks_cur[1]); mutex_unlock(&mt6397->irqlock); } @@ -147,8 +149,8 @@ static irqreturn_t mt6397_irq_thread(int irq, void *data) { struct mt6397_chip *mt6397 = data; - mt6397_irq_handle_reg(mt6397, MT6397_INT_STATUS0, 0); - mt6397_irq_handle_reg(mt6397, MT6397_INT_STATUS1, 16); + mt6397_irq_handle_reg(mt6397, mt6397->int_status[0], 0); + mt6397_irq_handle_reg(mt6397, mt6397->int_status[1], 16); return IRQ_HANDLED; } @@ -177,8 +179,8 @@ static int mt6397_irq_init(struct mt6397_chip *mt6397) mutex_init(&mt6397->irqlock); /* Mask all interrupt sources */ - regmap_write(mt6397->regmap, MT6397_INT_CON0, 0x0); - regmap_write(mt6397->regmap, MT6397_INT_CON1, 0x0); + regmap_write(mt6397->regmap, mt6397->int_con[0], 0x0); + regmap_write(mt6397->regmap, mt6397->int_con[1], 0x0); mt6397->irq_domain = irq_domain_add_linear(mt6397->dev->of_node, MT6397_IRQ_NR, &mt6397_irq_domain_ops, mt6397); @@ -203,8 +205,8 @@ static int mt6397_irq_suspend(struct device *dev) { struct mt6397_chip *chip = dev_get_drvdata(dev); - regmap_write(chip->regmap, MT6397_INT_CON0, chip->wake_mask[0]); - regmap_write(chip->regmap, MT6397_INT_CON1, chip->wake_mask[1]); + regmap_write(chip->regmap, chip->int_con[0], chip->wake_mask[0]); + regmap_write(chip->regmap, chip->int_con[1], chip->wake_mask[1]); enable_irq_wake(chip->irq); @@ -215,8 +217,8 @@ static int mt6397_irq_resume(struct device *dev) { struct mt6397_chip *chip = dev_get_drvdata(dev); - regmap_write(chip->regmap, MT6397_INT_CON0, chip->irq_masks_cur[0]); - regmap_write(chip->regmap, MT6397_INT_CON1, chip->irq_masks_cur[1]); + regmap_write(chip->regmap, chip->int_con[0], chip->irq_masks_cur[0]); + regmap_write(chip->regmap, chip->int_con[1], chip->irq_masks_cur[1]); disable_irq_wake(chip->irq); @@ -237,6 +239,11 @@ static int mt6397_probe(struct platform_device *pdev) return -ENOMEM; mt6397->dev = &pdev->dev; + mt6397->int_con[0] = MT6397_INT_CON0; + mt6397->int_con[1] = MT6397_INT_CON1; + mt6397->int_status[0] =
MT6397_INT_STATUS0; + mt6397->int_status[1] = MT6397_INT_STATUS1; + /* * mt6397 MFD is child device of soc pmic wrapper. * Regmap is set from its parent. diff --git a/include/linux/mfd/mt6397/core.h b/include/linux/mfd/mt6397/core.h index 45b8e8aa1fbf..d678f526e498 100644 --- a/include/linux/mfd/mt6397/core.h +++ b/include/linux/mfd/mt6397/core.h @@ -60,6 +60,8 @@ struct mt6397_chip { u16 wake_mask[2]; u16 irq_masks_cur[2]; u16 irq_masks_cache[2]; + u16 int_con[2]; + u16 int_status[2]; }; #endif /* __MFD_MT6397_CORE_H__ */ -- cgit v1.2.3 From 44760cf3bf0a29da8f5cc271698c8772b8f79673 Mon Sep 17 00:00:00 2001 From: John Crispin Date: Wed, 27 Jan 2016 12:47:38 +0100 Subject: mfd: mt6397: Add MT6323 support to MT6397 driver Signed-off-by: John Crispin Signed-off-by: Lee Jones --- drivers/mfd/mt6397-core.c | 20 ++ include/linux/mfd/mt6323/core.h | 36 ++++ include/linux/mfd/mt6323/registers.h | 408 +++++++++++++++++++++++++++++++++++ 3 files changed, 464 insertions(+) create mode 100644 include/linux/mfd/mt6323/core.h create mode 100644 include/linux/mfd/mt6323/registers.h (limited to 'include/linux') diff --git a/drivers/mfd/mt6397-core.c b/drivers/mfd/mt6397-core.c index aa91606f9a30..8234cd34e438 100644 --- a/drivers/mfd/mt6397-core.c +++ b/drivers/mfd/mt6397-core.c @@ -19,11 +19,14 @@ #include #include #include +#include #include +#include #define MT6397_RTC_BASE 0xe000 #define MT6397_RTC_SIZE 0x3e +#define MT6323_CID_CODE 0x23 #define MT6391_CID_CODE 0x91 #define MT6397_CID_CODE 0x97 @@ -40,6 +43,13 @@ static const struct resource mt6397_rtc_resources[] = { }, }; +static const struct mfd_cell mt6323_devs[] = { + { + .name = "mt6323-regulator", + .of_compatible = "mediatek,mt6323-regulator" + }, +}; + static const struct mfd_cell mt6397_devs[] = { { .name = "mt6397-rtc", @@ -261,6 +271,15 @@ static int mt6397_probe(struct platform_device *pdev) } switch (id & 0xff) { + case MT6323_CID_CODE: + pmic->int_con[0] = MT6323_INT_CON0; + pmic->int_con[1] = MT6323_INT_CON1; + pmic->int_status[0] = MT6323_INT_STATUS0; + pmic->int_status[1] = MT6323_INT_STATUS1; + ret = mfd_add_devices(&pdev->dev, -1, mt6323_devs, + ARRAY_SIZE(mt6323_devs), NULL, 0, NULL); + break; + case MT6397_CID_CODE: case MT6391_CID_CODE: pmic->int_con[0] = MT6397_INT_CON0; @@ -302,6 +321,7 @@ static int mt6397_remove(struct platform_device *pdev) static const struct of_device_id mt6397_of_match[] = { { .compatible = "mediatek,mt6397" }, + { .compatible = "mediatek,mt6323" }, { } }; MODULE_DEVICE_TABLE(of, mt6397_of_match); diff --git a/include/linux/mfd/mt6323/core.h b/include/linux/mfd/mt6323/core.h new file mode 100644 index 000000000000..06d0ec3b1f8f --- /dev/null +++ b/include/linux/mfd/mt6323/core.h @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2016 Chen Zhong + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#ifndef __MFD_MT6323_CORE_H__ +#define __MFD_MT6323_CORE_H__ + +enum MT6323_IRQ_STATUS_numbers { + MT6323_IRQ_STATUS_SPKL_AB = 0, + MT6323_IRQ_STATUS_SPKL, + MT6323_IRQ_STATUS_BAT_L, + MT6323_IRQ_STATUS_BAT_H, + MT6323_IRQ_STATUS_WATCHDOG, + MT6323_IRQ_STATUS_PWRKEY, + MT6323_IRQ_STATUS_THR_L, + MT6323_IRQ_STATUS_THR_H, + MT6323_IRQ_STATUS_VBATON_UNDET, + MT6323_IRQ_STATUS_BVALID_DET, + MT6323_IRQ_STATUS_CHRDET, + MT6323_IRQ_STATUS_OV, + MT6323_IRQ_STATUS_LDO = 16, + MT6323_IRQ_STATUS_FCHRKEY, + MT6323_IRQ_STATUS_ACCDET, + MT6323_IRQ_STATUS_AUDIO, + MT6323_IRQ_STATUS_RTC, + MT6323_IRQ_STATUS_VPROC, + MT6323_IRQ_STATUS_VSYS, + MT6323_IRQ_STATUS_VPA, + MT6323_IRQ_STATUS_NR, +}; + +#endif /* __MFD_MT6323_CORE_H__ */ diff --git a/include/linux/mfd/mt6323/registers.h b/include/linux/mfd/mt6323/registers.h new file mode 100644 index 000000000000..160f3c0e2589 --- /dev/null +++ b/include/linux/mfd/mt6323/registers.h @@ -0,0 +1,408 @@ +/* + * Copyright (c) 2016 Chen Zhong + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef __MFD_MT6323_REGISTERS_H__ +#define __MFD_MT6323_REGISTERS_H__ + +/* PMIC Registers */ +#define MT6323_CHR_CON0 0x0000 +#define MT6323_CHR_CON1 0x0002 +#define MT6323_CHR_CON2 0x0004 +#define MT6323_CHR_CON3 0x0006 +#define MT6323_CHR_CON4 0x0008 +#define MT6323_CHR_CON5 0x000A +#define MT6323_CHR_CON6 0x000C +#define MT6323_CHR_CON7 0x000E +#define MT6323_CHR_CON8 0x0010 +#define MT6323_CHR_CON9 0x0012 +#define MT6323_CHR_CON10 0x0014 +#define MT6323_CHR_CON11 0x0016 +#define MT6323_CHR_CON12 0x0018 +#define MT6323_CHR_CON13 0x001A +#define MT6323_CHR_CON14 0x001C +#define MT6323_CHR_CON15 0x001E +#define MT6323_CHR_CON16 0x0020 +#define MT6323_CHR_CON17 0x0022 +#define MT6323_CHR_CON18 0x0024 +#define MT6323_CHR_CON19 0x0026 +#define MT6323_CHR_CON20 0x0028 +#define MT6323_CHR_CON21 0x002A +#define MT6323_CHR_CON22 0x002C +#define MT6323_CHR_CON23 0x002E +#define MT6323_CHR_CON24 0x0030 +#define MT6323_CHR_CON25 0x0032 +#define MT6323_CHR_CON26 0x0034 +#define MT6323_CHR_CON27 0x0036 +#define MT6323_CHR_CON28 0x0038 +#define MT6323_CHR_CON29 0x003A +#define MT6323_STRUP_CON0 0x003C +#define MT6323_STRUP_CON2 0x003E +#define MT6323_STRUP_CON3 0x0040 +#define MT6323_STRUP_CON4 0x0042 +#define MT6323_STRUP_CON5 0x0044 +#define MT6323_STRUP_CON6 0x0046 +#define MT6323_STRUP_CON7 0x0048 +#define MT6323_STRUP_CON8 0x004A +#define MT6323_STRUP_CON9 0x004C +#define MT6323_STRUP_CON10 0x004E +#define MT6323_STRUP_CON11 0x0050 +#define MT6323_SPK_CON0 0x0052 +#define MT6323_SPK_CON1 0x0054 +#define MT6323_SPK_CON2 0x0056 +#define MT6323_SPK_CON6 0x005E +#define MT6323_SPK_CON7 0x0060 +#define MT6323_SPK_CON8 0x0062 +#define MT6323_SPK_CON9 0x0064 +#define MT6323_SPK_CON10 0x0066 +#define MT6323_SPK_CON11 0x0068 +#define MT6323_SPK_CON12 0x006A +#define MT6323_CID 0x0100 +#define MT6323_TOP_CKPDN0 0x0102 +#define MT6323_TOP_CKPDN0_SET 0x0104 +#define MT6323_TOP_CKPDN0_CLR 0x0106 +#define MT6323_TOP_CKPDN1 0x0108 +#define MT6323_TOP_CKPDN1_SET 0x010A +#define MT6323_TOP_CKPDN1_CLR 0x010C +#define MT6323_TOP_CKPDN2 0x010E +#define MT6323_TOP_CKPDN2_SET 0x0110 +#define MT6323_TOP_CKPDN2_CLR 0x0112 +#define MT6323_TOP_RST_CON 0x0114 +#define MT6323_TOP_RST_CON_SET 0x0116 +#define MT6323_TOP_RST_CON_CLR 0x0118 +#define MT6323_TOP_RST_MISC 0x011A +#define MT6323_TOP_RST_MISC_SET 0x011C +#define MT6323_TOP_RST_MISC_CLR 0x011E 
+#define MT6323_TOP_CKCON0 0x0120 +#define MT6323_TOP_CKCON0_SET 0x0122 +#define MT6323_TOP_CKCON0_CLR 0x0124 +#define MT6323_TOP_CKCON1 0x0126 +#define MT6323_TOP_CKCON1_SET 0x0128 +#define MT6323_TOP_CKCON1_CLR 0x012A +#define MT6323_TOP_CKTST0 0x012C +#define MT6323_TOP_CKTST1 0x012E +#define MT6323_TOP_CKTST2 0x0130 +#define MT6323_TEST_OUT 0x0132 +#define MT6323_TEST_CON0 0x0134 +#define MT6323_TEST_CON1 0x0136 +#define MT6323_EN_STATUS0 0x0138 +#define MT6323_EN_STATUS1 0x013A +#define MT6323_OCSTATUS0 0x013C +#define MT6323_OCSTATUS1 0x013E +#define MT6323_PGSTATUS 0x0140 +#define MT6323_CHRSTATUS 0x0142 +#define MT6323_TDSEL_CON 0x0144 +#define MT6323_RDSEL_CON 0x0146 +#define MT6323_SMT_CON0 0x0148 +#define MT6323_SMT_CON1 0x014A +#define MT6323_SMT_CON2 0x014C +#define MT6323_SMT_CON3 0x014E +#define MT6323_SMT_CON4 0x0150 +#define MT6323_DRV_CON0 0x0152 +#define MT6323_DRV_CON1 0x0154 +#define MT6323_DRV_CON2 0x0156 +#define MT6323_DRV_CON3 0x0158 +#define MT6323_DRV_CON4 0x015A +#define MT6323_SIMLS1_CON 0x015C +#define MT6323_SIMLS2_CON 0x015E +#define MT6323_INT_CON0 0x0160 +#define MT6323_INT_CON0_SET 0x0162 +#define MT6323_INT_CON0_CLR 0x0164 +#define MT6323_INT_CON1 0x0166 +#define MT6323_INT_CON1_SET 0x0168 +#define MT6323_INT_CON1_CLR 0x016A +#define MT6323_INT_MISC_CON 0x016C +#define MT6323_INT_MISC_CON_SET 0x016E +#define MT6323_INT_MISC_CON_CLR 0x0170 +#define MT6323_INT_STATUS0 0x0172 +#define MT6323_INT_STATUS1 0x0174 +#define MT6323_OC_GEAR_0 0x0176 +#define MT6323_OC_GEAR_1 0x0178 +#define MT6323_OC_GEAR_2 0x017A +#define MT6323_OC_CTL_VPROC 0x017C +#define MT6323_OC_CTL_VSYS 0x017E +#define MT6323_OC_CTL_VPA 0x0180 +#define MT6323_FQMTR_CON0 0x0182 +#define MT6323_FQMTR_CON1 0x0184 +#define MT6323_FQMTR_CON2 0x0186 +#define MT6323_RG_SPI_CON 0x0188 +#define MT6323_DEW_DIO_EN 0x018A +#define MT6323_DEW_READ_TEST 0x018C +#define MT6323_DEW_WRITE_TEST 0x018E +#define MT6323_DEW_CRC_SWRST 0x0190 +#define MT6323_DEW_CRC_EN 0x0192 +#define MT6323_DEW_CRC_VAL 0x0194 +#define MT6323_DEW_DBG_MON_SEL 0x0196 +#define MT6323_DEW_CIPHER_KEY_SEL 0x0198 +#define MT6323_DEW_CIPHER_IV_SEL 0x019A +#define MT6323_DEW_CIPHER_EN 0x019C +#define MT6323_DEW_CIPHER_RDY 0x019E +#define MT6323_DEW_CIPHER_MODE 0x01A0 +#define MT6323_DEW_CIPHER_SWRST 0x01A2 +#define MT6323_DEW_RDDMY_NO 0x01A4 +#define MT6323_DEW_RDATA_DLY_SEL 0x01A6 +#define MT6323_BUCK_CON0 0x0200 +#define MT6323_BUCK_CON1 0x0202 +#define MT6323_BUCK_CON2 0x0204 +#define MT6323_BUCK_CON3 0x0206 +#define MT6323_BUCK_CON4 0x0208 +#define MT6323_BUCK_CON5 0x020A +#define MT6323_VPROC_CON0 0x020C +#define MT6323_VPROC_CON1 0x020E +#define MT6323_VPROC_CON2 0x0210 +#define MT6323_VPROC_CON3 0x0212 +#define MT6323_VPROC_CON4 0x0214 +#define MT6323_VPROC_CON5 0x0216 +#define MT6323_VPROC_CON7 0x021A +#define MT6323_VPROC_CON8 0x021C +#define MT6323_VPROC_CON9 0x021E +#define MT6323_VPROC_CON10 0x0220 +#define MT6323_VPROC_CON11 0x0222 +#define MT6323_VPROC_CON12 0x0224 +#define MT6323_VPROC_CON13 0x0226 +#define MT6323_VPROC_CON14 0x0228 +#define MT6323_VPROC_CON15 0x022A +#define MT6323_VPROC_CON18 0x0230 +#define MT6323_VSYS_CON0 0x0232 +#define MT6323_VSYS_CON1 0x0234 +#define MT6323_VSYS_CON2 0x0236 +#define MT6323_VSYS_CON3 0x0238 +#define MT6323_VSYS_CON4 0x023A +#define MT6323_VSYS_CON5 0x023C +#define MT6323_VSYS_CON7 0x0240 +#define MT6323_VSYS_CON8 0x0242 +#define MT6323_VSYS_CON9 0x0244 +#define MT6323_VSYS_CON10 0x0246 +#define MT6323_VSYS_CON11 0x0248 +#define MT6323_VSYS_CON12 0x024A +#define MT6323_VSYS_CON13 
0x024C +#define MT6323_VSYS_CON14 0x024E +#define MT6323_VSYS_CON15 0x0250 +#define MT6323_VSYS_CON18 0x0256 +#define MT6323_VPA_CON0 0x0300 +#define MT6323_VPA_CON1 0x0302 +#define MT6323_VPA_CON2 0x0304 +#define MT6323_VPA_CON3 0x0306 +#define MT6323_VPA_CON4 0x0308 +#define MT6323_VPA_CON5 0x030A +#define MT6323_VPA_CON7 0x030E +#define MT6323_VPA_CON8 0x0310 +#define MT6323_VPA_CON9 0x0312 +#define MT6323_VPA_CON10 0x0314 +#define MT6323_VPA_CON11 0x0316 +#define MT6323_VPA_CON12 0x0318 +#define MT6323_VPA_CON14 0x031C +#define MT6323_VPA_CON16 0x0320 +#define MT6323_VPA_CON17 0x0322 +#define MT6323_VPA_CON18 0x0324 +#define MT6323_VPA_CON19 0x0326 +#define MT6323_VPA_CON20 0x0328 +#define MT6323_BUCK_K_CON0 0x032A +#define MT6323_BUCK_K_CON1 0x032C +#define MT6323_BUCK_K_CON2 0x032E +#define MT6323_ISINK0_CON0 0x0330 +#define MT6323_ISINK0_CON1 0x0332 +#define MT6323_ISINK0_CON2 0x0334 +#define MT6323_ISINK0_CON3 0x0336 +#define MT6323_ISINK1_CON0 0x0338 +#define MT6323_ISINK1_CON1 0x033A +#define MT6323_ISINK1_CON2 0x033C +#define MT6323_ISINK1_CON3 0x033E +#define MT6323_ISINK2_CON0 0x0340 +#define MT6323_ISINK2_CON1 0x0342 +#define MT6323_ISINK2_CON2 0x0344 +#define MT6323_ISINK2_CON3 0x0346 +#define MT6323_ISINK3_CON0 0x0348 +#define MT6323_ISINK3_CON1 0x034A +#define MT6323_ISINK3_CON2 0x034C +#define MT6323_ISINK3_CON3 0x034E +#define MT6323_ISINK_ANA0 0x0350 +#define MT6323_ISINK_ANA1 0x0352 +#define MT6323_ISINK_PHASE_DLY 0x0354 +#define MT6323_ISINK_EN_CTRL 0x0356 +#define MT6323_ANALDO_CON0 0x0400 +#define MT6323_ANALDO_CON1 0x0402 +#define MT6323_ANALDO_CON2 0x0404 +#define MT6323_ANALDO_CON3 0x0406 +#define MT6323_ANALDO_CON4 0x0408 +#define MT6323_ANALDO_CON5 0x040A +#define MT6323_ANALDO_CON6 0x040C +#define MT6323_ANALDO_CON7 0x040E +#define MT6323_ANALDO_CON8 0x0410 +#define MT6323_ANALDO_CON10 0x0412 +#define MT6323_ANALDO_CON15 0x0414 +#define MT6323_ANALDO_CON16 0x0416 +#define MT6323_ANALDO_CON17 0x0418 +#define MT6323_ANALDO_CON18 0x041A +#define MT6323_ANALDO_CON19 0x041C +#define MT6323_ANALDO_CON20 0x041E +#define MT6323_ANALDO_CON21 0x0420 +#define MT6323_DIGLDO_CON0 0x0500 +#define MT6323_DIGLDO_CON2 0x0502 +#define MT6323_DIGLDO_CON3 0x0504 +#define MT6323_DIGLDO_CON5 0x0506 +#define MT6323_DIGLDO_CON6 0x0508 +#define MT6323_DIGLDO_CON7 0x050A +#define MT6323_DIGLDO_CON8 0x050C +#define MT6323_DIGLDO_CON9 0x050E +#define MT6323_DIGLDO_CON10 0x0510 +#define MT6323_DIGLDO_CON11 0x0512 +#define MT6323_DIGLDO_CON12 0x0514 +#define MT6323_DIGLDO_CON13 0x0516 +#define MT6323_DIGLDO_CON14 0x0518 +#define MT6323_DIGLDO_CON15 0x051A +#define MT6323_DIGLDO_CON16 0x051C +#define MT6323_DIGLDO_CON17 0x051E +#define MT6323_DIGLDO_CON18 0x0520 +#define MT6323_DIGLDO_CON19 0x0522 +#define MT6323_DIGLDO_CON20 0x0524 +#define MT6323_DIGLDO_CON21 0x0526 +#define MT6323_DIGLDO_CON23 0x0528 +#define MT6323_DIGLDO_CON24 0x052A +#define MT6323_DIGLDO_CON26 0x052C +#define MT6323_DIGLDO_CON27 0x052E +#define MT6323_DIGLDO_CON28 0x0530 +#define MT6323_DIGLDO_CON29 0x0532 +#define MT6323_DIGLDO_CON30 0x0534 +#define MT6323_DIGLDO_CON31 0x0536 +#define MT6323_DIGLDO_CON32 0x0538 +#define MT6323_DIGLDO_CON33 0x053A +#define MT6323_DIGLDO_CON34 0x053C +#define MT6323_DIGLDO_CON35 0x053E +#define MT6323_DIGLDO_CON36 0x0540 +#define MT6323_DIGLDO_CON39 0x0542 +#define MT6323_DIGLDO_CON40 0x0544 +#define MT6323_DIGLDO_CON41 0x0546 +#define MT6323_DIGLDO_CON42 0x0548 +#define MT6323_DIGLDO_CON43 0x054A +#define MT6323_DIGLDO_CON44 0x054C +#define MT6323_DIGLDO_CON45 0x054E +#define 
MT6323_DIGLDO_CON46 0x0550 +#define MT6323_DIGLDO_CON47 0x0552 +#define MT6323_DIGLDO_CON48 0x0554 +#define MT6323_DIGLDO_CON49 0x0556 +#define MT6323_DIGLDO_CON50 0x0558 +#define MT6323_DIGLDO_CON51 0x055A +#define MT6323_DIGLDO_CON52 0x055C +#define MT6323_DIGLDO_CON53 0x055E +#define MT6323_DIGLDO_CON54 0x0560 +#define MT6323_EFUSE_CON0 0x0600 +#define MT6323_EFUSE_CON1 0x0602 +#define MT6323_EFUSE_CON2 0x0604 +#define MT6323_EFUSE_CON3 0x0606 +#define MT6323_EFUSE_CON4 0x0608 +#define MT6323_EFUSE_CON5 0x060A +#define MT6323_EFUSE_CON6 0x060C +#define MT6323_EFUSE_VAL_0_15 0x060E +#define MT6323_EFUSE_VAL_16_31 0x0610 +#define MT6323_EFUSE_VAL_32_47 0x0612 +#define MT6323_EFUSE_VAL_48_63 0x0614 +#define MT6323_EFUSE_VAL_64_79 0x0616 +#define MT6323_EFUSE_VAL_80_95 0x0618 +#define MT6323_EFUSE_VAL_96_111 0x061A +#define MT6323_EFUSE_VAL_112_127 0x061C +#define MT6323_EFUSE_VAL_128_143 0x061E +#define MT6323_EFUSE_VAL_144_159 0x0620 +#define MT6323_EFUSE_VAL_160_175 0x0622 +#define MT6323_EFUSE_VAL_176_191 0x0624 +#define MT6323_EFUSE_DOUT_0_15 0x0626 +#define MT6323_EFUSE_DOUT_16_31 0x0628 +#define MT6323_EFUSE_DOUT_32_47 0x062A +#define MT6323_EFUSE_DOUT_48_63 0x062C +#define MT6323_EFUSE_DOUT_64_79 0x062E +#define MT6323_EFUSE_DOUT_80_95 0x0630 +#define MT6323_EFUSE_DOUT_96_111 0x0632 +#define MT6323_EFUSE_DOUT_112_127 0x0634 +#define MT6323_EFUSE_DOUT_128_143 0x0636 +#define MT6323_EFUSE_DOUT_144_159 0x0638 +#define MT6323_EFUSE_DOUT_160_175 0x063A +#define MT6323_EFUSE_DOUT_176_191 0x063C +#define MT6323_EFUSE_CON7 0x063E +#define MT6323_EFUSE_CON8 0x0640 +#define MT6323_EFUSE_CON9 0x0642 +#define MT6323_RTC_MIX_CON0 0x0644 +#define MT6323_RTC_MIX_CON1 0x0646 +#define MT6323_AUDTOP_CON0 0x0700 +#define MT6323_AUDTOP_CON1 0x0702 +#define MT6323_AUDTOP_CON2 0x0704 +#define MT6323_AUDTOP_CON3 0x0706 +#define MT6323_AUDTOP_CON4 0x0708 +#define MT6323_AUDTOP_CON5 0x070A +#define MT6323_AUDTOP_CON6 0x070C +#define MT6323_AUDTOP_CON7 0x070E +#define MT6323_AUDTOP_CON8 0x0710 +#define MT6323_AUDTOP_CON9 0x0712 +#define MT6323_AUXADC_ADC0 0x0714 +#define MT6323_AUXADC_ADC1 0x0716 +#define MT6323_AUXADC_ADC2 0x0718 +#define MT6323_AUXADC_ADC3 0x071A +#define MT6323_AUXADC_ADC4 0x071C +#define MT6323_AUXADC_ADC5 0x071E +#define MT6323_AUXADC_ADC6 0x0720 +#define MT6323_AUXADC_ADC7 0x0722 +#define MT6323_AUXADC_ADC8 0x0724 +#define MT6323_AUXADC_ADC9 0x0726 +#define MT6323_AUXADC_ADC10 0x0728 +#define MT6323_AUXADC_ADC11 0x072A +#define MT6323_AUXADC_ADC12 0x072C +#define MT6323_AUXADC_ADC13 0x072E +#define MT6323_AUXADC_ADC14 0x0730 +#define MT6323_AUXADC_ADC15 0x0732 +#define MT6323_AUXADC_ADC16 0x0734 +#define MT6323_AUXADC_ADC17 0x0736 +#define MT6323_AUXADC_ADC18 0x0738 +#define MT6323_AUXADC_ADC19 0x073A +#define MT6323_AUXADC_ADC20 0x073C +#define MT6323_AUXADC_RSV1 0x073E +#define MT6323_AUXADC_RSV2 0x0740 +#define MT6323_AUXADC_CON0 0x0742 +#define MT6323_AUXADC_CON1 0x0744 +#define MT6323_AUXADC_CON2 0x0746 +#define MT6323_AUXADC_CON3 0x0748 +#define MT6323_AUXADC_CON4 0x074A +#define MT6323_AUXADC_CON5 0x074C +#define MT6323_AUXADC_CON6 0x074E +#define MT6323_AUXADC_CON7 0x0750 +#define MT6323_AUXADC_CON8 0x0752 +#define MT6323_AUXADC_CON9 0x0754 +#define MT6323_AUXADC_CON10 0x0756 +#define MT6323_AUXADC_CON11 0x0758 +#define MT6323_AUXADC_CON12 0x075A +#define MT6323_AUXADC_CON13 0x075C +#define MT6323_AUXADC_CON14 0x075E +#define MT6323_AUXADC_CON15 0x0760 +#define MT6323_AUXADC_CON16 0x0762 +#define MT6323_AUXADC_CON17 0x0764 +#define MT6323_AUXADC_CON18 0x0766 +#define 
MT6323_AUXADC_CON19 0x0768 +#define MT6323_AUXADC_CON20 0x076A +#define MT6323_AUXADC_CON21 0x076C +#define MT6323_AUXADC_CON22 0x076E +#define MT6323_AUXADC_CON23 0x0770 +#define MT6323_AUXADC_CON24 0x0772 +#define MT6323_AUXADC_CON25 0x0774 +#define MT6323_AUXADC_CON26 0x0776 +#define MT6323_AUXADC_CON27 0x0778 +#define MT6323_ACCDET_CON0 0x077A +#define MT6323_ACCDET_CON1 0x077C +#define MT6323_ACCDET_CON2 0x077E +#define MT6323_ACCDET_CON3 0x0780 +#define MT6323_ACCDET_CON4 0x0782 +#define MT6323_ACCDET_CON5 0x0784 +#define MT6323_ACCDET_CON6 0x0786 +#define MT6323_ACCDET_CON7 0x0788 +#define MT6323_ACCDET_CON8 0x078A +#define MT6323_ACCDET_CON9 0x078C +#define MT6323_ACCDET_CON10 0x078E +#define MT6323_ACCDET_CON11 0x0790 +#define MT6323_ACCDET_CON12 0x0792 +#define MT6323_ACCDET_CON13 0x0794 +#define MT6323_ACCDET_CON14 0x0796 +#define MT6323_ACCDET_CON15 0x0798 +#define MT6323_ACCDET_CON16 0x079A + +#endif /* __MFD_MT6323_REGISTERS_H__ */ -- cgit v1.2.3 From a862dc3ea793256a1364991f52e68198a2c5f27d Mon Sep 17 00:00:00 2001 From: "Maciej S. Szmigiero" Date: Sun, 31 Jan 2016 22:58:41 +0100 Subject: mfd: rc5t583: Set regmap config reg counts properly Regmap config max_register field should contain number of device last register, however num_reg_defaults_raw field should be set to register count instead (usually one register more than max_register). rc5t583 driver had both of these fields set to the same value, fix this by introducing separate defines for max register number and total count of registers. Signed-off-by: Maciej S. Szmigiero Signed-off-by: Lee Jones --- drivers/mfd/rc5t583.c | 4 ++-- include/linux/mfd/rc5t583.h | 5 +++-- 2 files changed, 5 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/drivers/mfd/rc5t583.c b/drivers/mfd/rc5t583.c index e10f02f5d551..fc2b2d93f354 100644 --- a/drivers/mfd/rc5t583.c +++ b/drivers/mfd/rc5t583.c @@ -241,8 +241,8 @@ static const struct regmap_config rc5t583_regmap_config = { .reg_bits = 8, .val_bits = 8, .volatile_reg = volatile_reg, - .max_register = RC5T583_MAX_REGS, - .num_reg_defaults_raw = RC5T583_MAX_REGS, + .max_register = RC5T583_MAX_REG, + .num_reg_defaults_raw = RC5T583_NUM_REGS, .cache_type = REGCACHE_RBTREE, }; diff --git a/include/linux/mfd/rc5t583.h b/include/linux/mfd/rc5t583.h index fd413ccab915..8d0a392e0a7f 100644 --- a/include/linux/mfd/rc5t583.h +++ b/include/linux/mfd/rc5t583.h @@ -28,8 +28,6 @@ #include #include -#define RC5T583_MAX_REGS 0xF8 - /* Maximum number of main interrupts */ #define MAX_MAIN_INTERRUPT 5 #define RC5T583_MAX_GPEDGE_REG 2 @@ -169,6 +167,9 @@ #define RC5T583_RTC_AY_MONTH 0xF3 #define RC5T583_RTC_AY_YEAR 0xF4 +#define RC5T583_MAX_REG 0xF7 +#define RC5T583_NUM_REGS (RC5T583_MAX_REG + 1) + /* RICOH_RC5T583 IRQ definitions */ enum { RC5T583_IRQ_ONKEY, -- cgit v1.2.3 From e9b7ba7954fa8df6e021ee4bef084ed10eba2c2b Mon Sep 17 00:00:00 2001 From: "Maciej S. Szmigiero" Date: Sun, 31 Jan 2016 22:56:59 +0100 Subject: mfd: as3711: Set regmap config reg counts properly Regmap config max_register field should contain number of device last register, however num_reg_defaults_raw field should be set to register count instead (usually one register more than max_register). as3711 driver had both of these fields set to the same value, fix this by introducing separate defines for max register number and total count of registers. Signed-off-by: Maciej S. 
Szmigiero Signed-off-by: Lee Jones --- drivers/mfd/as3711.c | 4 ++-- include/linux/mfd/as3711.h | 3 ++- 2 files changed, 4 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/mfd/as3711.c b/drivers/mfd/as3711.c index 94d67a6e1eb7..09e1483b99bc 100644 --- a/drivers/mfd/as3711.c +++ b/drivers/mfd/as3711.c @@ -108,8 +108,8 @@ static const struct regmap_config as3711_regmap_config = { .volatile_reg = as3711_volatile_reg, .readable_reg = as3711_readable_reg, .precious_reg = as3711_precious_reg, - .max_register = AS3711_MAX_REGS, - .num_reg_defaults_raw = AS3711_MAX_REGS, + .max_register = AS3711_MAX_REG, + .num_reg_defaults_raw = AS3711_NUM_REGS, .cache_type = REGCACHE_RBTREE, }; diff --git a/include/linux/mfd/as3711.h b/include/linux/mfd/as3711.h index 38452ce1e892..34cc85864be5 100644 --- a/include/linux/mfd/as3711.h +++ b/include/linux/mfd/as3711.h @@ -51,7 +51,8 @@ #define AS3711_ASIC_ID_1 0x90 #define AS3711_ASIC_ID_2 0x91 -#define AS3711_MAX_REGS 0x92 +#define AS3711_MAX_REG AS3711_ASIC_ID_2 +#define AS3711_NUM_REGS (AS3711_MAX_REG + 1) /* Regulators */ enum { -- cgit v1.2.3 From 8c037e0c8eaa7dcb3a190b9be841ee81edb865ff Mon Sep 17 00:00:00 2001 From: Philipp Zabel Date: Fri, 29 Jan 2016 09:42:19 +0100 Subject: mfd: syscon: Return ENOTSUPP instead of ENOSYS when disabled When CONFIG_MFD_SYSCON is disabled, have the function stubs return ENOTSUPP to indicate the syscon functionality is not available. There are currently no callers that depend on the ENOSYS return value. This patch fixes a checkpatch warning: WARNING: ENOSYS means 'invalid syscall nr' and nothing else Signed-off-by: Philipp Zabel Signed-off-by: Lee Jones --- include/linux/mfd/syscon.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mfd/syscon.h b/include/linux/mfd/syscon.h index 75e543b78f53..1088149be0c9 100644 --- a/include/linux/mfd/syscon.h +++ b/include/linux/mfd/syscon.h @@ -29,24 +29,24 @@ extern struct regmap *syscon_regmap_lookup_by_phandle( #else static inline struct regmap *syscon_node_to_regmap(struct device_node *np) { - return ERR_PTR(-ENOSYS); + return ERR_PTR(-ENOTSUPP); } static inline struct regmap *syscon_regmap_lookup_by_compatible(const char *s) { - return ERR_PTR(-ENOSYS); + return ERR_PTR(-ENOTSUPP); } static inline struct regmap *syscon_regmap_lookup_by_pdevname(const char *s) { - return ERR_PTR(-ENOSYS); + return ERR_PTR(-ENOTSUPP); } static inline struct regmap *syscon_regmap_lookup_by_phandle( struct device_node *np, const char *property) { - return ERR_PTR(-ENOSYS); + return ERR_PTR(-ENOTSUPP); } #endif -- cgit v1.2.3 From 5c1488906f852473b4d7837c3e33e411e0eb6910 Mon Sep 17 00:00:00 2001 From: "Maciej S. Szmigiero" Date: Sun, 31 Jan 2016 23:00:06 +0100 Subject: mfd: tps65090: Set regmap config reg counts properly Regmap config max_register field should contain number of device last register, however num_reg_defaults_raw field should be set to register count instead (usually one register more than max_register). tps65090 driver had both of these fields set to the same value, fix this by introducing separate defines for max register number and total count of registers. Signed-off-by: Maciej S.
Szmigiero Signed-off-by: Lee Jones --- drivers/mfd/tps65090.c | 5 ++--- include/linux/mfd/tps65090.h | 5 +++++ 2 files changed, 7 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/mfd/tps65090.c b/drivers/mfd/tps65090.c index f88085ad9772..d7ec318c40c3 100644 --- a/drivers/mfd/tps65090.c +++ b/drivers/mfd/tps65090.c @@ -30,7 +30,6 @@ #include #define NUM_INT_REG 2 -#define TOTAL_NUM_REG 0x18 #define TPS65090_INT1_MASK_VAC_STATUS_CHANGE 1 #define TPS65090_INT1_MASK_VSYS_STATUS_CHANGE 2 @@ -161,8 +160,8 @@ static bool is_volatile_reg(struct device *dev, unsigned int reg) static const struct regmap_config tps65090_regmap_config = { .reg_bits = 8, .val_bits = 8, - .max_register = TOTAL_NUM_REG, - .num_reg_defaults_raw = TOTAL_NUM_REG, + .max_register = TPS65090_MAX_REG, + .num_reg_defaults_raw = TPS65090_NUM_REGS, .cache_type = REGCACHE_RBTREE, .volatile_reg = is_volatile_reg, }; diff --git a/include/linux/mfd/tps65090.h b/include/linux/mfd/tps65090.h index 0bf2708df150..67d144b3b8f9 100644 --- a/include/linux/mfd/tps65090.h +++ b/include/linux/mfd/tps65090.h @@ -77,6 +77,11 @@ enum { #define TPS65090_REG_CG_CTRL5 0x09 #define TPS65090_REG_CG_STATUS1 0x0a #define TPS65090_REG_CG_STATUS2 0x0b +#define TPS65090_REG_AD_OUT1 0x17 +#define TPS65090_REG_AD_OUT2 0x18 + +#define TPS65090_MAX_REG TPS65090_REG_AD_OUT2 +#define TPS65090_NUM_REGS (TPS65090_MAX_REG + 1) struct tps65090 { struct device *dev; -- cgit v1.2.3 From 2609e4daaaff930548e35793d46ae079d39fb722 Mon Sep 17 00:00:00 2001 From: Christoph Fritz Date: Thu, 25 Feb 2016 15:47:48 +0100 Subject: mfd: imx6sx: Add PCIe register definitions for iomuxc gpr This patch adds macros to define masks and bits for imx6sx PCIe registers. This is based on a patch by Richard Zhu. Signed-off-by: Christoph Fritz Signed-off-by: Lee Jones --- include/linux/mfd/syscon/imx6q-iomuxc-gpr.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mfd/syscon/imx6q-iomuxc-gpr.h b/include/linux/mfd/syscon/imx6q-iomuxc-gpr.h index 558a485d03ab..238c8db953eb 100644 --- a/include/linux/mfd/syscon/imx6q-iomuxc-gpr.h +++ b/include/linux/mfd/syscon/imx6q-iomuxc-gpr.h @@ -422,6 +422,7 @@ #define IMX6SX_GPR5_VADC_TO_CSI_CAPTURE_EN_MASK (0x1 << 26) #define IMX6SX_GPR5_VADC_TO_CSI_CAPTURE_EN_ENABLE (0x1 << 26) #define IMX6SX_GPR5_VADC_TO_CSI_CAPTURE_EN_DISABLE (0x0 << 26) +#define IMX6SX_GPR5_PCIE_BTNRST_RESET BIT(19) #define IMX6SX_GPR5_CSI1_MUX_CTRL_MASK (0x3 << 4) #define IMX6SX_GPR5_CSI1_MUX_CTRL_EXT_PIN (0x0 << 4) #define IMX6SX_GPR5_CSI1_MUX_CTRL_CVD (0x1 << 4) @@ -435,6 +436,10 @@ #define IMX6SX_GPR5_DISP_MUX_DCIC1_LVDS (0x1 << 1) #define IMX6SX_GPR5_DISP_MUX_DCIC1_MASK (0x1 << 1) +#define IMX6SX_GPR12_PCIE_TEST_POWERDOWN BIT(30) +#define IMX6SX_GPR12_PCIE_RX_EQ_MASK (0x7 << 0) +#define IMX6SX_GPR12_PCIE_RX_EQ_2 (0x2 << 0) + /* For imx6ul iomux gpr register field define */ #define IMX6UL_GPR1_ENET1_CLK_DIR (0x1 << 17) #define IMX6UL_GPR1_ENET2_CLK_DIR (0x1 << 18) -- cgit v1.2.3 From 664a39236e718f9f03fa73fc01006da9ced04efc Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Sun, 28 Feb 2016 13:12:15 -0800 Subject: watchdog: Introduce hardware maximum heartbeat in watchdog core Introduce an optional hardware maximum heartbeat in the watchdog core. The hardware maximum heartbeat can be lower than the maximum timeout. Drivers can set the maximum hardware heartbeat value in the watchdog data structure. 
If the configured timeout exceeds the maximum hardware heartbeat, the watchdog core enables a timer function to assist sending keepalive requests to the watchdog driver. Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- Documentation/watchdog/watchdog-kernel-api.txt | 19 +++- drivers/watchdog/watchdog_dev.c | 129 +++++++++++++++++++++++-- include/linux/watchdog.h | 28 ++++-- 3 files changed, 158 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/Documentation/watchdog/watchdog-kernel-api.txt b/Documentation/watchdog/watchdog-kernel-api.txt index dd8f912c0576..15a02595ade1 100644 --- a/Documentation/watchdog/watchdog-kernel-api.txt +++ b/Documentation/watchdog/watchdog-kernel-api.txt @@ -52,6 +52,7 @@ struct watchdog_device { unsigned int timeout; unsigned int min_timeout; unsigned int max_timeout; + unsigned int max_hw_heartbeat_ms; struct notifier_block reboot_nb; struct notifier_block restart_nb; void *driver_data; @@ -73,8 +74,18 @@ It contains following fields: additional information about the watchdog timer itself. (Like it's unique name) * ops: a pointer to the list of watchdog operations that the watchdog supports. * timeout: the watchdog timer's timeout value (in seconds). + This is the time after which the system will reboot if user space does + not send a heartbeat request if WDOG_ACTIVE is set. * min_timeout: the watchdog timer's minimum timeout value (in seconds). -* max_timeout: the watchdog timer's maximum timeout value (in seconds). + If set, the minimum configurable value for 'timeout'. +* max_timeout: the watchdog timer's maximum timeout value (in seconds), + as seen from userspace. If set, the maximum configurable value for + 'timeout'. Not used if max_hw_heartbeat_ms is non-zero. +* max_hw_heartbeat_ms: Maximum hardware heartbeat, in milli-seconds. + If set, the infrastructure will send heartbeats to the watchdog driver + if 'timeout' is larger than max_hw_heartbeat_ms, unless WDOG_ACTIVE + is set and userspace failed to send a heartbeat for at least 'timeout' + seconds. * reboot_nb: notifier block that is registered for reboot notifications, for internal use only. If the driver calls watchdog_stop_on_reboot, watchdog core will stop the watchdog on such notifications. @@ -153,7 +164,11 @@ they are supported. These optional routines/operations are: and -EIO for "could not write value to the watchdog". On success this routine should set the timeout value of the watchdog_device to the achieved timeout value (which may be different from the requested one - because the watchdog does not necessarily has a 1 second resolution). + because the watchdog does not necessarily have a 1 second resolution). + Drivers implementing max_hw_heartbeat_ms set the hardware watchdog heartbeat + to the minimum of timeout and max_hw_heartbeat_ms. Those drivers set the + timeout value of the watchdog_device either to the requested timeout value + (if it is larger than max_hw_heartbeat_ms), or to the achieved timeout value. (Note: the WDIOF_SETTIMEOUT needs to be set in the options field of the watchdog's info structure). If the watchdog driver does not have to perform any action but setting the diff --git a/drivers/watchdog/watchdog_dev.c b/drivers/watchdog/watchdog_dev.c index b5e700186ae0..e668a9e8b648 100644 --- a/drivers/watchdog/watchdog_dev.c +++ b/drivers/watchdog/watchdog_dev.c @@ -36,6 +36,7 @@ #include /* For the -ENODEV/... values */ #include /* For file operations */ #include /* For __init/__exit/... 
*/ +#include /* For timeout functions */ #include /* For printk/panic/... */ #include /* For data references */ #include /* For handling misc devices */ @@ -44,6 +45,7 @@ #include /* For memory functions */ #include /* For standard types (like size_t) */ #include /* For watchdog specific items */ +#include /* For workqueue */ #include /* For copy_to_user/put_user/... */ #include "watchdog_core.h" @@ -61,6 +63,8 @@ struct watchdog_core_data { struct cdev cdev; struct watchdog_device *wdd; struct mutex lock; + unsigned long last_keepalive; + struct delayed_work work; unsigned long status; /* Internal status bits */ #define _WDOG_DEV_OPEN 0 /* Opened ? */ #define _WDOG_ALLOW_RELEASE 1 /* Did we receive the magic char ? */ @@ -71,6 +75,76 @@ static dev_t watchdog_devt; /* Reference to watchdog device behind /dev/watchdog */ static struct watchdog_core_data *old_wd_data; +static struct workqueue_struct *watchdog_wq; + +static inline bool watchdog_need_worker(struct watchdog_device *wdd) +{ + /* All variables in milli-seconds */ + unsigned int hm = wdd->max_hw_heartbeat_ms; + unsigned int t = wdd->timeout * 1000; + + /* + * A worker to generate heartbeat requests is needed if all of the + * following conditions are true. + * - Userspace activated the watchdog. + * - The driver provided a value for the maximum hardware timeout, and + * thus is aware that the framework supports generating heartbeat + * requests. + * - Userspace requests a longer timeout than the hardware can handle. + */ + return watchdog_active(wdd) && hm && t > hm; +} + +static long watchdog_next_keepalive(struct watchdog_device *wdd) +{ + struct watchdog_core_data *wd_data = wdd->wd_data; + unsigned int timeout_ms = wdd->timeout * 1000; + unsigned long keepalive_interval; + unsigned long last_heartbeat; + unsigned long virt_timeout; + unsigned int hw_heartbeat_ms; + + virt_timeout = wd_data->last_keepalive + msecs_to_jiffies(timeout_ms); + hw_heartbeat_ms = min(timeout_ms, wdd->max_hw_heartbeat_ms); + keepalive_interval = msecs_to_jiffies(hw_heartbeat_ms / 2); + + /* + * To ensure that the watchdog times out wdd->timeout seconds + * after the most recent ping from userspace, the last + * worker ping has to come in hw_heartbeat_ms before this timeout. + */ + last_heartbeat = virt_timeout - msecs_to_jiffies(hw_heartbeat_ms); + return min_t(long, last_heartbeat - jiffies, keepalive_interval); +} + +static inline void watchdog_update_worker(struct watchdog_device *wdd) +{ + struct watchdog_core_data *wd_data = wdd->wd_data; + + if (watchdog_need_worker(wdd)) { + long t = watchdog_next_keepalive(wdd); + + if (t > 0) + mod_delayed_work(watchdog_wq, &wd_data->work, t); + } else { + cancel_delayed_work(&wd_data->work); + } +} + +static int __watchdog_ping(struct watchdog_device *wdd) +{ + int err; + + if (wdd->ops->ping) + err = wdd->ops->ping(wdd); /* ping the watchdog */ + else + err = wdd->ops->start(wdd); /* restart watchdog */ + + watchdog_update_worker(wdd); + + return err; +} + /* * watchdog_ping: ping the watchdog. 
* @wdd: the watchdog device to ping @@ -85,17 +159,28 @@ static struct watchdog_core_data *old_wd_data; static int watchdog_ping(struct watchdog_device *wdd) { - int err; + struct watchdog_core_data *wd_data = wdd->wd_data; if (!watchdog_active(wdd)) return 0; - if (wdd->ops->ping) - err = wdd->ops->ping(wdd); /* ping the watchdog */ - else - err = wdd->ops->start(wdd); /* restart watchdog */ + wd_data->last_keepalive = jiffies; + return __watchdog_ping(wdd); +} - return err; +static void watchdog_ping_work(struct work_struct *work) +{ + struct watchdog_core_data *wd_data; + struct watchdog_device *wdd; + + wd_data = container_of(to_delayed_work(work), struct watchdog_core_data, + work); + + mutex_lock(&wd_data->lock); + wdd = wd_data->wdd; + if (wdd && watchdog_active(wdd)) + __watchdog_ping(wdd); + mutex_unlock(&wd_data->lock); } /* @@ -111,14 +196,20 @@ static int watchdog_ping(struct watchdog_device *wdd) static int watchdog_start(struct watchdog_device *wdd) { + struct watchdog_core_data *wd_data = wdd->wd_data; + unsigned long started_at; int err; if (watchdog_active(wdd)) return 0; + started_at = jiffies; err = wdd->ops->start(wdd); - if (err == 0) + if (err == 0) { set_bit(WDOG_ACTIVE, &wdd->status); + wd_data->last_keepalive = started_at; + watchdog_update_worker(wdd); + } return err; } @@ -137,6 +228,7 @@ static int watchdog_start(struct watchdog_device *wdd) static int watchdog_stop(struct watchdog_device *wdd) { + struct watchdog_core_data *wd_data = wdd->wd_data; int err; if (!watchdog_active(wdd)) @@ -149,8 +241,10 @@ static int watchdog_stop(struct watchdog_device *wdd) } err = wdd->ops->stop(wdd); - if (err == 0) + if (err == 0) { clear_bit(WDOG_ACTIVE, &wdd->status); + cancel_delayed_work(&wd_data->work); + } return err; } @@ -196,6 +290,8 @@ static int watchdog_set_timeout(struct watchdog_device *wdd, else wdd->timeout = timeout; + watchdog_update_worker(wdd); + return err; } @@ -616,6 +712,8 @@ static int watchdog_release(struct inode *inode, struct file *file) watchdog_ping(wdd); } + cancel_delayed_work_sync(&wd_data->work); + /* make sure that /dev/watchdog can be re-opened */ clear_bit(_WDOG_DEV_OPEN, &wd_data->status); @@ -665,6 +763,11 @@ static int watchdog_cdev_register(struct watchdog_device *wdd, dev_t devno) wd_data->wdd = wdd; wdd->wd_data = wd_data; + if (!watchdog_wq) + return -ENODEV; + + INIT_DELAYED_WORK(&wd_data->work, watchdog_ping_work); + if (wdd->id == 0) { old_wd_data = wd_data; watchdog_miscdev.parent = wdd->parent; @@ -722,6 +825,8 @@ static void watchdog_cdev_unregister(struct watchdog_device *wdd) wdd->wd_data = NULL; mutex_unlock(&wd_data->lock); + cancel_delayed_work_sync(&wd_data->work); + kref_put(&wd_data->kref, watchdog_core_data_release); } @@ -787,6 +892,13 @@ int __init watchdog_dev_init(void) { int err; + watchdog_wq = alloc_workqueue("watchdogd", + WQ_HIGHPRI | WQ_MEM_RECLAIM, 0); + if (!watchdog_wq) { + pr_err("Failed to create watchdog workqueue\n"); + return -ENOMEM; + } + err = class_register(&watchdog_class); if (err < 0) { pr_err("couldn't register class\n"); @@ -813,4 +925,5 @@ void __exit watchdog_dev_exit(void) { unregister_chrdev_region(watchdog_devt, MAX_DOGS); class_unregister(&watchdog_class); + destroy_workqueue(watchdog_wq); } diff --git a/include/linux/watchdog.h b/include/linux/watchdog.h index 0b565f2ad242..8e82daecb7d3 100644 --- a/include/linux/watchdog.h +++ b/include/linux/watchdog.h @@ -10,8 +10,9 @@ #include -#include #include +#include +#include #include #include @@ -61,14 +62,19 @@ struct watchdog_ops { * 
@bootstatus: Status of the watchdog device at boot. * @timeout: The watchdog devices timeout value (in seconds). * @min_timeout:The watchdog devices minimum timeout value (in seconds). - * @max_timeout:The watchdog devices maximum timeout value (in seconds). + * @max_timeout:The watchdog devices maximum timeout value (in seconds) + * as configurable from user space. Only relevant if + * max_hw_heartbeat_ms is not provided. + * @max_hw_heartbeat_ms: + * Hardware limit for maximum timeout, in milli-seconds. + * Replaces max_timeout if specified. * @reboot_nb: The notifier block to stop watchdog on reboot. * @restart_nb: The notifier block to register a restart function. * @driver_data:Pointer to the drivers private data. * @wd_data: Pointer to watchdog core internal data. * @status: Field that contains the devices internal status bits. - * @deferred: entry in wtd_deferred_reg_list which is used to - * register early initialized watchdogs. + * @deferred: Entry in wtd_deferred_reg_list which is used to + * register early initialized watchdogs. * * The watchdog_device structure contains all information about a * watchdog timer device. @@ -89,6 +95,7 @@ struct watchdog_device { unsigned int timeout; unsigned int min_timeout; unsigned int max_timeout; + unsigned int max_hw_heartbeat_ms; struct notifier_block reboot_nb; struct notifier_block restart_nb; void *driver_data; @@ -128,13 +135,18 @@ static inline bool watchdog_timeout_invalid(struct watchdog_device *wdd, unsigne { /* * The timeout is invalid if + * - the requested value is larger than UINT_MAX / 1000 + * (since internal calculations are done in milli-seconds), + * or * - the requested value is smaller than the configured minimum timeout, * or - * - a maximum timeout is configured, and the requested value is larger - * than the maximum timeout. + * - a maximum hardware timeout is not configured, a maximum timeout + * is configured, and the requested value is larger than the + * configured maximum timeout. */ - return t < wdd->min_timeout || - (wdd->max_timeout && t > wdd->max_timeout); + return t > UINT_MAX / 1000 || t < wdd->min_timeout || + (!wdd->max_hw_heartbeat_ms && wdd->max_timeout && + t > wdd->max_timeout); } /* Use the following functions to manipulate watchdog driver specific data */ -- cgit v1.2.3 From ee142889e32f564f9b5e57b68b06693ec5473074 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Sun, 28 Feb 2016 13:12:16 -0800 Subject: watchdog: Introduce WDOG_HW_RUNNING flag The WDOG_HW_RUNNING flag is expected to be set by watchdog drivers if the hardware watchdog is running. If the flag is set, the watchdog subsystem will ping the watchdog even if the watchdog device is closed. The watchdog driver stop function is now optional and may be omitted if the watchdog can not be stopped. If stopping the watchdog is not possible but the driver implements a stop function, it is responsible to set the WDOG_HW_RUNNING flag in its stop function. 
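For illustration only (not part of this patch): a driver whose hardware cannot be stopped might implement its stop routine along these lines. The foo_wdt name is hypothetical.

	static int foo_wdt_stop(struct watchdog_device *wdd)
	{
		/*
		 * The hardware cannot actually be stopped, so report
		 * that it is still running; the watchdog core will then
		 * keep pinging it even after /dev/watchdog is closed.
		 */
		set_bit(WDOG_HW_RUNNING, &wdd->status);
		return 0;
	}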
Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- Documentation/watchdog/watchdog-kernel-api.txt | 22 +++++++---- drivers/watchdog/watchdog_dev.c | 52 +++++++++++++++++++------- include/linux/watchdog.h | 10 +++++ 3 files changed, 64 insertions(+), 20 deletions(-) (limited to 'include/linux') diff --git a/Documentation/watchdog/watchdog-kernel-api.txt b/Documentation/watchdog/watchdog-kernel-api.txt index 15a02595ade1..954134a5c4a4 100644 --- a/Documentation/watchdog/watchdog-kernel-api.txt +++ b/Documentation/watchdog/watchdog-kernel-api.txt @@ -137,10 +137,10 @@ are: * stop: with this routine the watchdog timer device is being stopped. The routine needs a pointer to the watchdog timer device structure as a parameter. It returns zero on success or a negative errno code for failure. - Some watchdog timer hardware can only be started and not be stopped. The - driver supporting this hardware needs to make sure that a start and stop - routine is being provided. This can be done by using a timer in the driver - that regularly sends a keepalive ping to the watchdog timer hardware. + Some watchdog timer hardware can only be started and not be stopped. + If a watchdog can not be stopped, the watchdog driver must set the + WDOG_HW_RUNNING flag in its stop function to inform the watchdog core that + the watchdog is still running. Not all watchdog timer hardware supports the same functionality. That's why all other routines/operations are optional. They only need to be provided if @@ -189,11 +189,19 @@ The 'ref' and 'unref' operations are no longer used and deprecated. The status bits should (preferably) be set with the set_bit and clear_bit alike bit-operations. The status bits that are defined are: * WDOG_ACTIVE: this status bit indicates whether or not a watchdog timer device - is active or not. When the watchdog is active after booting, then you should - set this status bit (Note: when you register the watchdog timer device with - this bit set, then opening /dev/watchdog will skip the start operation) + is active or not from user perspective. User space is expected to send + heartbeat requests to the driver while this flag is set. * WDOG_NO_WAY_OUT: this bit stores the nowayout setting for the watchdog. If this bit is set then the watchdog timer will not be able to stop. +* WDOG_HW_RUNNING: Set by the watchdog driver if the hardware watchdog is + running. The bit must be set if the watchdog timer hardware can not be + stopped. The bit may also be set if the watchdog timer is running after + booting, before the watchdog device is opened. If set, the watchdog + infrastructure will send keepalives to the watchdog hardware while + WDOG_ACTIVE is not set. + Note: when you register the watchdog timer device with this bit set, + then opening /dev/watchdog will skip the start operation but send a keepalive + request instead. To set the WDOG_NO_WAY_OUT status bit (before registering your watchdog timer device) you can either: diff --git a/drivers/watchdog/watchdog_dev.c b/drivers/watchdog/watchdog_dev.c index e668a9e8b648..5d3a9fa4856e 100644 --- a/drivers/watchdog/watchdog_dev.c +++ b/drivers/watchdog/watchdog_dev.c @@ -92,7 +92,8 @@ static inline bool watchdog_need_worker(struct watchdog_device *wdd) * requests. * - Userspace requests a longer timeout than the hardware can handle. 
*/ - return watchdog_active(wdd) && hm && t > hm; + return hm && ((watchdog_active(wdd) && t > hm) || + (t && !watchdog_active(wdd) && watchdog_hw_running(wdd))); } static long watchdog_next_keepalive(struct watchdog_device *wdd) @@ -108,6 +109,9 @@ static long watchdog_next_keepalive(struct watchdog_device *wdd) hw_heartbeat_ms = min(timeout_ms, wdd->max_hw_heartbeat_ms); keepalive_interval = msecs_to_jiffies(hw_heartbeat_ms / 2); + if (!watchdog_active(wdd)) + return keepalive_interval; + /* * To ensure that the watchdog times out wdd->timeout seconds * after the most recent ping from userspace, the last @@ -161,7 +165,7 @@ static int watchdog_ping(struct watchdog_device *wdd) { struct watchdog_core_data *wd_data = wdd->wd_data; - if (!watchdog_active(wdd)) + if (!watchdog_active(wdd) && !watchdog_hw_running(wdd)) return 0; wd_data->last_keepalive = jiffies; @@ -178,7 +182,7 @@ static void watchdog_ping_work(struct work_struct *work) mutex_lock(&wd_data->lock); wdd = wd_data->wdd; - if (wdd && watchdog_active(wdd)) + if (wdd && (watchdog_active(wdd) || watchdog_hw_running(wdd))) __watchdog_ping(wdd); mutex_unlock(&wd_data->lock); } @@ -204,7 +208,10 @@ static int watchdog_start(struct watchdog_device *wdd) return 0; started_at = jiffies; - err = wdd->ops->start(wdd); + if (watchdog_hw_running(wdd) && wdd->ops->ping) + err = wdd->ops->ping(wdd); + else + err = wdd->ops->start(wdd); if (err == 0) { set_bit(WDOG_ACTIVE, &wdd->status); wd_data->last_keepalive = started_at; @@ -228,8 +235,7 @@ static int watchdog_start(struct watchdog_device *wdd) static int watchdog_stop(struct watchdog_device *wdd) { - struct watchdog_core_data *wd_data = wdd->wd_data; - int err; + int err = 0; if (!watchdog_active(wdd)) return 0; @@ -243,7 +249,7 @@ static int watchdog_stop(struct watchdog_device *wdd) err = wdd->ops->stop(wdd); if (err == 0) { clear_bit(WDOG_ACTIVE, &wdd->status); - cancel_delayed_work(&wd_data->work); + watchdog_update_worker(wdd); } return err; @@ -641,7 +647,7 @@ static int watchdog_open(struct inode *inode, struct file *file) * If the /dev/watchdog device is open, we don't want the module * to be unloaded. */ - if (!try_module_get(wdd->ops->owner)) { + if (!watchdog_hw_running(wdd) && !try_module_get(wdd->ops->owner)) { err = -EBUSY; goto out_clear; } @@ -652,7 +658,8 @@ static int watchdog_open(struct inode *inode, struct file *file) file->private_data = wd_data; - kref_get(&wd_data->kref); + if (!watchdog_hw_running(wdd)) + kref_get(&wd_data->kref); /* dev/watchdog is a virtual (and thus non-seekable) filesystem */ return nonseekable_open(inode, file); @@ -713,15 +720,22 @@ static int watchdog_release(struct inode *inode, struct file *file) } cancel_delayed_work_sync(&wd_data->work); + watchdog_update_worker(wdd); /* make sure that /dev/watchdog can be re-opened */ clear_bit(_WDOG_DEV_OPEN, &wd_data->status); done: mutex_unlock(&wd_data->lock); - /* Allow the owner module to be unloaded again */ - module_put(wd_data->cdev.owner); - kref_put(&wd_data->kref, watchdog_core_data_release); + /* + * Allow the owner module to be unloaded again unless the watchdog + * is still running. If the watchdog is still running, it can not + * be stopped, and its driver must not be unloaded. 
+ */ + if (!watchdog_hw_running(wdd)) { + module_put(wdd->ops->owner); + kref_put(&wd_data->kref, watchdog_core_data_release); + } return 0; } @@ -798,8 +812,20 @@ static int watchdog_cdev_register(struct watchdog_device *wdd, dev_t devno) old_wd_data = NULL; kref_put(&wd_data->kref, watchdog_core_data_release); } + return err; } - return err; + + /* + * If the watchdog is running, prevent its driver from being unloaded, + * and schedule an immediate ping. + */ + if (watchdog_hw_running(wdd)) { + __module_get(wdd->ops->owner); + kref_get(&wd_data->kref); + queue_delayed_work(watchdog_wq, &wd_data->work, 0); + } + + return 0; } /* diff --git a/include/linux/watchdog.h b/include/linux/watchdog.h index 8e82daecb7d3..e2f45549b243 100644 --- a/include/linux/watchdog.h +++ b/include/linux/watchdog.h @@ -105,6 +105,7 @@ struct watchdog_device { #define WDOG_ACTIVE 0 /* Is the watchdog running/active */ #define WDOG_NO_WAY_OUT 1 /* Is 'nowayout' feature set ? */ #define WDOG_STOP_ON_REBOOT 2 /* Should be stopped on reboot */ +#define WDOG_HW_RUNNING 3 /* True if HW watchdog running */ struct list_head deferred; }; @@ -117,6 +118,15 @@ static inline bool watchdog_active(struct watchdog_device *wdd) return test_bit(WDOG_ACTIVE, &wdd->status); } +/* + * Use the following function to check whether or not the hardware watchdog + * is running + */ +static inline bool watchdog_hw_running(struct watchdog_device *wdd) +{ + return test_bit(WDOG_HW_RUNNING, &wdd->status); +} + /* Use the following function to set the nowayout feature */ static inline void watchdog_set_nowayout(struct watchdog_device *wdd, bool nowayout) { -- cgit v1.2.3 From 15013ad813f6544be8e79afc23672745950d59bc Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Sun, 28 Feb 2016 13:12:18 -0800 Subject: watchdog: Add support for minimum time between heartbeats Some watchdogs require a minimum time between heartbeats. Examples are the watchdogs in DA9062 and AT91SAM9x. Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- Documentation/watchdog/watchdog-kernel-api.txt | 3 +++ drivers/watchdog/watchdog_dev.c | 15 +++++++++++++++ include/linux/watchdog.h | 3 +++ 3 files changed, 21 insertions(+) (limited to 'include/linux') diff --git a/Documentation/watchdog/watchdog-kernel-api.txt b/Documentation/watchdog/watchdog-kernel-api.txt index 9eabca1d9355..917eeeabfa5e 100644 --- a/Documentation/watchdog/watchdog-kernel-api.txt +++ b/Documentation/watchdog/watchdog-kernel-api.txt @@ -52,6 +52,7 @@ struct watchdog_device { unsigned int timeout; unsigned int min_timeout; unsigned int max_timeout; + unsigned int min_hw_heartbeat_ms; unsigned int max_hw_heartbeat_ms; struct notifier_block reboot_nb; struct notifier_block restart_nb; @@ -81,6 +82,8 @@ It contains following fields: * max_timeout: the watchdog timer's maximum timeout value (in seconds), as seen from userspace. If set, the maximum configurable value for 'timeout'. Not used if max_hw_heartbeat_ms is non-zero. +* min_hw_heartbeat_ms: Minimum time between heartbeats sent to the chip, + in milli-seconds. * max_hw_heartbeat_ms: Maximum hardware heartbeat, in milli-seconds. 
If set, the infrastructure will send heartbeats to the watchdog driver if 'timeout' is larger than max_hw_heartbeat_ms, unless WDOG_ACTIVE diff --git a/drivers/watchdog/watchdog_dev.c b/drivers/watchdog/watchdog_dev.c index 5163c3eb3428..2d6110278eac 100644 --- a/drivers/watchdog/watchdog_dev.c +++ b/drivers/watchdog/watchdog_dev.c @@ -64,6 +64,7 @@ struct watchdog_core_data { struct watchdog_device *wdd; struct mutex lock; unsigned long last_keepalive; + unsigned long last_hw_keepalive; struct delayed_work work; unsigned long status; /* Internal status bits */ #define _WDOG_DEV_OPEN 0 /* Opened ? */ @@ -137,8 +138,19 @@ static inline void watchdog_update_worker(struct watchdog_device *wdd) static int __watchdog_ping(struct watchdog_device *wdd) { + struct watchdog_core_data *wd_data = wdd->wd_data; + unsigned long earliest_keepalive = wd_data->last_hw_keepalive + + msecs_to_jiffies(wdd->min_hw_heartbeat_ms); int err; + if (time_is_after_jiffies(earliest_keepalive)) { + mod_delayed_work(watchdog_wq, &wd_data->work, + earliest_keepalive - jiffies); + return 0; + } + + wd_data->last_hw_keepalive = jiffies; + if (wdd->ops->ping) err = wdd->ops->ping(wdd); /* ping the watchdog */ else @@ -819,6 +831,9 @@ static int watchdog_cdev_register(struct watchdog_device *wdd, dev_t devno) return err; } + /* Record time of most recent heartbeat as 'just before now'. */ + wd_data->last_hw_keepalive = jiffies - 1; + /* * If the watchdog is running, prevent its driver from being unloaded, * and schedule an immediate ping. diff --git a/include/linux/watchdog.h b/include/linux/watchdog.h index e2f45549b243..51732d6c9555 100644 --- a/include/linux/watchdog.h +++ b/include/linux/watchdog.h @@ -65,6 +65,8 @@ struct watchdog_ops { * @max_timeout:The watchdog devices maximum timeout value (in seconds) * as configurable from user space. Only relevant if * max_hw_heartbeat_ms is not provided. + * @min_hw_heartbeat_ms: + * Minimum time between heartbeats, in milli-seconds. * @max_hw_heartbeat_ms: * Hardware limit for maximum timeout, in milli-seconds. * Replaces max_timeout if specified. @@ -95,6 +97,7 @@ struct watchdog_device { unsigned int timeout; unsigned int min_timeout; unsigned int max_timeout; + unsigned int min_hw_heartbeat_ms; unsigned int max_hw_heartbeat_ms; struct notifier_block reboot_nb; struct notifier_block restart_nb; -- cgit v1.2.3 From 2b021cbf3cb6208f0d40fd2f1869f237934340ed Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 15 Mar 2016 20:43:04 -0400 Subject: cgroup: ignore css_sets associated with dead cgroups during migration Before 2e91fa7f6d45 ("cgroup: keep zombies associated with their original cgroups"), all dead tasks were associated with init_css_set. If a zombie task is requested for migration, while migration prep operations would still be performed on init_css_set, the actual migration would ignore zombie tasks. As init_css_set is always valid, this worked fine. However, after 2e91fa7f6d45, zombie tasks stay with the css_set they were associated with at the time of death. Let's say a task T is associated with cgroup A on hierarchy H-1 and cgroup B on hierarchy H-2. After T becomes a zombie, it would still remain associated with A and B. If A only contains zombie tasks, it can be removed. On removal, A gets marked offline but stays pinned until all zombies are drained. At this point, if migration is initiated on T to a cgroup C on hierarchy H-2, the migration path would try to prepare T's css_set for migration and trigger the following.
WARNING: CPU: 0 PID: 1576 at kernel/cgroup.c:474 cgroup_get+0x121/0x160() CPU: 0 PID: 1576 Comm: bash Not tainted 4.4.0-work+ #289 ... Call Trace: [] dump_stack+0x4e/0x82 [] warn_slowpath_common+0x78/0xb0 [] warn_slowpath_null+0x15/0x20 [] cgroup_get+0x121/0x160 [] link_css_set+0x7b/0x90 [] find_css_set+0x3bc/0x5e0 [] cgroup_migrate_prepare_dst+0x89/0x1f0 [] cgroup_attach_task+0x157/0x230 [] __cgroup_procs_write+0x2b7/0x470 [] cgroup_tasks_write+0xc/0x10 [] cgroup_file_write+0x30/0x1b0 [] kernfs_fop_write+0x13c/0x180 [] __vfs_write+0x23/0xe0 [] vfs_write+0xa4/0x1a0 [] SyS_write+0x44/0xa0 [] entry_SYSCALL_64_fastpath+0x12/0x6f It doesn't make sense to prepare migration for css_sets pointing to dead cgroups as they are guaranteed to contain only zombies which are ignored later during migration. This patch makes cgroup destruction path mark all affected css_sets as dead and updates the migration path to ignore them during preparation. Signed-off-by: Tejun Heo Fixes: 2e91fa7f6d45 ("cgroup: keep zombies associated with their original cgroups") Cc: stable@vger.kernel.org # v4.4+ --- include/linux/cgroup-defs.h | 3 +++ kernel/cgroup.c | 20 ++++++++++++++++++-- 2 files changed, 21 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 34b42f03fcd8..3e39ae5bc799 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -212,6 +212,9 @@ struct css_set { /* all css_task_iters currently walking this cset */ struct list_head task_iters; + /* dead and being drained, ignore for migration */ + bool dead; + /* For RCU-protected deletion */ struct rcu_head rcu_head; }; diff --git a/kernel/cgroup.c b/kernel/cgroup.c index e22df5d81e59..d57318950076 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -2516,6 +2516,14 @@ static void cgroup_migrate_add_src(struct css_set *src_cset, lockdep_assert_held(&cgroup_mutex); lockdep_assert_held(&css_set_lock); + /* + * If ->dead, @src_set is associated with one or more dead cgroups + * and doesn't contain any migratable tasks. Ignore it early so + * that the rest of migration path doesn't get confused by it. + */ + if (src_cset->dead) + return; + src_cgrp = cset_cgroup_from_root(src_cset, dst_cgrp->root); if (!list_empty(&src_cset->mg_preload_node)) @@ -5258,6 +5266,7 @@ static int cgroup_destroy_locked(struct cgroup *cgrp) __releases(&cgroup_mutex) __acquires(&cgroup_mutex) { struct cgroup_subsys_state *css; + struct cgrp_cset_link *link; int ssid; lockdep_assert_held(&cgroup_mutex); @@ -5278,11 +5287,18 @@ static int cgroup_destroy_locked(struct cgroup *cgrp) return -EBUSY; /* - * Mark @cgrp dead. This prevents further task migration and child - * creation by disabling cgroup_lock_live_group(). + * Mark @cgrp and the associated csets dead. The former prevents + * further task migration and child creation by disabling + * cgroup_lock_live_group(). The latter makes the csets ignored by + * the migration path. 
*/ cgrp->self.flags &= ~CSS_ONLINE; + spin_lock_bh(&css_set_lock); + list_for_each_entry(link, &cgrp->cset_links, cset_link) + link->cset->dead = true; + spin_unlock_bh(&css_set_lock); + /* initiate massacre of all css's */ for_each_css(css, ssid, cgrp) kill_css(css); -- cgit v1.2.3 From 02113ba93ea414b7113d64e42b87dc0720e3e578 Mon Sep 17 00:00:00 2001 From: Jon Hunter Date: Tue, 15 Mar 2016 11:33:40 +0000 Subject: PM / clk: Add support for obtaining clocks from device-tree The PM clocks framework requires clients to pass either a con-id or a valid clk pointer in order to add a clock to a device. Add a new function of_pm_clk_add_clks() to allow device clocks to be retrieved from device-tree and populated for a given device. Note that it is not necessary to make the compilation of this new function dependent upon CONFIG_OF because there are stub functions for the device-tree APIs used. In order to handle errors encountered when adding clocks from device-tree, add a function pm_clk_remove_clk() to remove any clocks (using a pointer to the clk structure) that have been added successfully before the error occurred. Signed-off-by: Jon Hunter Acked-by: Geert Uytterhoeven Signed-off-by: Rafael J. Wysocki --- drivers/base/power/clock_ops.c | 89 ++++++++++++++++++++++++++++++++++++++++++ include/linux/pm_clock.h | 9 +++++ 2 files changed, 98 insertions(+) (limited to 'include/linux') diff --git a/drivers/base/power/clock_ops.c b/drivers/base/power/clock_ops.c index 272a52ebafc0..0e64a1b5e62a 100644 --- a/drivers/base/power/clock_ops.c +++ b/drivers/base/power/clock_ops.c @@ -137,6 +137,62 @@ int pm_clk_add_clk(struct device *dev, struct clk *clk) return __pm_clk_add(dev, NULL, clk); } + +/** + * of_pm_clk_add_clks - Start using device clock(s) for power management. + * @dev: Device whose clock(s) is going to be used for power management. + * + * Add a series of clocks described in the 'clocks' device-tree node for + * a device to the list of clocks used for the power management of @dev. + * On success, returns the number of clocks added. Returns a negative + * error code if there are no clocks in the device node for the device + * or if adding a clock fails. + */ +int of_pm_clk_add_clks(struct device *dev) +{ + struct clk **clks; + unsigned int i, count; + int ret; + + if (!dev || !dev->of_node) + return -EINVAL; + + count = of_count_phandle_with_args(dev->of_node, "clocks", + "#clock-cells"); + if (count == 0) + return -ENODEV; + + clks = kcalloc(count, sizeof(*clks), GFP_KERNEL); + if (!clks) + return -ENOMEM; + + for (i = 0; i < count; i++) { + clks[i] = of_clk_get(dev->of_node, i); + if (IS_ERR(clks[i])) { + ret = PTR_ERR(clks[i]); + goto error; + } + + ret = pm_clk_add_clk(dev, clks[i]); + if (ret) { + clk_put(clks[i]); + goto error; + } + } + + kfree(clks); + + return i; + +error: + while (i--) + pm_clk_remove_clk(dev, clks[i]); + + kfree(clks); + + return ret; +} + /** * __pm_clk_remove - Destroy PM clock entry. * @ce: PM clock entry to destroy. */ @@ -197,6 +253,39 @@ void pm_clk_remove(struct device *dev, const char *con_id) __pm_clk_remove(ce); } +/** + * pm_clk_remove_clk - Stop using a device clock for power management. + * @dev: Device whose clock should not be used for PM any more. + * @clk: Clock pointer + * + * Remove the clock pointed to by @clk from the list of clocks used for + * the power management of @dev.
+ */ +void pm_clk_remove_clk(struct device *dev, struct clk *clk) +{ + struct pm_subsys_data *psd = dev_to_psd(dev); + struct pm_clock_entry *ce; + + if (!psd || !clk) + return; + + spin_lock_irq(&psd->lock); + + list_for_each_entry(ce, &psd->clock_list, node) { + if (clk == ce->clk) + goto remove; + } + + spin_unlock_irq(&psd->lock); + return; + + remove: + list_del(&ce->node); + spin_unlock_irq(&psd->lock); + + __pm_clk_remove(ce); +} + /** * pm_clk_init - Initialize a device's list of power management clocks. * @dev: Device to initialize the list of PM clocks for. diff --git a/include/linux/pm_clock.h b/include/linux/pm_clock.h index 25266c600021..308d6044f153 100644 --- a/include/linux/pm_clock.h +++ b/include/linux/pm_clock.h @@ -42,7 +42,9 @@ extern int pm_clk_create(struct device *dev); extern void pm_clk_destroy(struct device *dev); extern int pm_clk_add(struct device *dev, const char *con_id); extern int pm_clk_add_clk(struct device *dev, struct clk *clk); +extern int of_pm_clk_add_clks(struct device *dev); extern void pm_clk_remove(struct device *dev, const char *con_id); +extern void pm_clk_remove_clk(struct device *dev, struct clk *clk); extern int pm_clk_suspend(struct device *dev); extern int pm_clk_resume(struct device *dev); #else @@ -69,11 +71,18 @@ static inline int pm_clk_add_clk(struct device *dev, struct clk *clk) { return -EINVAL; } +static inline int of_pm_clk_add_clks(struct device *dev) +{ + return -EINVAL; +} static inline void pm_clk_remove(struct device *dev, const char *con_id) { } #define pm_clk_suspend NULL #define pm_clk_resume NULL +static inline void pm_clk_remove_clk(struct device *dev, struct clk *clk) +{ +} #endif #ifdef CONFIG_HAVE_CLK -- cgit v1.2.3 From 7e545d6eca20ce8ef7f66a63146cbff82b2ba760 Mon Sep 17 00:00:00 2001 From: Jessica Yu Date: Wed, 16 Mar 2016 20:55:39 -0400 Subject: livepatch/module: remove livepatch module notifier Remove the livepatch module notifier in favor of directly enabling and disabling patches to modules in the module loader. Hard-coding the function calls ensures that ftrace_module_enable() is run before klp_module_coming() during module load, and that klp_module_going() is run before ftrace_release_mod() during module unload. This way, ftrace and livepatch code is run in the correct order during the module load/unload sequence without dependence on the module notifier call chain. 
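For clarity, the resulting hard-coded ordering is summarized below. This is a condensed view of the diff that follows, not additional code.

	/* module load, in prepare_coming_module() */
	ftrace_module_enable(mod);
	err = klp_module_coming(mod);	/* a failure here aborts the load */

	/* module unload and load-error paths */
	blocking_notifier_call_chain(&module_notify_list,
				     MODULE_STATE_GOING, mod);
	klp_module_going(mod);
	ftrace_release_mod(mod);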
Signed-off-by: Jessica Yu Reviewed-by: Petr Mladek Acked-by: Josh Poimboeuf Acked-by: Rusty Russell Signed-off-by: Jiri Kosina --- include/linux/livepatch.h | 13 ++++ kernel/livepatch/core.c | 147 ++++++++++++++++++++++------------------------ kernel/module.c | 10 ++++ 3 files changed, 94 insertions(+), 76 deletions(-) (limited to 'include/linux') diff --git a/include/linux/livepatch.h b/include/linux/livepatch.h index c05679701e10..bd830d590465 100644 --- a/include/linux/livepatch.h +++ b/include/linux/livepatch.h @@ -24,6 +24,8 @@ #include #include +#if IS_ENABLED(CONFIG_LIVEPATCH) + #include enum klp_state { @@ -132,4 +134,15 @@ int klp_unregister_patch(struct klp_patch *); int klp_enable_patch(struct klp_patch *); int klp_disable_patch(struct klp_patch *); +/* Called from the module loader during module coming/going states */ +int klp_module_coming(struct module *mod); +void klp_module_going(struct module *mod); + +#else /* !CONFIG_LIVEPATCH */ + +static inline int klp_module_coming(struct module *mod) { return 0; } +static inline void klp_module_going(struct module *mod) { } + +#endif /* CONFIG_LIVEPATCH */ + #endif /* _LINUX_LIVEPATCH_H_ */ diff --git a/kernel/livepatch/core.c b/kernel/livepatch/core.c index 780f00cdb7e5..d68fbf63b083 100644 --- a/kernel/livepatch/core.c +++ b/kernel/livepatch/core.c @@ -99,12 +99,12 @@ static void klp_find_object_module(struct klp_object *obj) /* * We do not want to block removal of patched modules and therefore * we do not take a reference here. The patches are removed by - * a going module handler instead. + * klp_module_going() instead. */ mod = find_module(obj->name); /* - * Do not mess work of the module coming and going notifiers. - * Note that the patch might still be needed before the going handler + * Do not mess work of klp_module_coming() and klp_module_going(). + * Note that the patch might still be needed before klp_module_going() * is called. Module functions can be called even in the GOING state * until mod->exit() finishes. This is especially important for * patches that modify semantic of the functions. @@ -866,103 +866,108 @@ int klp_register_patch(struct klp_patch *patch) } EXPORT_SYMBOL_GPL(klp_register_patch); -static int klp_module_notify_coming(struct klp_patch *patch, - struct klp_object *obj) +int klp_module_coming(struct module *mod) { - struct module *pmod = patch->mod; - struct module *mod = obj->mod; int ret; + struct klp_patch *patch; + struct klp_object *obj; - ret = klp_init_object_loaded(patch, obj); - if (ret) { - pr_warn("failed to initialize patch '%s' for module '%s' (%d)\n", - pmod->name, mod->name, ret); - return ret; - } + if (WARN_ON(mod->state != MODULE_STATE_COMING)) + return -EINVAL; - if (patch->state == KLP_DISABLED) - return 0; + mutex_lock(&klp_mutex); + /* + * Each module has to know that klp_module_coming() + * has been called. We never know what module will + * get patched by a new patch. 
+ */ + mod->klp_alive = true; - pr_notice("applying patch '%s' to loading module '%s'\n", - pmod->name, mod->name); + list_for_each_entry(patch, &klp_patches, list) { + klp_for_each_object(patch, obj) { + if (!klp_is_module(obj) || strcmp(obj->name, mod->name)) + continue; - ret = klp_enable_object(obj); - if (ret) - pr_warn("failed to apply patch '%s' to module '%s' (%d)\n", - pmod->name, mod->name, ret); - return ret; -} + obj->mod = mod; -static void klp_module_notify_going(struct klp_patch *patch, - struct klp_object *obj) -{ - struct module *pmod = patch->mod; - struct module *mod = obj->mod; + ret = klp_init_object_loaded(patch, obj); + if (ret) { + pr_warn("failed to initialize patch '%s' for module '%s' (%d)\n", + patch->mod->name, obj->mod->name, ret); + goto err; + } - if (patch->state == KLP_DISABLED) - goto disabled; + if (patch->state == KLP_DISABLED) + break; + + pr_notice("applying patch '%s' to loading module '%s'\n", + patch->mod->name, obj->mod->name); + + ret = klp_enable_object(obj); + if (ret) { + pr_warn("failed to apply patch '%s' to module '%s' (%d)\n", + patch->mod->name, obj->mod->name, ret); + goto err; + } + + break; + } + } - pr_notice("reverting patch '%s' on unloading module '%s'\n", - pmod->name, mod->name); + mutex_unlock(&klp_mutex); - klp_disable_object(obj); + return 0; -disabled: +err: + /* + * If a patch is unsuccessfully applied, return + * error to the module loader. + */ + pr_warn("patch '%s' failed for module '%s', refusing to load module '%s'\n", + patch->mod->name, obj->mod->name, obj->mod->name); + mod->klp_alive = false; klp_free_object_loaded(obj); + mutex_unlock(&klp_mutex); + + return ret; } -static int klp_module_notify(struct notifier_block *nb, unsigned long action, - void *data) +void klp_module_going(struct module *mod) { - int ret; - struct module *mod = data; struct klp_patch *patch; struct klp_object *obj; - if (action != MODULE_STATE_COMING && action != MODULE_STATE_GOING) - return 0; + if (WARN_ON(mod->state != MODULE_STATE_GOING && + mod->state != MODULE_STATE_COMING)) + return; mutex_lock(&klp_mutex); - /* - * Each module has to know that the notifier has been called. - * We never know what module will get patched by a new patch. + * Each module has to know that klp_module_going() + * has been called. We never know what module will + * get patched by a new patch. 
*/ - if (action == MODULE_STATE_COMING) - mod->klp_alive = true; - else /* MODULE_STATE_GOING */ - mod->klp_alive = false; + mod->klp_alive = false; list_for_each_entry(patch, &klp_patches, list) { klp_for_each_object(patch, obj) { if (!klp_is_module(obj) || strcmp(obj->name, mod->name)) continue; - if (action == MODULE_STATE_COMING) { - obj->mod = mod; - ret = klp_module_notify_coming(patch, obj); - if (ret) { - obj->mod = NULL; - pr_warn("patch '%s' is in an inconsistent state!\n", - patch->mod->name); - } - } else /* MODULE_STATE_GOING */ - klp_module_notify_going(patch, obj); + if (patch->state != KLP_DISABLED) { + pr_notice("reverting patch '%s' on unloading module '%s'\n", + patch->mod->name, obj->mod->name); + klp_disable_object(obj); + } + klp_free_object_loaded(obj); break; } } mutex_unlock(&klp_mutex); - - return 0; } -static struct notifier_block klp_module_nb = { - .notifier_call = klp_module_notify, - .priority = INT_MIN+1, /* called late but before ftrace notifier */ -}; - static int __init klp_init(void) { int ret; @@ -973,21 +978,11 @@ static int __init klp_init(void) return -EINVAL; } - ret = register_module_notifier(&klp_module_nb); - if (ret) - return ret; - klp_root_kobj = kobject_create_and_add("livepatch", kernel_kobj); - if (!klp_root_kobj) { - ret = -ENOMEM; - goto unregister; - } + if (!klp_root_kobj) + return -ENOMEM; return 0; - -unregister: - unregister_module_notifier(&klp_module_nb); - return ret; } module_init(klp_init); diff --git a/kernel/module.c b/kernel/module.c index 6dbfad415d51..4b65fbb10bdc 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -53,6 +53,7 @@ #include #include #include +#include #include #include #include @@ -984,6 +985,7 @@ SYSCALL_DEFINE2(delete_module, const char __user *, name_user, mod->exit(); blocking_notifier_call_chain(&module_notify_list, MODULE_STATE_GOING, mod); + klp_module_going(mod); ftrace_release_mod(mod); async_synchronize_full(); @@ -3315,6 +3317,7 @@ fail: module_put(mod); blocking_notifier_call_chain(&module_notify_list, MODULE_STATE_GOING, mod); + klp_module_going(mod); ftrace_release_mod(mod); free_module(mod); wake_up_all(&module_wq); @@ -3401,7 +3404,13 @@ out: static int prepare_coming_module(struct module *mod) { + int err; + ftrace_module_enable(mod); + err = klp_module_coming(mod); + if (err) + return err; + blocking_notifier_call_chain(&module_notify_list, MODULE_STATE_COMING, mod); return 0; @@ -3553,6 +3562,7 @@ static int load_module(struct load_info *info, const char __user *uargs, coming_cleanup: blocking_notifier_call_chain(&module_notify_list, MODULE_STATE_GOING, mod); + klp_module_going(mod); bug_cleanup: /* module_bug_cleanup needs module_mutex protection */ -- cgit v1.2.3 From 40cf446b9482bd2c681b60062b34cc47c78342f8 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 4 Mar 2016 20:46:14 +0100 Subject: nfs4.h: add SCSI layout definitions Based on draft-ietf-nfsv4-scsi-layout-05 after the WG last call. Signed-off-by: Christoph Hellwig Signed-off-by: J. 
Bruce Fields --- include/linux/nfs4.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index d6f9b4e6006d..011433478a14 100644 --- a/include/linux/nfs4.h +++ b/include/linux/nfs4.h @@ -529,6 +529,7 @@ enum pnfs_layouttype { LAYOUT_OSD2_OBJECTS = 2, LAYOUT_BLOCK_VOLUME = 3, LAYOUT_FLEX_FILES = 4, + LAYOUT_SCSI = 5, LAYOUT_TYPE_MAX }; @@ -555,6 +556,7 @@ enum pnfs_block_volume_type { PNFS_BLOCK_VOLUME_SLICE = 1, PNFS_BLOCK_VOLUME_CONCAT = 2, PNFS_BLOCK_VOLUME_STRIPE = 3, + PNFS_BLOCK_VOLUME_SCSI = 4, }; enum pnfs_block_extent_state { @@ -568,6 +570,23 @@ enum pnfs_block_extent_state { #define PNFS_BLOCK_EXTENT_SIZE \ (7 * sizeof(__be32) + NFS4_DEVICEID4_SIZE) +/* on the wire size of a scsi commit range */ +#define PNFS_SCSI_RANGE_SIZE \ + (4 * sizeof(__be32)) + +enum scsi_code_set { + PS_CODE_SET_BINARY = 1, + PS_CODE_SET_ASCII = 2, + PS_CODE_SET_UTF8 = 3 +}; + +enum scsi_designator_type { + PS_DESIGNATOR_T10 = 1, + PS_DESIGNATOR_EUI64 = 2, + PS_DESIGNATOR_NAA = 3, + PS_DESIGNATOR_NAME = 8 +}; + #define NFL4_UFLG_MASK 0x0000003F #define NFL4_UFLG_DENSE 0x00000001 #define NFL4_UFLG_COMMIT_THRU_MDS 0x00000002 -- cgit v1.2.3 From 27ee57c93ff00b8a2d6c6dd6b0b3dddda7b43b77 Mon Sep 17 00:00:00 2001 From: Vladimir Davydov Date: Thu, 17 Mar 2016 14:17:35 -0700 Subject: mm: memcontrol: report slab usage in cgroup2 memory.stat Show how much memory is used for storing reclaimable and unreclaimable in-kernel data structures allocated from slab caches. Signed-off-by: Vladimir Davydov Acked-by: Johannes Weiner Cc: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/cgroup-v2.txt | 15 +++++++++++++++ include/linux/memcontrol.h | 21 +++++++++++++++++++++ mm/memcontrol.c | 8 ++++++++ mm/slab.c | 8 +++++--- mm/slab.h | 30 ++++++++++++++++++++++++++++-- mm/slub.c | 3 ++- 6 files changed, 79 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/Documentation/cgroup-v2.txt b/Documentation/cgroup-v2.txt index ff49cf901148..e4e0c1d78cee 100644 --- a/Documentation/cgroup-v2.txt +++ b/Documentation/cgroup-v2.txt @@ -843,6 +843,11 @@ PAGE_SIZE multiple when read back. Amount of memory used to cache filesystem data, including tmpfs and shared memory. + slab + + Amount of memory used for storing in-kernel data + structures. + sock Amount of memory used in network transmission buffers @@ -871,6 +876,16 @@ PAGE_SIZE multiple when read back. on the internal memory management lists used by the page reclaim algorithm + slab_reclaimable + + Part of "slab" that might be reclaimed, such as + dentries and inodes. + + slab_unreclaimable + + Part of "slab" that cannot be reclaimed on memory + pressure. 
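Which of the two buckets a given object lands in is decided when its slab cache is created: caches flagged SLAB_RECLAIM_ACCOUNT (dentries, inodes, and other shrinkable objects) are reported as slab_reclaimable, all others as slab_unreclaimable, per the memcg_charge_slab() hunk below. A minimal sketch with a hypothetical cache; SLAB_ACCOUNT is added on the assumption that the cache should be charged to the allocating cgroup in the first place:

#include <linux/module.h>
#include <linux/slab.h>

struct example_obj {
	unsigned long key;
};

static struct kmem_cache *example_cachep;

static int __init example_init(void)
{
	/*
	 * SLAB_RECLAIM_ACCOUNT selects the slab_reclaimable bucket;
	 * without it the cache is accounted as slab_unreclaimable.
	 */
	example_cachep = kmem_cache_create("example_obj",
					   sizeof(struct example_obj), 0,
					   SLAB_RECLAIM_ACCOUNT | SLAB_ACCOUNT,
					   NULL);
	return example_cachep ? 0 : -ENOMEM;
}

static void __exit example_exit(void)
{
	kmem_cache_destroy(example_cachep);
}

module_init(example_init);
module_exit(example_exit);
MODULE_LICENSE("GPL");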
+ pgfault Total number of page faults incurred diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index f0c4bec6565b..e7af4834ffea 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -53,6 +53,8 @@ enum mem_cgroup_stat_index { MEM_CGROUP_STAT_NSTATS, /* default hierarchy stats */ MEMCG_SOCK = MEM_CGROUP_STAT_NSTATS, + MEMCG_SLAB_RECLAIMABLE, + MEMCG_SLAB_UNRECLAIMABLE, MEMCG_NR_STAT, }; @@ -883,6 +885,20 @@ static __always_inline void memcg_kmem_put_cache(struct kmem_cache *cachep) if (memcg_kmem_enabled()) __memcg_kmem_put_cache(cachep); } + +/** + * memcg_kmem_update_page_stat - update kmem page state statistics + * @page: the page + * @idx: page state item to account + * @val: number of pages (positive or negative) + */ +static inline void memcg_kmem_update_page_stat(struct page *page, + enum mem_cgroup_stat_index idx, int val) +{ + if (memcg_kmem_enabled() && page->mem_cgroup) + this_cpu_add(page->mem_cgroup->stat->count[idx], val); +} + #else #define for_each_memcg_cache_index(_idx) \ for (; NULL; ) @@ -928,6 +944,11 @@ memcg_kmem_get_cache(struct kmem_cache *cachep, gfp_t gfp) static inline void memcg_kmem_put_cache(struct kmem_cache *cachep) { } + +static inline void memcg_kmem_update_page_stat(struct page *page, + enum mem_cgroup_stat_index idx, int val) +{ +} #endif /* CONFIG_MEMCG && !CONFIG_SLOB */ #endif /* _LINUX_MEMCONTROL_H */ diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 430266071c36..3ad64bf464fd 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -5106,6 +5106,9 @@ static int memory_stat_show(struct seq_file *m, void *v) (u64)stat[MEM_CGROUP_STAT_RSS] * PAGE_SIZE); seq_printf(m, "file %llu\n", (u64)stat[MEM_CGROUP_STAT_CACHE] * PAGE_SIZE); + seq_printf(m, "slab %llu\n", + (u64)(stat[MEMCG_SLAB_RECLAIMABLE] + + stat[MEMCG_SLAB_UNRECLAIMABLE]) * PAGE_SIZE); seq_printf(m, "sock %llu\n", (u64)stat[MEMCG_SOCK] * PAGE_SIZE); @@ -5126,6 +5129,11 @@ static int memory_stat_show(struct seq_file *m, void *v) mem_cgroup_lru_names[i], (u64)val * PAGE_SIZE); } + seq_printf(m, "slab_reclaimable %llu\n", + (u64)stat[MEMCG_SLAB_RECLAIMABLE] * PAGE_SIZE); + seq_printf(m, "slab_unreclaimable %llu\n", + (u64)stat[MEMCG_SLAB_UNRECLAIMABLE] * PAGE_SIZE); + /* Accumulated memory events */ seq_printf(m, "pgfault %lu\n", diff --git a/mm/slab.c b/mm/slab.c index 852fc5c79829..56dd0df2a8ce 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -1442,9 +1442,10 @@ static struct page *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, */ static void kmem_freepages(struct kmem_cache *cachep, struct page *page) { - const unsigned long nr_freed = (1 << cachep->gfporder); + int order = cachep->gfporder; + unsigned long nr_freed = (1 << order); - kmemcheck_free_shadow(page, cachep->gfporder); + kmemcheck_free_shadow(page, order); if (cachep->flags & SLAB_RECLAIM_ACCOUNT) sub_zone_page_state(page_zone(page), @@ -1461,7 +1462,8 @@ static void kmem_freepages(struct kmem_cache *cachep, struct page *page) if (current->reclaim_state) current->reclaim_state->reclaimed_slab += nr_freed; - __free_kmem_pages(page, cachep->gfporder); + memcg_uncharge_slab(page, order, cachep); + __free_pages(page, order); } static void kmem_rcu_free(struct rcu_head *head) diff --git a/mm/slab.h b/mm/slab.h index b7934361f026..ff39a8fc3b3f 100644 --- a/mm/slab.h +++ b/mm/slab.h @@ -246,12 +246,33 @@ static __always_inline int memcg_charge_slab(struct page *page, gfp_t gfp, int order, struct kmem_cache *s) { + int ret; + if (!memcg_kmem_enabled()) return 0; if (is_root_cache(s)) return 0; - 
return __memcg_kmem_charge_memcg(page, gfp, order, - s->memcg_params.memcg); + + ret = __memcg_kmem_charge_memcg(page, gfp, order, + s->memcg_params.memcg); + if (ret) + return ret; + + memcg_kmem_update_page_stat(page, + (s->flags & SLAB_RECLAIM_ACCOUNT) ? + MEMCG_SLAB_RECLAIMABLE : MEMCG_SLAB_UNRECLAIMABLE, + 1 << order); + return 0; +} + +static __always_inline void memcg_uncharge_slab(struct page *page, int order, + struct kmem_cache *s) +{ + memcg_kmem_update_page_stat(page, + (s->flags & SLAB_RECLAIM_ACCOUNT) ? + MEMCG_SLAB_RECLAIMABLE : MEMCG_SLAB_UNRECLAIMABLE, + -(1 << order)); + memcg_kmem_uncharge(page, order); } extern void slab_init_memcg_params(struct kmem_cache *); @@ -294,6 +315,11 @@ static inline int memcg_charge_slab(struct page *page, gfp_t gfp, int order, return 0; } +static inline void memcg_uncharge_slab(struct page *page, int order, + struct kmem_cache *s) +{ +} + static inline void slab_init_memcg_params(struct kmem_cache *s) { } diff --git a/mm/slub.c b/mm/slub.c index 6c91324f9370..712d53474082 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -1540,7 +1540,8 @@ static void __free_slab(struct kmem_cache *s, struct page *page) page_mapcount_reset(page); if (current->reclaim_state) current->reclaim_state->reclaimed_slab += pages; - __free_kmem_pages(page, order); + memcg_uncharge_slab(page, order, s); + __free_pages(page, order); } #define need_reserve_slab_rcu \ -- cgit v1.2.3 From 12580e4b54ba8a1b22ec977c200be0174ca42348 Mon Sep 17 00:00:00 2001 From: Vladimir Davydov Date: Thu, 17 Mar 2016 14:17:38 -0700 Subject: mm: memcontrol: report kernel stack usage in cgroup2 memory.stat Show how much memory is allocated to kernel stacks. Signed-off-by: Vladimir Davydov Acked-by: Johannes Weiner Cc: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/cgroup-v2.txt | 4 ++++ include/linux/memcontrol.h | 3 ++- kernel/fork.c | 10 +++++++++- mm/memcontrol.c | 2 ++ 4 files changed, 17 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/Documentation/cgroup-v2.txt b/Documentation/cgroup-v2.txt index e4e0c1d78cee..e2f4e7948a66 100644 --- a/Documentation/cgroup-v2.txt +++ b/Documentation/cgroup-v2.txt @@ -843,6 +843,10 @@ PAGE_SIZE multiple when read back. Amount of memory used to cache filesystem data, including tmpfs and shared memory. + kernel_stack + + Amount of memory allocated to kernel stacks. + slab Amount of memory used for storing in-kernel data diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index e7af4834ffea..d6300313b298 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -52,9 +52,10 @@ enum mem_cgroup_stat_index { MEM_CGROUP_STAT_SWAP, /* # of pages, swapped out */ MEM_CGROUP_STAT_NSTATS, /* default hierarchy stats */ - MEMCG_SOCK = MEM_CGROUP_STAT_NSTATS, + MEMCG_KERNEL_STACK = MEM_CGROUP_STAT_NSTATS, MEMCG_SLAB_RECLAIMABLE, MEMCG_SLAB_UNRECLAIMABLE, + MEMCG_SOCK, MEMCG_NR_STAT, }; diff --git a/kernel/fork.c b/kernel/fork.c index 2e391c754ae7..accb7221d547 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -164,12 +164,20 @@ static struct thread_info *alloc_thread_info_node(struct task_struct *tsk, struct page *page = alloc_kmem_pages_node(node, THREADINFO_GFP, THREAD_SIZE_ORDER); + if (page) + memcg_kmem_update_page_stat(page, MEMCG_KERNEL_STACK, + 1 << THREAD_SIZE_ORDER); + return page ? 
page_address(page) : NULL; } static inline void free_thread_info(struct thread_info *ti) { - free_kmem_pages((unsigned long)ti, THREAD_SIZE_ORDER); + struct page *page = virt_to_page(ti); + + memcg_kmem_update_page_stat(page, MEMCG_KERNEL_STACK, + -(1 << THREAD_SIZE_ORDER)); + __free_kmem_pages(page, THREAD_SIZE_ORDER); } # else static struct kmem_cache *thread_info_cache; diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 3ad64bf464fd..4b7dda7c2e74 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -5106,6 +5106,8 @@ static int memory_stat_show(struct seq_file *m, void *v) (u64)stat[MEM_CGROUP_STAT_RSS] * PAGE_SIZE); seq_printf(m, "file %llu\n", (u64)stat[MEM_CGROUP_STAT_CACHE] * PAGE_SIZE); + seq_printf(m, "kernel_stack %llu\n", + (u64)stat[MEMCG_KERNEL_STACK] * PAGE_SIZE); seq_printf(m, "slab %llu\n", (u64)(stat[MEMCG_SLAB_RECLAIMABLE] + stat[MEMCG_SLAB_UNRECLAIMABLE]) * PAGE_SIZE); -- cgit v1.2.3 From 832fc1de01aea28255cb11d270679b7f1273f0d7 Mon Sep 17 00:00:00 2001 From: Naoya Horiguchi Date: Thu, 17 Mar 2016 14:17:41 -0700 Subject: /proc/kpageflags: return KPF_BUDDY for "tail" buddy pages Currently /proc/kpageflags returns nothing for "tail" buddy pages, which is inconvenient when grasping how free pages are distributed. This patch sets KPF_BUDDY for such pages. With this patch: $ grep MemFree /proc/meminfo ; tools/vm/page-types -b buddy MemFree: 3134992 kB flags page-count MB symbolic-flags long-symbolic-flags 0x0000000000000400 779272 3044 __________B_______________________________ buddy 0x0000000000000c00 4385 17 __________BM______________________________ buddy,mmap total 783657 3061 783657 pages is 3134628 kB (roughly consistent with the global counter), so it's OK. [akpm@linux-foundation.org: update comment, per Naoya] Signed-off-by: Naoya Horiguchi Reviewed-by: Vladimir Davydov Cc: Konstantin Khlebnikov Cc: Naoya Horiguchi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/page.c | 6 ++++-- include/linux/page-flags.h | 2 ++ mm/internal.h | 3 --- mm/page_alloc.c | 2 -- 4 files changed, 6 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/fs/proc/page.c b/fs/proc/page.c index b2855eea5405..0be626d85331 100644 --- a/fs/proc/page.c +++ b/fs/proc/page.c @@ -103,9 +103,9 @@ u64 stable_page_flags(struct page *page) * pseudo flags for the well known (anonymous) memory mapped pages * * Note that page->_mapcount is overloaded in SLOB/SLUB/SLQB, so the - simple test in page_mapcount() is not enough. + simple test in page_mapped() is not enough.
*/ - if (!PageSlab(page) && page_mapcount(page)) + if (!PageSlab(page) && page_mapped(page)) u |= 1 << KPF_MMAP; if (PageAnon(page)) u |= 1 << KPF_ANON; @@ -148,6 +148,8 @@ */ if (PageBuddy(page)) u |= 1 << KPF_BUDDY; + else if (page_count(page) == 0 && is_free_buddy_page(page)) + u |= 1 << KPF_BUDDY; if (PageBalloon(page)) u |= 1 << KPF_BALLOON; diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 19724e6ebd26..597695523679 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -593,6 +593,8 @@ static inline void __ClearPageBuddy(struct page *page) atomic_set(&page->_mapcount, -1); } +extern bool is_free_buddy_page(struct page *page); + #define PAGE_BALLOON_MAPCOUNT_VALUE (-256) static inline int PageBalloon(struct page *page) diff --git a/mm/internal.h b/mm/internal.h index ad9400d759c8..b95952c2faec 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -148,9 +148,6 @@ extern int __isolate_free_page(struct page *page, unsigned int order); extern void __free_pages_bootmem(struct page *page, unsigned long pfn, unsigned int order); extern void prep_compound_page(struct page *page, unsigned int order); -#ifdef CONFIG_MEMORY_FAILURE -extern bool is_free_buddy_page(struct page *page); -#endif extern int user_min_free_kbytes; #if defined CONFIG_COMPACTION || defined CONFIG_CMA diff --git a/mm/page_alloc.c b/mm/page_alloc.c index c46b75d14b6f..c7332a4bc8db 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -7152,7 +7152,6 @@ __offline_isolated_pages(unsigned long start_pfn, unsigned long end_pfn) } #endif -#ifdef CONFIG_MEMORY_FAILURE bool is_free_buddy_page(struct page *page) { struct zone *zone = page_zone(page); @@ -7171,4 +7170,3 @@ bool is_free_buddy_page(struct page *page) return order < MAX_ORDER; } -#endif -- cgit v1.2.3 From 698b1b30642f1ff0ea10ef1de9745ab633031377 Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Thu, 17 Mar 2016 14:18:08 -0700 Subject: mm, compaction: introduce kcompactd Memory compaction can be currently performed in several contexts: - kswapd balancing a zone after a high-order allocation failure - direct compaction to satisfy a high-order allocation, including THP page fault attempts - khugepaged trying to collapse a hugepage - manually from /proc The purpose of compaction is two-fold. The obvious purpose is to satisfy a (pending or future) high-order allocation, and is easy to evaluate. The other purpose is to keep overall memory fragmentation low and help the anti-fragmentation mechanism. The success wrt the latter purpose is more difficult to evaluate. The current situation wrt the purposes has a few drawbacks: - compaction is invoked only when a high-order page or hugepage is not available (or manually). This might be too late for the purposes of keeping memory fragmentation low. - direct compaction increases latency of allocations. Again, it would be better if compaction was performed asynchronously to keep fragmentation low, before the allocation itself comes. - (a special case of the previous) the cost of compaction during THP page faults can easily offset the benefits of THP. - kswapd compaction appears to be complex, fragile and not working in some scenarios. It could also end up compacting for a high-order allocation request when it should be reclaiming memory for a later order-0 request. To improve the situation, we should be able to benefit from an equivalent of kswapd, but for compaction - i.e.
a background thread which responds to fragmentation and the need for high-order allocations (including hugepages) somewhat proactively. One possibility is to extend the responsibilities of kswapd, which could however complicate its design too much. It should be better to let kswapd handle reclaim, as order-0 allocations are often more critical than high-order ones. Another possibility is to extend khugepaged, but this kthread is a single instance and tied to THP configs. This patch goes with the option of a new set of per-node kthreads called kcompactd, and lays the foundations, without introducing any new tunables. The lifecycle mimics kswapd kthreads, including the memory hotplug hooks. For compaction, kcompactd uses the standard compaction_suitable() and compact_finished() criteria and the deferred compaction functionality. Unlike direct compaction, it uses only sync compaction, as there's no allocation latency to minimize. This patch doesn't yet add a call to wakeup_kcompactd. The kswapd compact/reclaim loop for high-order pages will be replaced by waking up kcompactd in the next patch with the description of what's wrong with the old approach. Waking up of the kcompactd threads is also tied to kswapd activity and follows these rules: - we don't want to affect any fastpaths, so wake up kcompactd only from the slowpath, as it's done for kswapd - if kswapd is doing reclaim, it's more important than compaction, so don't invoke kcompactd until kswapd goes to sleep - the target order used for kswapd is passed to kcompactd Possible future uses for kcompactd include the ability to wake up kcompactd on demand in special situations, such as when hugepages are not available (currently not done due to __GFP_NO_KSWAPD) or when a fragmentation event (i.e. __rmqueue_fallback()) occurs. It's also possible to perform periodic compaction with kcompactd. [arnd@arndb.de: fix build errors with kcompactd] [paul.gortmaker@windriver.com: don't use modular references for non modular code] Signed-off-by: Vlastimil Babka Cc: Andrea Arcangeli Cc: "Kirill A.
Shutemov" Cc: Rik van Riel Cc: Joonsoo Kim Cc: Mel Gorman Cc: David Rientjes Cc: Michal Hocko Cc: Johannes Weiner Signed-off-by: Arnd Bergmann Signed-off-by: Paul Gortmaker Cc: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/compaction.h | 16 +++ include/linux/mmzone.h | 6 ++ include/linux/vm_event_item.h | 1 + include/trace/events/compaction.h | 55 ++++++++++ mm/compaction.c | 222 ++++++++++++++++++++++++++++++++++++++ mm/memory_hotplug.c | 9 +- mm/page_alloc.c | 3 + mm/vmstat.c | 1 + 8 files changed, 311 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/compaction.h b/include/linux/compaction.h index 4cd4ddf64cc7..d7c8de583a23 100644 --- a/include/linux/compaction.h +++ b/include/linux/compaction.h @@ -52,6 +52,10 @@ extern void compaction_defer_reset(struct zone *zone, int order, bool alloc_success); extern bool compaction_restarting(struct zone *zone, int order); +extern int kcompactd_run(int nid); +extern void kcompactd_stop(int nid); +extern void wakeup_kcompactd(pg_data_t *pgdat, int order, int classzone_idx); + #else static inline unsigned long try_to_compact_pages(gfp_t gfp_mask, unsigned int order, int alloc_flags, @@ -84,6 +88,18 @@ static inline bool compaction_deferred(struct zone *zone, int order) return true; } +static inline int kcompactd_run(int nid) +{ + return 0; +} +static inline void kcompactd_stop(int nid) +{ +} + +static inline void wakeup_kcompactd(pg_data_t *pgdat, int order, int classzone_idx) +{ +} + #endif /* CONFIG_COMPACTION */ #if defined(CONFIG_COMPACTION) && defined(CONFIG_SYSFS) && defined(CONFIG_NUMA) diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 6de02ac378a0..bdd9a270a813 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -668,6 +668,12 @@ typedef struct pglist_data { mem_hotplug_begin/end() */ int kswapd_max_order; enum zone_type classzone_idx; +#ifdef CONFIG_COMPACTION + int kcompactd_max_order; + enum zone_type kcompactd_classzone_idx; + wait_queue_head_t kcompactd_wait; + struct task_struct *kcompactd; +#endif #ifdef CONFIG_NUMA_BALANCING /* Lock serializing the migrate rate limiting window */ spinlock_t numabalancing_migrate_lock; diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h index 67c1dbd19c6d..58ecc056ee45 100644 --- a/include/linux/vm_event_item.h +++ b/include/linux/vm_event_item.h @@ -53,6 +53,7 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT, COMPACTMIGRATE_SCANNED, COMPACTFREE_SCANNED, COMPACTISOLATED, COMPACTSTALL, COMPACTFAIL, COMPACTSUCCESS, + KCOMPACTD_WAKE, #endif #ifdef CONFIG_HUGETLB_PAGE HTLB_BUDDY_PGALLOC, HTLB_BUDDY_PGALLOC_FAIL, diff --git a/include/trace/events/compaction.h b/include/trace/events/compaction.h index 111e5666e5eb..e215bf68f521 100644 --- a/include/trace/events/compaction.h +++ b/include/trace/events/compaction.h @@ -350,6 +350,61 @@ DEFINE_EVENT(mm_compaction_defer_template, mm_compaction_defer_reset, ); #endif +TRACE_EVENT(mm_compaction_kcompactd_sleep, + + TP_PROTO(int nid), + + TP_ARGS(nid), + + TP_STRUCT__entry( + __field(int, nid) + ), + + TP_fast_assign( + __entry->nid = nid; + ), + + TP_printk("nid=%d", __entry->nid) +); + +DECLARE_EVENT_CLASS(kcompactd_wake_template, + + TP_PROTO(int nid, int order, enum zone_type classzone_idx), + + TP_ARGS(nid, order, classzone_idx), + + TP_STRUCT__entry( + __field(int, nid) + __field(int, order) + __field(enum zone_type, classzone_idx) + ), + + TP_fast_assign( + __entry->nid = nid; + __entry->order = order; + 
__entry->classzone_idx = classzone_idx; + ), + + TP_printk("nid=%d order=%d classzone_idx=%-8s", + __entry->nid, + __entry->order, + __print_symbolic(__entry->classzone_idx, ZONE_TYPE)) +); + +DEFINE_EVENT(kcompactd_wake_template, mm_compaction_wakeup_kcompactd, + + TP_PROTO(int nid, int order, enum zone_type classzone_idx), + + TP_ARGS(nid, order, classzone_idx) +); + +DEFINE_EVENT(kcompactd_wake_template, mm_compaction_kcompactd_wake, + + TP_PROTO(int nid, int order, enum zone_type classzone_idx), + + TP_ARGS(nid, order, classzone_idx) +); + #endif /* _TRACE_COMPACTION_H */ /* This part must be outside protection */ diff --git a/mm/compaction.c b/mm/compaction.c index 93f71d968098..5b2bfbaa821a 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -7,6 +7,7 @@ * * Copyright IBM Corp. 2007-2010 Mel Gorman */ +#include #include #include #include @@ -17,6 +18,8 @@ #include #include #include +#include +#include #include "internal.h" #ifdef CONFIG_COMPACTION @@ -1736,4 +1739,223 @@ void compaction_unregister_node(struct node *node) } #endif /* CONFIG_SYSFS && CONFIG_NUMA */ +static inline bool kcompactd_work_requested(pg_data_t *pgdat) +{ + return pgdat->kcompactd_max_order > 0; +} + +static bool kcompactd_node_suitable(pg_data_t *pgdat) +{ + int zoneid; + struct zone *zone; + enum zone_type classzone_idx = pgdat->kcompactd_classzone_idx; + + for (zoneid = 0; zoneid < classzone_idx; zoneid++) { + zone = &pgdat->node_zones[zoneid]; + + if (!populated_zone(zone)) + continue; + + if (compaction_suitable(zone, pgdat->kcompactd_max_order, 0, + classzone_idx) == COMPACT_CONTINUE) + return true; + } + + return false; +} + +static void kcompactd_do_work(pg_data_t *pgdat) +{ + /* + * With no special task, compact all zones so that a page of requested + * order is allocatable. + */ + int zoneid; + struct zone *zone; + struct compact_control cc = { + .order = pgdat->kcompactd_max_order, + .classzone_idx = pgdat->kcompactd_classzone_idx, + .mode = MIGRATE_SYNC_LIGHT, + .ignore_skip_hint = true, + + }; + bool success = false; + + trace_mm_compaction_kcompactd_wake(pgdat->node_id, cc.order, + cc.classzone_idx); + count_vm_event(KCOMPACTD_WAKE); + + for (zoneid = 0; zoneid < cc.classzone_idx; zoneid++) { + int status; + + zone = &pgdat->node_zones[zoneid]; + if (!populated_zone(zone)) + continue; + + if (compaction_deferred(zone, cc.order)) + continue; + + if (compaction_suitable(zone, cc.order, 0, zoneid) != + COMPACT_CONTINUE) + continue; + + cc.nr_freepages = 0; + cc.nr_migratepages = 0; + cc.zone = zone; + INIT_LIST_HEAD(&cc.freepages); + INIT_LIST_HEAD(&cc.migratepages); + + status = compact_zone(zone, &cc); + + if (zone_watermark_ok(zone, cc.order, low_wmark_pages(zone), + cc.classzone_idx, 0)) { + success = true; + compaction_defer_reset(zone, cc.order, false); + } else if (status == COMPACT_COMPLETE) { + /* + * We use sync migration mode here, so we defer like + * sync direct compaction does. + */ + defer_compaction(zone, cc.order); + } + + VM_BUG_ON(!list_empty(&cc.freepages)); + VM_BUG_ON(!list_empty(&cc.migratepages)); + } + + /* + * Regardless of success, we are done until woken up next. 
But remember + * the requested order/classzone_idx in case it was higher/tighter than + * our current ones + */ + if (pgdat->kcompactd_max_order <= cc.order) + pgdat->kcompactd_max_order = 0; + if (pgdat->kcompactd_classzone_idx >= cc.classzone_idx) + pgdat->kcompactd_classzone_idx = pgdat->nr_zones - 1; +} + +void wakeup_kcompactd(pg_data_t *pgdat, int order, int classzone_idx) +{ + if (!order) + return; + + if (pgdat->kcompactd_max_order < order) + pgdat->kcompactd_max_order = order; + + if (pgdat->kcompactd_classzone_idx > classzone_idx) + pgdat->kcompactd_classzone_idx = classzone_idx; + + if (!waitqueue_active(&pgdat->kcompactd_wait)) + return; + + if (!kcompactd_node_suitable(pgdat)) + return; + + trace_mm_compaction_wakeup_kcompactd(pgdat->node_id, order, + classzone_idx); + wake_up_interruptible(&pgdat->kcompactd_wait); +} + +/* + * The background compaction daemon, started as a kernel thread + * from the init process. + */ +static int kcompactd(void *p) +{ + pg_data_t *pgdat = (pg_data_t*)p; + struct task_struct *tsk = current; + + const struct cpumask *cpumask = cpumask_of_node(pgdat->node_id); + + if (!cpumask_empty(cpumask)) + set_cpus_allowed_ptr(tsk, cpumask); + + set_freezable(); + + pgdat->kcompactd_max_order = 0; + pgdat->kcompactd_classzone_idx = pgdat->nr_zones - 1; + + while (!kthread_should_stop()) { + trace_mm_compaction_kcompactd_sleep(pgdat->node_id); + wait_event_freezable(pgdat->kcompactd_wait, + kcompactd_work_requested(pgdat)); + + kcompactd_do_work(pgdat); + } + + return 0; +} + +/* + * This kcompactd start function will be called by init and node-hot-add. + * On node-hot-add, kcompactd will moved to proper cpus if cpus are hot-added. + */ +int kcompactd_run(int nid) +{ + pg_data_t *pgdat = NODE_DATA(nid); + int ret = 0; + + if (pgdat->kcompactd) + return 0; + + pgdat->kcompactd = kthread_run(kcompactd, pgdat, "kcompactd%d", nid); + if (IS_ERR(pgdat->kcompactd)) { + pr_err("Failed to start kcompactd on node %d\n", nid); + ret = PTR_ERR(pgdat->kcompactd); + pgdat->kcompactd = NULL; + } + return ret; +} + +/* + * Called by memory hotplug when all memory in a node is offlined. Caller must + * hold mem_hotplug_begin/end(). + */ +void kcompactd_stop(int nid) +{ + struct task_struct *kcompactd = NODE_DATA(nid)->kcompactd; + + if (kcompactd) { + kthread_stop(kcompactd); + NODE_DATA(nid)->kcompactd = NULL; + } +} + +/* + * It's optimal to keep kcompactd on the same CPUs as their memory, but + * not required for correctness. So if the last cpu in a node goes + * away, we get changed to run anywhere: as the first one comes back, + * restore their cpu bindings. 
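The KCOMPACTD_WAKE event counted in kcompactd_do_work() above surfaces as compact_daemon_wake in /proc/vmstat (see the mm/vmstat.c hunk below), so the daemon's activity can be watched from userspace. A small runnable reader, assuming nothing beyond that counter name:

#include <stdio.h>
#include <string.h>

int main(void)
{
	FILE *f = fopen("/proc/vmstat", "r");
	char line[128];

	if (!f)
		return 1;
	/* print the kcompactd wakeup counter added by this patch */
	while (fgets(line, sizeof(line), f))
		if (!strncmp(line, "compact_daemon_wake ", 20))
			fputs(line, stdout);
	fclose(f);
	return 0;
}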
+ */ +static int cpu_callback(struct notifier_block *nfb, unsigned long action, + void *hcpu) +{ + int nid; + + if (action == CPU_ONLINE || action == CPU_ONLINE_FROZEN) { + for_each_node_state(nid, N_MEMORY) { + pg_data_t *pgdat = NODE_DATA(nid); + const struct cpumask *mask; + + mask = cpumask_of_node(pgdat->node_id); + + if (cpumask_any_and(cpu_online_mask, mask) < nr_cpu_ids) + /* One of our CPUs online: restore mask */ + set_cpus_allowed_ptr(pgdat->kcompactd, mask); + } + } + return NOTIFY_OK; +} + +static int __init kcompactd_init(void) +{ + int nid; + + for_each_node_state(nid, N_MEMORY) + kcompactd_run(nid); + hotcpu_notifier(cpu_callback, 0); + return 0; +} +subsys_initcall(kcompactd_init) + #endif /* CONFIG_COMPACTION */ diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 24ea06393816..d9bcb26fc4df 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -33,6 +33,7 @@ #include #include #include +#include #include @@ -1105,8 +1106,10 @@ int __ref online_pages(unsigned long pfn, unsigned long nr_pages, int online_typ init_per_zone_wmark_min(); - if (onlined_pages) + if (onlined_pages) { kswapd_run(zone_to_nid(zone)); + kcompactd_run(nid); + } vm_total_pages = nr_free_pagecache_pages(); @@ -1880,8 +1883,10 @@ repeat: zone_pcp_update(zone); node_states_clear_node(node, &arg); - if (arg.status_change_nid >= 0) + if (arg.status_change_nid >= 0) { kswapd_stop(node); + kcompactd_stop(node); + } vm_total_pages = nr_free_pagecache_pages(); writeback_set_ratelimit(); diff --git a/mm/page_alloc.c b/mm/page_alloc.c index b1fc19ebb8a2..25a75da53c27 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -5405,6 +5405,9 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat) #endif init_waitqueue_head(&pgdat->kswapd_wait); init_waitqueue_head(&pgdat->pfmemalloc_wait); +#ifdef CONFIG_COMPACTION + init_waitqueue_head(&pgdat->kcompactd_wait); +#endif pgdat_page_ext_init(pgdat); for (j = 0; j < MAX_NR_ZONES; j++) { diff --git a/mm/vmstat.c b/mm/vmstat.c index 69ce64f7b8d7..f80066248c94 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -826,6 +826,7 @@ const char * const vmstat_text[] = { "compact_stall", "compact_fail", "compact_success", + "compact_daemon_wake", #endif #ifdef CONFIG_HUGETLB_PAGE -- cgit v1.2.3 From ee91ef6173e81819f5ff610c2485802081635657 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Thu, 17 Mar 2016 14:18:21 -0700 Subject: bufferhead: force inlining of buffer head flag operations With both gcc 4.7.2 and 4.9.2, sometimes gcc mysteriously doesn't inline very small functions we expect to be inlined. See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=66122 With this .config: http://busybox.net/~vda/kernel_config_OPTIMIZE_INLINING_and_Os, set_buffer_foo(), clear_buffer_foo() and similar functions get deinlined about 60 times. Examples of disassembly: (14 copies, 43 calls): 55 push %rbp 48 89 e5 mov %rsp,%rbp f0 80 0f 20 lock orb $0x20,(%rdi) 5d pop %rbp c3 retq (3 copies, 34 calls): 48 8b 07 mov (%rdi),%rax 55 push %rbp 48 89 e5 mov %rsp,%rbp 48 c1 e8 05 shr $0x5,%rax 83 e0 01 and $0x1,%eax 5d pop %rbp c3 retq (5 copies, 13 calls): 55 push %rbp 48 89 e5 mov %rsp,%rbp f0 80 0f 40 lock orb $0x40,(%rdi) 5d pop %rbp c3 retq This patch fixes this via s/inline/__always_inline/. This decreases vmlinux by about 3 kbytes. 
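To make the change concrete, this is roughly what one pair of generated accessors looks like once BUFFER_FNS() uses __always_inline, taking the real BH_Dirty flag as the example:

/* expansion of BUFFER_FNS(Dirty, dirty) after this patch: */
static __always_inline void set_buffer_dirty(struct buffer_head *bh)
{
	set_bit(BH_Dirty, &(bh)->b_state);
}

static __always_inline int buffer_dirty(const struct buffer_head *bh)
{
	return test_bit(BH_Dirty, &(bh)->b_state);
}

Forcing these to be emitted inline removes the deinlined copies and calls listed above; the before/after vmlinux sizes follow.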
text data bss dec hex filename 88200439 19905208 36421632 144527279 89d4faf vmlinux2 88197239 19905240 36421632 144524111 89d434f vmlinux Signed-off-by: Denys Vlasenko Cc: Ingo Molnar Cc: Thomas Graf Cc: Peter Zijlstra Cc: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/buffer_head.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 89d9aa9e79bf..c67f052cc5e5 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -82,15 +82,15 @@ struct buffer_head { * and buffer_foo() functions. */ #define BUFFER_FNS(bit, name) \ -static inline void set_buffer_##name(struct buffer_head *bh) \ +static __always_inline void set_buffer_##name(struct buffer_head *bh) \ { \ set_bit(BH_##bit, &(bh)->b_state); \ } \ -static inline void clear_buffer_##name(struct buffer_head *bh) \ +static __always_inline void clear_buffer_##name(struct buffer_head *bh) \ { \ clear_bit(BH_##bit, &(bh)->b_state); \ } \ -static inline int buffer_##name(const struct buffer_head *bh) \ +static __always_inline int buffer_##name(const struct buffer_head *bh) \ { \ return test_bit(BH_##bit, &(bh)->b_state); \ } @@ -99,11 +99,11 @@ static inline int buffer_##name(const struct buffer_head *bh) \ * test_set_buffer_foo() and test_clear_buffer_foo() */ #define TAS_BUFFER_FNS(bit, name) \ -static inline int test_set_buffer_##name(struct buffer_head *bh) \ +static __always_inline int test_set_buffer_##name(struct buffer_head *bh) \ { \ return test_and_set_bit(BH_##bit, &(bh)->b_state); \ } \ -static inline int test_clear_buffer_##name(struct buffer_head *bh) \ +static __always_inline int test_clear_buffer_##name(struct buffer_head *bh) \ { \ return test_and_clear_bit(BH_##bit, &(bh)->b_state); \ } \ -- cgit v1.2.3 From 4b0f326163f09e6d67f09dd5e1a70093cee710fc Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Thu, 17 Mar 2016 14:18:24 -0700 Subject: include/linux/page-flags.h: force inlining of selected page flag modifications Sometimes gcc mysteriously doesn't inline very small functions we expect to be inlined. See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=66122 With this .config: http://busybox.net/~vda/kernel_config_OPTIMIZE_INLINING_and_Os, the following functions get deinlined many times. Examples of disassembly: (43 copies, 141 calls): 55 push %rbp 48 89 e5 mov %rsp,%rbp f0 80 0f 08 lock orb $0x8,(%rdi) 5d pop %rbp c3 retq (10 copies, 134 calls): 48 8b 07 mov (%rdi),%rax 55 push %rbp 48 89 e5 mov %rsp,%rbp 48 c1 e8 0b shr $0xb,%rax 83 e0 01 and $0x1,%eax 5d pop %rbp c3 retq This patch fixes this via s/inline/__always_inline/. 
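As with the buffer head flags, the effect is easiest to see on a single expansion. Assuming the Dirty flag uses the PF_HEAD policy (as it does in this era's page-flags.h), SETPAGEFLAG(Dirty, dirty, PF_HEAD) now generates:

static __always_inline void SetPageDirty(struct page *page)
{
	set_bit(PG_dirty, &PF_HEAD(page, 1)->flags);
}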
Code size decrease after the patch is ~7k: text data bss dec hex filename 92125002 20826048 36417536 149368586 8e72f0a vmlinux 92118087 20826112 36417536 149361735 8e71447 vmlinux7_pageops_after Signed-off-by: Denys Vlasenko Cc: Ingo Molnar Cc: Thomas Graf Cc: Peter Zijlstra Cc: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/page-flags.h | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 597695523679..f4ed4f1b0c77 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -144,12 +144,12 @@ static inline struct page *compound_head(struct page *page) return page; } -static inline int PageTail(struct page *page) +static __always_inline int PageTail(struct page *page) { return READ_ONCE(page->compound_head) & 1; } -static inline int PageCompound(struct page *page) +static __always_inline int PageCompound(struct page *page) { return test_bit(PG_head, &page->flags) || PageTail(page); } @@ -184,31 +184,31 @@ static inline int PageCompound(struct page *page) * Macros to create function definitions for page flags */ #define TESTPAGEFLAG(uname, lname, policy) \ -static inline int Page##uname(struct page *page) \ +static __always_inline int Page##uname(struct page *page) \ { return test_bit(PG_##lname, &policy(page, 0)->flags); } #define SETPAGEFLAG(uname, lname, policy) \ -static inline void SetPage##uname(struct page *page) \ +static __always_inline void SetPage##uname(struct page *page) \ { set_bit(PG_##lname, &policy(page, 1)->flags); } #define CLEARPAGEFLAG(uname, lname, policy) \ -static inline void ClearPage##uname(struct page *page) \ +static __always_inline void ClearPage##uname(struct page *page) \ { clear_bit(PG_##lname, &policy(page, 1)->flags); } #define __SETPAGEFLAG(uname, lname, policy) \ -static inline void __SetPage##uname(struct page *page) \ +static __always_inline void __SetPage##uname(struct page *page) \ { __set_bit(PG_##lname, &policy(page, 1)->flags); } #define __CLEARPAGEFLAG(uname, lname, policy) \ -static inline void __ClearPage##uname(struct page *page) \ +static __always_inline void __ClearPage##uname(struct page *page) \ { __clear_bit(PG_##lname, &policy(page, 1)->flags); } #define TESTSETFLAG(uname, lname, policy) \ -static inline int TestSetPage##uname(struct page *page) \ +static __always_inline int TestSetPage##uname(struct page *page) \ { return test_and_set_bit(PG_##lname, &policy(page, 1)->flags); } #define TESTCLEARFLAG(uname, lname, policy) \ -static inline int TestClearPage##uname(struct page *page) \ +static __always_inline int TestClearPage##uname(struct page *page) \ { return test_and_clear_bit(PG_##lname, &policy(page, 1)->flags); } #define PAGEFLAG(uname, lname, policy) \ @@ -371,7 +371,7 @@ PAGEFLAG(Idle, idle, PF_ANY) #define PAGE_MAPPING_KSM 2 #define PAGE_MAPPING_FLAGS (PAGE_MAPPING_ANON | PAGE_MAPPING_KSM) -static inline int PageAnon(struct page *page) +static __always_inline int PageAnon(struct page *page) { page = compound_head(page); return ((unsigned long)page->mapping & PAGE_MAPPING_ANON) != 0; @@ -384,7 +384,7 @@ static inline int PageAnon(struct page *page) * is found in VM_MERGEABLE vmas. It's a PageAnon page, pointing not to any * anon_vma, but to that page's node of the stable tree. 
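* In other words, the low two bits of page->mapping encode the mapping type:
* 0 for a file (or no) mapping, PAGE_MAPPING_ANON (1) for a plain anon_vma
* pointer, and PAGE_MAPPING_ANON | PAGE_MAPPING_KSM (3) for a KSM node.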
*/ -static inline int PageKsm(struct page *page) +static __always_inline int PageKsm(struct page *page) { page = compound_head(page); return ((unsigned long)page->mapping & PAGE_MAPPING_FLAGS) == @@ -415,14 +415,14 @@ static inline int PageUptodate(struct page *page) return ret; } -static inline void __SetPageUptodate(struct page *page) +static __always_inline void __SetPageUptodate(struct page *page) { VM_BUG_ON_PAGE(PageTail(page), page); smp_wmb(); __set_bit(PG_uptodate, &page->flags); } -static inline void SetPageUptodate(struct page *page) +static __always_inline void SetPageUptodate(struct page *page) { VM_BUG_ON_PAGE(PageTail(page), page); /* @@ -456,12 +456,12 @@ static inline void set_page_writeback_keepwrite(struct page *page) __PAGEFLAG(Head, head, PF_ANY) CLEARPAGEFLAG(Head, head, PF_ANY) -static inline void set_compound_head(struct page *page, struct page *head) +static __always_inline void set_compound_head(struct page *page, struct page *head) { WRITE_ONCE(page->compound_head, (unsigned long)head + 1); } -static inline void clear_compound_head(struct page *page) +static __always_inline void clear_compound_head(struct page *page) { WRITE_ONCE(page->compound_head, 0); } -- cgit v1.2.3 From b6ecd2dea4435a771a99c497a6ac5df6d3618c5a Mon Sep 17 00:00:00 2001 From: Vladimir Davydov Date: Thu, 17 Mar 2016 14:18:33 -0700 Subject: mm: memcontrol: zap memcg_kmem_online helper As kmem accounting is now either enabled for all cgroups or disabled system-wide, there's no point in having memcg_kmem_online() helper - instead one can use memcg_kmem_enabled() and mem_cgroup_online(), as shrink_slab() now does. There are only two places left where this helper is used - __memcg_kmem_charge() and memcg_create_kmem_cache(). The former can only be called if memcg_kmem_enabled() returned true. Since the cgroup it operates on is online, mem_cgroup_is_root() check will be enough. memcg_create_kmem_cache() can't use mem_cgroup_online() helper instead of memcg_kmem_online(), because it relies on the fact that in memcg_offline_kmem() memcg->kmem_state is changed before memcg_deactivate_kmem_caches() is called, but there we can just open-code the check. Signed-off-by: Vladimir Davydov Acked-by: Johannes Weiner Cc: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 10 ---------- mm/memcontrol.c | 2 +- mm/slab_common.c | 2 +- 3 files changed, 2 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index d6300313b298..bc8e4e22f58f 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -795,11 +795,6 @@ static inline bool memcg_kmem_enabled(void) return static_branch_unlikely(&memcg_kmem_enabled_key); } -static inline bool memcg_kmem_online(struct mem_cgroup *memcg) -{ - return memcg->kmem_state == KMEM_ONLINE; -} - /* * In general, we'll do everything in our power to not incur in any overhead * for non-memcg users for the kmem functions. 
Not even a function call, if we @@ -909,11 +904,6 @@ static inline bool memcg_kmem_enabled(void) return false; } -static inline bool memcg_kmem_online(struct mem_cgroup *memcg) -{ - return false; -} - static inline int memcg_kmem_charge(struct page *page, gfp_t gfp, int order) { return 0; diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 28d1b1e9d4fb..341bf86d26c2 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -2346,7 +2346,7 @@ int __memcg_kmem_charge(struct page *page, gfp_t gfp, int order) int ret = 0; memcg = get_mem_cgroup_from_mm(current->mm); - if (memcg_kmem_online(memcg)) + if (!mem_cgroup_is_root(memcg)) ret = __memcg_kmem_charge_memcg(page, gfp, order, memcg); css_put(&memcg->css); return ret; diff --git a/mm/slab_common.c b/mm/slab_common.c index 6afb2263a5c5..8addc3c4df37 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c @@ -510,7 +510,7 @@ void memcg_create_kmem_cache(struct mem_cgroup *memcg, * The memory cgroup could have been offlined while the cache * creation work was pending. */ - if (!memcg_kmem_online(memcg)) + if (memcg->kmem_state != KMEM_ONLINE) goto out_unlock; idx = memcg_cache_id(memcg); -- cgit v1.2.3 From 0a6b76dd23fa08c5fd7b68acdb55018a37afd4aa Mon Sep 17 00:00:00 2001 From: Vladimir Davydov Date: Thu, 17 Mar 2016 14:18:42 -0700 Subject: mm: workingset: make shadow node shrinker memcg aware Workingset code was recently made memcg aware, but shadow node shrinker is still global. As a result, one small cgroup can consume all memory available for shadow nodes, possibly hurting other cgroups by reclaiming their shadow nodes, even though reclaim distances stored in its shadow nodes have no effect. To avoid this, we need to make shadow node shrinker memcg aware. Signed-off-by: Vladimir Davydov Acked-by: Johannes Weiner Cc: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 10 ++++++++++ mm/memcontrol.c | 5 ++--- mm/workingset.c | 10 +++++++--- 3 files changed, 19 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index bc8e4e22f58f..1191d79aa495 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -403,6 +403,9 @@ int mem_cgroup_select_victim_node(struct mem_cgroup *memcg); void mem_cgroup_update_lru_size(struct lruvec *lruvec, enum lru_list lru, int nr_pages); +unsigned long mem_cgroup_node_nr_lru_pages(struct mem_cgroup *memcg, + int nid, unsigned int lru_mask); + static inline unsigned long mem_cgroup_get_lru_size(struct lruvec *lruvec, enum lru_list lru) { @@ -661,6 +664,13 @@ mem_cgroup_update_lru_size(struct lruvec *lruvec, enum lru_list lru, { } +static inline unsigned long +mem_cgroup_node_nr_lru_pages(struct mem_cgroup *memcg, + int nid, unsigned int lru_mask) +{ + return 0; +} + static inline void mem_cgroup_print_oom_info(struct mem_cgroup *memcg, struct task_struct *p) { diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 341bf86d26c2..ae8b81c55685 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -638,9 +638,8 @@ static void mem_cgroup_charge_statistics(struct mem_cgroup *memcg, __this_cpu_add(memcg->stat->nr_page_events, nr_pages); } -static unsigned long mem_cgroup_node_nr_lru_pages(struct mem_cgroup *memcg, - int nid, - unsigned int lru_mask) +unsigned long mem_cgroup_node_nr_lru_pages(struct mem_cgroup *memcg, + int nid, unsigned int lru_mask) { unsigned long nr = 0; int zid; diff --git a/mm/workingset.c b/mm/workingset.c index 68e8cd94ebe4..8a75f8d2916a 100644 --- 
a/mm/workingset.c +++ b/mm/workingset.c @@ -349,8 +349,12 @@ static unsigned long count_shadow_nodes(struct shrinker *shrinker, shadow_nodes = list_lru_shrink_count(&workingset_shadow_nodes, sc); local_irq_enable(); - pages = node_page_state(sc->nid, NR_ACTIVE_FILE) + - node_page_state(sc->nid, NR_INACTIVE_FILE); + if (memcg_kmem_enabled()) + pages = mem_cgroup_node_nr_lru_pages(sc->memcg, sc->nid, + LRU_ALL_FILE); + else + pages = node_page_state(sc->nid, NR_ACTIVE_FILE) + + node_page_state(sc->nid, NR_INACTIVE_FILE); /* * Active cache pages are limited to 50% of memory, and shadow @@ -460,7 +464,7 @@ static struct shrinker workingset_shadow_shrinker = { .count_objects = count_shadow_nodes, .scan_objects = scan_shadow_nodes, .seeks = DEFAULT_SEEKS, - .flags = SHRINKER_NUMA_AWARE, + .flags = SHRINKER_NUMA_AWARE | SHRINKER_MEMCG_AWARE, }; /* -- cgit v1.2.3 From f9719a03de51e13526d614e79d002f838770b2d6 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Thu, 17 Mar 2016 14:18:45 -0700 Subject: thp, vmstats: count deferred split events Count how many times we put a THP in split queue. Currently, it happens on partial unmap of a THP. Rapidly growing value can indicate that an application behaves unfriendly wrt THP: often fault in huge page and then unmap part of it. This leads to unnecessary memory fragmentation and the application may require tuning. The event also can help with debugging kernel [mis-]behaviour. Signed-off-by: Kirill A. Shutemov Cc: Andrea Arcangeli Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/vm/transhuge.txt | 5 +++++ include/linux/vm_event_item.h | 1 + mm/huge_memory.c | 1 + mm/vmstat.c | 1 + 4 files changed, 8 insertions(+) (limited to 'include/linux') diff --git a/Documentation/vm/transhuge.txt b/Documentation/vm/transhuge.txt index 21cf34f3ddb2..0dc8632aa01e 100644 --- a/Documentation/vm/transhuge.txt +++ b/Documentation/vm/transhuge.txt @@ -229,6 +229,11 @@ thp_split_page is incremented every time a huge page is split into base thp_split_page_failed is is incremented if kernel fails to split huge page. This can happen if the page was pinned by somebody. +thp_deferred_split_page is incremented when a huge page is put onto split + queue. This happens when a huge page is partially unmapped and + splitting it would free up some memory. Pages on split queue are + going to be split under memory pressure. + thp_split_pmd is incremented every time a PMD split into table of PTEs. This can happen, for instance, when application calls mprotect() or munmap() on part of huge page. 
It doesn't split huge page, only diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h index 58ecc056ee45..ec084321fe09 100644 --- a/include/linux/vm_event_item.h +++ b/include/linux/vm_event_item.h @@ -72,6 +72,7 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT, THP_COLLAPSE_ALLOC_FAILED, THP_SPLIT_PAGE, THP_SPLIT_PAGE_FAILED, + THP_DEFERRED_SPLIT_PAGE, THP_SPLIT_PMD, THP_ZERO_PAGE_ALLOC, THP_ZERO_PAGE_ALLOC_FAILED, diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 1ea21e203a70..1dddfb21fc22 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -3455,6 +3455,7 @@ void deferred_split_huge_page(struct page *page) spin_lock_irqsave(&pgdata->split_queue_lock, flags); if (list_empty(page_deferred_list(page))) { + count_vm_event(THP_DEFERRED_SPLIT_PAGE); list_add_tail(page_deferred_list(page), &pgdata->split_queue); pgdata->split_queue_len++; } diff --git a/mm/vmstat.c b/mm/vmstat.c index f80066248c94..5e4300482897 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -848,6 +848,7 @@ const char * const vmstat_text[] = { "thp_collapse_alloc_failed", "thp_split_page", "thp_split_page_failed", + "thp_deferred_split_page", "thp_split_pmd", "thp_zero_page_alloc", "thp_zero_page_alloc_failed", -- cgit v1.2.3 From ea606cf5d8df370e7932460dfd960b21f20e7c6d Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Thu, 17 Mar 2016 14:18:48 -0700 Subject: mm: move max_map_count bits into mm.h max_map_count sysctl unrelated to scheduler. Move its bits from include/linux/sched/sysctl.h to include/linux/mm.h. Signed-off-by: Andrey Ryabinin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 21 +++++++++++++++++++++ include/linux/sched/sysctl.h | 21 --------------------- mm/mmap.c | 1 - mm/mremap.c | 1 - mm/nommu.c | 1 - 5 files changed, 21 insertions(+), 24 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index dbf1eddab964..6922adf41938 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -82,6 +82,27 @@ extern int mmap_rnd_compat_bits __read_mostly; #define mm_forbids_zeropage(X) (0) #endif +/* + * Default maximum number of active map areas, this limits the number of vmas + * per mm struct. Users can overwrite this number by sysctl but there is a + * problem. + * + * When a program's coredump is generated as ELF format, a section is created + * per a vma. In ELF, the number of sections is represented in unsigned short. + * This means the number of sections should be smaller than 65535 at coredump. + * Because the kernel adds some informative sections to a image of program at + * generating coredump, we need some margin. The number of extra sections is + * 1-3 now and depends on arch. We use "5" as safe margin, here. + * + * ELF extended numbering allows more than 65535 sections, so 16-bit bound is + * not a hard limit any more. Although some userspace tools can be surprised by + * that. 
+ */ +#define MAPCOUNT_ELF_CORE_MARGIN (5) +#define DEFAULT_MAX_MAP_COUNT (USHRT_MAX - MAPCOUNT_ELF_CORE_MARGIN) + +extern int sysctl_max_map_count; + extern unsigned long sysctl_user_reserve_kbytes; extern unsigned long sysctl_admin_reserve_kbytes; diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h index 4f080ab4f2cd..22db1e63707e 100644 --- a/include/linux/sched/sysctl.h +++ b/include/linux/sched/sysctl.h @@ -14,27 +14,6 @@ extern int proc_dohung_task_timeout_secs(struct ctl_table *table, int write, enum { sysctl_hung_task_timeout_secs = 0 }; #endif -/* - * Default maximum number of active map areas, this limits the number of vmas - * per mm struct. Users can overwrite this number by sysctl but there is a - * problem. - * - * When a program's coredump is generated as ELF format, a section is created - * per a vma. In ELF, the number of sections is represented in unsigned short. - * This means the number of sections should be smaller than 65535 at coredump. - * Because the kernel adds some informative sections to a image of program at - * generating coredump, we need some margin. The number of extra sections is - * 1-3 now and depends on arch. We use "5" as safe margin, here. - * - * ELF extended numbering allows more than 65535 sections, so 16-bit bound is - * not a hard limit any more. Although some userspace tools can be surprised by - * that. - */ -#define MAPCOUNT_ELF_CORE_MARGIN (5) -#define DEFAULT_MAX_MAP_COUNT (USHRT_MAX - MAPCOUNT_ELF_CORE_MARGIN) - -extern int sysctl_max_map_count; - extern unsigned int sysctl_sched_latency; extern unsigned int sysctl_sched_min_granularity; extern unsigned int sysctl_sched_wakeup_granularity; diff --git a/mm/mmap.c b/mm/mmap.c index 90e3b869a8b9..676f422f2e2c 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -37,7 +37,6 @@ #include #include #include -#include #include #include #include diff --git a/mm/mremap.c b/mm/mremap.c index 8eeba02fc991..e30c8a6489a6 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -20,7 +20,6 @@ #include #include #include -#include #include #include diff --git a/mm/nommu.c b/mm/nommu.c index fbf6f0f1d6c9..9bdf8b119078 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -33,7 +33,6 @@ #include #include #include -#include #include #include -- cgit v1.2.3 From bcf6691797f425b301f629bb783b7ff2d0bcfa5a Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Thu, 17 Mar 2016 14:18:53 -0700 Subject: mm, tracing: refresh __def_vmaflag_names Get list of VMA flags up-to-date and sort it to match VM_* definition order. [vbabka@suse.cz: add a note above vmaflag definitions to update the names when changing] Signed-off-by: Kirill A. Shutemov Acked-by: Vlastimil Babka Signed-off-by: Vlastimil Babka Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 1 + include/trace/events/mmflags.h | 23 ++++++++++++++++------- 2 files changed, 17 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 6922adf41938..b3202612bb95 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -143,6 +143,7 @@ extern unsigned int kobjsize(const void *objp); /* * vm_flags in vm_area_struct, see mm_types.h. 
+ * When changing, update also include/trace/events/mmflags.h */ #define VM_NONE 0x00000000 diff --git a/include/trace/events/mmflags.h b/include/trace/events/mmflags.h index a849185c82f0..43cedbf0c759 100644 --- a/include/trace/events/mmflags.h +++ b/include/trace/events/mmflags.h @@ -111,15 +111,21 @@ IF_HAVE_PG_IDLE(PG_idle, "idle" ) ) : "none" #if defined(CONFIG_X86) -#define __VM_ARCH_SPECIFIC {VM_PAT, "pat" } +#define __VM_ARCH_SPECIFIC_1 {VM_PAT, "pat" } #elif defined(CONFIG_PPC) -#define __VM_ARCH_SPECIFIC {VM_SAO, "sao" } +#define __VM_ARCH_SPECIFIC_1 {VM_SAO, "sao" } #elif defined(CONFIG_PARISC) || defined(CONFIG_METAG) || defined(CONFIG_IA64) -#define __VM_ARCH_SPECIFIC {VM_GROWSUP, "growsup" } +#define __VM_ARCH_SPECIFIC_1 {VM_GROWSUP, "growsup" } #elif !defined(CONFIG_MMU) -#define __VM_ARCH_SPECIFIC {VM_MAPPED_COPY,"mappedcopy" } +#define __VM_ARCH_SPECIFIC_1 {VM_MAPPED_COPY,"mappedcopy" } #else -#define __VM_ARCH_SPECIFIC {VM_ARCH_1, "arch_1" } +#define __VM_ARCH_SPECIFIC_1 {VM_ARCH_1, "arch_1" } +#endif + +#if defined(CONFIG_X86) +#define __VM_ARCH_SPECIFIC_2 {VM_MPX, "mpx" } +#else +#define __VM_ARCH_SPECIFIC_2 {VM_ARCH_2, "arch_2" } #endif #ifdef CONFIG_MEM_SOFT_DIRTY @@ -138,19 +144,22 @@ IF_HAVE_PG_IDLE(PG_idle, "idle" ) {VM_MAYEXEC, "mayexec" }, \ {VM_MAYSHARE, "mayshare" }, \ {VM_GROWSDOWN, "growsdown" }, \ + {VM_UFFD_MISSING, "uffd_missing" }, \ {VM_PFNMAP, "pfnmap" }, \ {VM_DENYWRITE, "denywrite" }, \ - {VM_LOCKONFAULT, "lockonfault" }, \ + {VM_UFFD_WP, "uffd_wp" }, \ {VM_LOCKED, "locked" }, \ {VM_IO, "io" }, \ {VM_SEQ_READ, "seqread" }, \ {VM_RAND_READ, "randread" }, \ {VM_DONTCOPY, "dontcopy" }, \ {VM_DONTEXPAND, "dontexpand" }, \ + {VM_LOCKONFAULT, "lockonfault" }, \ {VM_ACCOUNT, "account" }, \ {VM_NORESERVE, "noreserve" }, \ {VM_HUGETLB, "hugetlb" }, \ - __VM_ARCH_SPECIFIC , \ + __VM_ARCH_SPECIFIC_1 , \ + __VM_ARCH_SPECIFIC_2 , \ {VM_DONTDUMP, "dontdump" }, \ IF_HAVE_VM_SOFTDIRTY(VM_SOFTDIRTY, "softdirty" ) \ {VM_MIXEDMAP, "mixedmap" }, \ -- cgit v1.2.3 From d02bd27bd33dd7e8d22594cd568b81be0cb584cd Mon Sep 17 00:00:00 2001 From: Igor Redko Date: Thu, 17 Mar 2016 14:19:05 -0700 Subject: mm/page_alloc.c: calculate 'available' memory in a separate function Add a new field, VIRTIO_BALLOON_S_AVAIL, to virtio_balloon memory statistics protocol, corresponding to 'Available' in /proc/meminfo. It indicates to the hypervisor how big the balloon can be inflated without pushing the guest system to swap. This metric would be very useful in VM orchestration software to improve memory management of different VMs under overcommit. This patch (of 2): Factor out calculation of the available memory counter into a separate exportable function, in order to be able to use it in other parts of the kernel. In particular, it appears a relevant metric to report to the hypervisor via virtio-balloon statistics interface (in a followup patch). Signed-off-by: Igor Redko Signed-off-by: Denis V. Lunev Reviewed-by: Roman Kagan Cc: Michael S. 
Tsirkin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/meminfo.c | 31 +------------------------------ include/linux/mm.h | 1 + mm/page_alloc.c | 43 +++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 45 insertions(+), 30 deletions(-) (limited to 'include/linux') diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c index df4661abadc4..83720460c5bc 100644 --- a/fs/proc/meminfo.c +++ b/fs/proc/meminfo.c @@ -29,10 +29,7 @@ static int meminfo_proc_show(struct seq_file *m, void *v) unsigned long committed; long cached; long available; - unsigned long pagecache; - unsigned long wmark_low = 0; unsigned long pages[NR_LRU_LISTS]; - struct zone *zone; int lru; /* @@ -51,33 +48,7 @@ static int meminfo_proc_show(struct seq_file *m, void *v) for (lru = LRU_BASE; lru < NR_LRU_LISTS; lru++) pages[lru] = global_page_state(NR_LRU_BASE + lru); - for_each_zone(zone) - wmark_low += zone->watermark[WMARK_LOW]; - - /* - * Estimate the amount of memory available for userspace allocations, - * without causing swapping. - */ - available = i.freeram - totalreserve_pages; - - /* - * Not all the page cache can be freed, otherwise the system will - * start swapping. Assume at least half of the page cache, or the - * low watermark worth of cache, needs to stay. - */ - pagecache = pages[LRU_ACTIVE_FILE] + pages[LRU_INACTIVE_FILE]; - pagecache -= min(pagecache / 2, wmark_low); - available += pagecache; - - /* - * Part of the reclaimable slab consists of items that are in use, - * and cannot be freed. Cap this estimate at the low watermark. - */ - available += global_page_state(NR_SLAB_RECLAIMABLE) - - min(global_page_state(NR_SLAB_RECLAIMABLE) / 2, wmark_low); - - if (available < 0) - available = 0; + available = si_mem_available(); /* * Tagged format, for easy grepping and expansion. diff --git a/include/linux/mm.h b/include/linux/mm.h index b3202612bb95..db9df3f78de1 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1875,6 +1875,7 @@ extern int __meminit init_per_zone_wmark_min(void); extern void mem_init(void); extern void __init mmap_init(void); extern void show_mem(unsigned int flags); +extern long si_mem_available(void); extern void si_meminfo(struct sysinfo * val); extern void si_meminfo_node(struct sysinfo *val, int nid); diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 25a75da53c27..941b802e11ec 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -3713,6 +3713,49 @@ static inline void show_node(struct zone *zone) printk("Node %d ", zone_to_nid(zone)); } +long si_mem_available(void) +{ + long available; + unsigned long pagecache; + unsigned long wmark_low = 0; + unsigned long pages[NR_LRU_LISTS]; + struct zone *zone; + int lru; + + for (lru = LRU_BASE; lru < NR_LRU_LISTS; lru++) + pages[lru] = global_page_state(NR_LRU_BASE + lru); + + for_each_zone(zone) + wmark_low += zone->watermark[WMARK_LOW]; + + /* + * Estimate the amount of memory available for userspace allocations, + * without causing swapping. + */ + available = global_page_state(NR_FREE_PAGES) - totalreserve_pages; + + /* + * Not all the page cache can be freed, otherwise the system will + * start swapping. Assume at least half of the page cache, or the + * low watermark worth of cache, needs to stay. + */ + pagecache = pages[LRU_ACTIVE_FILE] + pages[LRU_INACTIVE_FILE]; + pagecache -= min(pagecache / 2, wmark_low); + available += pagecache; + + /* + * Part of the reclaimable slab consists of items that are in use, + * and cannot be freed. Cap this estimate at the low watermark. 
+ */ + available += global_page_state(NR_SLAB_RECLAIMABLE) - + min(global_page_state(NR_SLAB_RECLAIMABLE) / 2, wmark_low); + + if (available < 0) + available = 0; + return available; +} +EXPORT_SYMBOL_GPL(si_mem_available); + void si_meminfo(struct sysinfo *val) { val->totalram = totalram_pages; -- cgit v1.2.3 From 3ed3a4f0ddffece942bb2661924d87be4ce63cb7 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Thu, 17 Mar 2016 14:19:11 -0700 Subject: mm: cleanup *pte_alloc* interfaces There are a few things about the *pte_alloc*() helpers worth cleaning up: - the 'vma' argument is unused, let's drop it; - most __pte_alloc() callers do a speculative check for pmd_none() before taking the ptl: let's introduce a pte_alloc() macro which does the check. The only direct user of __pte_alloc left is userfaultfd, which has different expectations about atomicity wrt the pmd. - pte_alloc_map() and pte_alloc_map_lock() are redefined using pte_alloc(). [sudeep.holla@arm.com: fix build for arm64 hugetlbpage] [sfr@canb.auug.org.au: fix arch/arm/mm/mmu.c some more] Signed-off-by: Kirill A. Shutemov Cc: Dave Hansen Signed-off-by: Sudeep Holla Acked-by: Kirill A. Shutemov Signed-off-by: Stephen Rothwell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm/mm/mmu.c | 6 +++--- arch/arm/mm/pgd.c | 2 +- arch/arm64/mm/hugetlbpage.c | 2 +- arch/ia64/mm/hugetlbpage.c | 2 +- arch/metag/mm/hugetlbpage.c | 2 +- arch/parisc/mm/hugetlbpage.c | 2 +- arch/sh/mm/hugetlbpage.c | 2 +- arch/sparc/mm/hugetlbpage.c | 2 +- arch/tile/mm/hugetlbpage.c | 2 +- arch/um/kernel/skas/mmu.c | 2 +- arch/unicore32/mm/pgd.c | 2 +- arch/x86/kernel/tboot.c | 2 +- include/linux/mm.h | 17 ++++++++--------- mm/memory.c | 8 +++----- mm/mremap.c | 3 +-- mm/userfaultfd.c | 3 +-- 16 files changed, 27 insertions(+), 32 deletions(-) (limited to 'include/linux') diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index 434d76f0b363..88fbe0d23ca6 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c @@ -732,7 +732,7 @@ static void *__init late_alloc(unsigned long sz) return ptr; } -static pte_t * __init pte_alloc(pmd_t *pmd, unsigned long addr, +static pte_t * __init arm_pte_alloc(pmd_t *pmd, unsigned long addr, unsigned long prot, void *(*alloc)(unsigned long sz)) { @@ -747,7 +747,7 @@ static pte_t * __init pte_alloc(pmd_t *pmd, unsigned long addr, static pte_t * __init early_pte_alloc(pmd_t *pmd, unsigned long addr, unsigned long prot) { - return pte_alloc(pmd, addr, prot, early_alloc); + return arm_pte_alloc(pmd, addr, prot, early_alloc); } static void __init alloc_init_pte(pmd_t *pmd, unsigned long addr, @@ -756,7 +756,7 @@ static void __init alloc_init_pte(pmd_t *pmd, unsigned long addr, void *(*alloc)(unsigned long sz), bool ng) { - pte_t *pte = pte_alloc(pmd, addr, type->prot_l1, alloc); + pte_t *pte = arm_pte_alloc(pmd, addr, type->prot_l1, alloc); do { set_pte_ext(pte, pfn_pte(pfn, __pgprot(type->prot_pte)), ng ?
PTE_EXT_NG : 0); diff --git a/arch/arm/mm/pgd.c b/arch/arm/mm/pgd.c index e683db1b90a3..b8d477321730 100644 --- a/arch/arm/mm/pgd.c +++ b/arch/arm/mm/pgd.c @@ -80,7 +80,7 @@ pgd_t *pgd_alloc(struct mm_struct *mm) if (!new_pmd) goto no_pmd; - new_pte = pte_alloc_map(mm, NULL, new_pmd, 0); + new_pte = pte_alloc_map(mm, new_pmd, 0); if (!new_pte) goto no_pte; diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c index da30529bb1f6..589fd28e1fb5 100644 --- a/arch/arm64/mm/hugetlbpage.c +++ b/arch/arm64/mm/hugetlbpage.c @@ -124,7 +124,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, * will be no pte_unmap() to correspond with this * pte_alloc_map(). */ - pte = pte_alloc_map(mm, NULL, pmd, addr); + pte = pte_alloc_map(mm, pmd, addr); } else if (sz == PMD_SIZE) { if (IS_ENABLED(CONFIG_ARCH_WANT_HUGE_PMD_SHARE) && pud_none(*pud)) diff --git a/arch/ia64/mm/hugetlbpage.c b/arch/ia64/mm/hugetlbpage.c index f50d4b3f501a..85de86d36fdf 100644 --- a/arch/ia64/mm/hugetlbpage.c +++ b/arch/ia64/mm/hugetlbpage.c @@ -38,7 +38,7 @@ huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz) if (pud) { pmd = pmd_alloc(mm, pud, taddr); if (pmd) - pte = pte_alloc_map(mm, NULL, pmd, taddr); + pte = pte_alloc_map(mm, pmd, taddr); } return pte; } diff --git a/arch/metag/mm/hugetlbpage.c b/arch/metag/mm/hugetlbpage.c index 53f0f6c47027..b38700ae4e84 100644 --- a/arch/metag/mm/hugetlbpage.c +++ b/arch/metag/mm/hugetlbpage.c @@ -67,7 +67,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, pgd = pgd_offset(mm, addr); pud = pud_offset(pgd, addr); pmd = pmd_offset(pud, addr); - pte = pte_alloc_map(mm, NULL, pmd, addr); + pte = pte_alloc_map(mm, pmd, addr); pgd->pgd &= ~_PAGE_SZ_MASK; pgd->pgd |= _PAGE_SZHUGE; diff --git a/arch/parisc/mm/hugetlbpage.c b/arch/parisc/mm/hugetlbpage.c index 54ba39262b82..5d6eea925cf4 100644 --- a/arch/parisc/mm/hugetlbpage.c +++ b/arch/parisc/mm/hugetlbpage.c @@ -63,7 +63,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, if (pud) { pmd = pmd_alloc(mm, pud, addr); if (pmd) - pte = pte_alloc_map(mm, NULL, pmd, addr); + pte = pte_alloc_map(mm, pmd, addr); } return pte; } diff --git a/arch/sh/mm/hugetlbpage.c b/arch/sh/mm/hugetlbpage.c index 6385f60209b6..cc948db74878 100644 --- a/arch/sh/mm/hugetlbpage.c +++ b/arch/sh/mm/hugetlbpage.c @@ -35,7 +35,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, if (pud) { pmd = pmd_alloc(mm, pud, addr); if (pmd) - pte = pte_alloc_map(mm, NULL, pmd, addr); + pte = pte_alloc_map(mm, pmd, addr); } } diff --git a/arch/sparc/mm/hugetlbpage.c b/arch/sparc/mm/hugetlbpage.c index 131eaf4ad7f5..4977800e9770 100644 --- a/arch/sparc/mm/hugetlbpage.c +++ b/arch/sparc/mm/hugetlbpage.c @@ -146,7 +146,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, if (pud) { pmd = pmd_alloc(mm, pud, addr); if (pmd) - pte = pte_alloc_map(mm, NULL, pmd, addr); + pte = pte_alloc_map(mm, pmd, addr); } return pte; } diff --git a/arch/tile/mm/hugetlbpage.c b/arch/tile/mm/hugetlbpage.c index c034dc3fe2d4..e212c64682c5 100644 --- a/arch/tile/mm/hugetlbpage.c +++ b/arch/tile/mm/hugetlbpage.c @@ -77,7 +77,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, else { if (sz != PAGE_SIZE << huge_shift[HUGE_SHIFT_PAGE]) panic("Unexpected page size %#lx\n", sz); - return pte_alloc_map(mm, NULL, pmd, addr); + return pte_alloc_map(mm, pmd, addr); } } #else diff --git a/arch/um/kernel/skas/mmu.c b/arch/um/kernel/skas/mmu.c index 9591a66aa5c5..3943e9d7d13d 100644 --- a/arch/um/kernel/skas/mmu.c +++ b/arch/um/kernel/skas/mmu.c @@ -31,7 +31,7 @@ static int init_stub_pte(struct mm_struct 
*mm, unsigned long proc, if (!pmd) goto out_pmd; - pte = pte_alloc_map(mm, NULL, pmd, proc); + pte = pte_alloc_map(mm, pmd, proc); if (!pte) goto out_pte; diff --git a/arch/unicore32/mm/pgd.c b/arch/unicore32/mm/pgd.c index 2ade20d8eab3..c572a28c76c9 100644 --- a/arch/unicore32/mm/pgd.c +++ b/arch/unicore32/mm/pgd.c @@ -54,7 +54,7 @@ pgd_t *get_pgd_slow(struct mm_struct *mm) if (!new_pmd) goto no_pmd; - new_pte = pte_alloc_map(mm, NULL, new_pmd, 0); + new_pte = pte_alloc_map(mm, new_pmd, 0); if (!new_pte) goto no_pte; diff --git a/arch/x86/kernel/tboot.c b/arch/x86/kernel/tboot.c index 91a4496db434..e72a07f20b05 100644 --- a/arch/x86/kernel/tboot.c +++ b/arch/x86/kernel/tboot.c @@ -135,7 +135,7 @@ static int map_tboot_page(unsigned long vaddr, unsigned long pfn, pmd = pmd_alloc(&tboot_mm, pud, vaddr); if (!pmd) return -1; - pte = pte_alloc_map(&tboot_mm, NULL, pmd, vaddr); + pte = pte_alloc_map(&tboot_mm, pmd, vaddr); if (!pte) return -1; set_pte_at(&tboot_mm, vaddr, pte, pfn_pte(pfn, prot)); diff --git a/include/linux/mm.h b/include/linux/mm.h index db9df3f78de1..75d1907b9009 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1545,8 +1545,7 @@ static inline void mm_dec_nr_pmds(struct mm_struct *mm) } #endif -int __pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma, - pmd_t *pmd, unsigned long address); +int __pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long address); int __pte_alloc_kernel(pmd_t *pmd, unsigned long address); /* @@ -1672,15 +1671,15 @@ static inline void pgtable_page_dtor(struct page *page) pte_unmap(pte); \ } while (0) -#define pte_alloc_map(mm, vma, pmd, address) \ - ((unlikely(pmd_none(*(pmd))) && __pte_alloc(mm, vma, \ - pmd, address))? \ - NULL: pte_offset_map(pmd, address)) +#define pte_alloc(mm, pmd, address) \ + (unlikely(pmd_none(*(pmd))) && __pte_alloc(mm, pmd, address)) + +#define pte_alloc_map(mm, pmd, address) \ + (pte_alloc(mm, pmd, address) ? NULL : pte_offset_map(pmd, address)) #define pte_alloc_map_lock(mm, pmd, address, ptlp) \ - ((unlikely(pmd_none(*(pmd))) && __pte_alloc(mm, NULL, \ - pmd, address))? \ - NULL: pte_offset_map_lock(mm, pmd, address, ptlp)) + (pte_alloc(mm, pmd, address) ? \ + NULL : pte_offset_map_lock(mm, pmd, address, ptlp)) #define pte_alloc_kernel(pmd, address) \ ((unlikely(pmd_none(*(pmd))) && __pte_alloc_kernel(pmd, address))? \ diff --git a/mm/memory.c b/mm/memory.c index 0e247642ed5b..1974fc02c4d0 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -562,8 +562,7 @@ void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *vma, } } -int __pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma, - pmd_t *pmd, unsigned long address) +int __pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long address) { spinlock_t *ptl; pgtable_t new = pte_alloc_one(mm, address); @@ -3419,12 +3418,11 @@ static int __handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma, } /* - * Use __pte_alloc instead of pte_alloc_map, because we can't + * Use pte_alloc() instead of pte_alloc_map, because we can't * run pte_offset_map on the pmd, if an huge pmd could * materialize from under us from a different thread. */ - if (unlikely(pmd_none(*pmd)) && - unlikely(__pte_alloc(mm, vma, pmd, address))) + if (unlikely(pte_alloc(mm, pmd, address))) return VM_FAULT_OOM; /* * If a huge pmd materialized under us just retry later. 
Use diff --git a/mm/mremap.c b/mm/mremap.c index e30c8a6489a6..3fa0a467df66 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -213,8 +213,7 @@ unsigned long move_page_tables(struct vm_area_struct *vma, continue; VM_BUG_ON(pmd_trans_huge(*old_pmd)); } - if (pmd_none(*new_pmd) && __pte_alloc(new_vma->vm_mm, new_vma, - new_pmd, new_addr)) + if (pte_alloc(new_vma->vm_mm, new_pmd, new_addr)) break; next = (new_addr + PMD_SIZE) & PMD_MASK; if (extent > next - new_addr) diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c index 806b0c758c5b..9f3a0290b273 100644 --- a/mm/userfaultfd.c +++ b/mm/userfaultfd.c @@ -230,8 +230,7 @@ retry: break; } if (unlikely(pmd_none(dst_pmdval)) && - unlikely(__pte_alloc(dst_mm, dst_vma, dst_pmd, - dst_addr))) { + unlikely(__pte_alloc(dst_mm, dst_pmd, dst_addr))) { err = -ENOMEM; break; } -- cgit v1.2.3 From 795ae7a0de6b834a0cc202aa55c190ef81496665 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Thu, 17 Mar 2016 14:19:14 -0700 Subject: mm: scale kswapd watermarks in proportion to memory In machines with 140G of memory and enterprise flash storage, we have seen read and write bursts routinely exceed the kswapd watermarks and cause thundering herds in direct reclaim. Unfortunately, the only way to tune kswapd aggressiveness is through adjusting min_free_kbytes - the system's emergency reserves - which is entirely unrelated to the system's latency requirements. In order to get kswapd to maintain a 250M buffer of free memory, the emergency reserves need to be set to 1G. That is a lot of memory wasted for no good reason. On the other hand, it's reasonable to assume that allocation bursts and overall allocation concurrency scale with memory capacity, so it makes sense to make kswapd aggressiveness a function of that as well. Change the kswapd watermark scale factor from the currently fixed 25% of the tunable emergency reserve to a tunable 0.1% of memory. Beyond 1G of memory, this will produce bigger watermark steps than the current formula in default settings. Ensure that the new formula never chooses steps smaller than that, i.e. 25% of the emergency reserve. On a 140G machine, this raises the default watermark steps - the distance between min and low, and low and high - from 16M to 143M. Signed-off-by: Johannes Weiner Acked-by: Mel Gorman Acked-by: Rik van Riel Acked-by: David Rientjes Cc: Joonsoo Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/sysctl/vm.txt | 18 ++++++++++++++++++ include/linux/mm.h | 1 + include/linux/mmzone.h | 2 ++ kernel/sysctl.c | 10 ++++++++++ mm/page_alloc.c | 29 +++++++++++++++++++++++++++-- 5 files changed, 58 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/Documentation/sysctl/vm.txt b/Documentation/sysctl/vm.txt index 89a887c76629..cb0368459da3 100644 --- a/Documentation/sysctl/vm.txt +++ b/Documentation/sysctl/vm.txt @@ -803,6 +803,24 @@ performance impact. Reclaim code needs to take various locks to find freeable directory and inode objects. With vfs_cache_pressure=1000, it will look for ten times more freeable objects than there are. +============================================================= + +watermark_scale_factor: + +This factor controls the aggressiveness of kswapd. It defines the +amount of memory left in a node/system before kswapd is woken up and +how much memory needs to be free before kswapd goes back to sleep. + +The unit is in fractions of 10,000. The default value of 10 means the +distances between watermarks are 0.1% of the available memory in the +node/system. 
The maximum value is 1000, or 10% of memory. + +A high rate of threads entering direct reclaim (allocstall) or kswapd +going to sleep prematurely (kswapd_low_wmark_hit_quickly) can indicate +that the number of free pages kswapd maintains for latency reasons is +too small for the allocation bursts occurring in the system. This knob +can then be used to tune kswapd aggressiveness accordingly. + ============================================================== zone_reclaim_mode: diff --git a/include/linux/mm.h b/include/linux/mm.h index 75d1907b9009..f7fd64227d3a 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1889,6 +1889,7 @@ extern void zone_pcp_reset(struct zone *zone); /* page_alloc.c */ extern int min_free_kbytes; +extern int watermark_scale_factor; /* nommu.c */ extern atomic_long_t mmap_pages_allocated; diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index bdd9a270a813..c60df9257cc7 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -841,6 +841,8 @@ static inline int is_highmem(struct zone *zone) struct ctl_table; int min_free_kbytes_sysctl_handler(struct ctl_table *, int, void __user *, size_t *, loff_t *); +int watermark_scale_factor_sysctl_handler(struct ctl_table *, int, + void __user *, size_t *, loff_t *); extern int sysctl_lowmem_reserve_ratio[MAX_NR_ZONES-1]; int lowmem_reserve_ratio_sysctl_handler(struct ctl_table *, int, void __user *, size_t *, loff_t *); diff --git a/kernel/sysctl.c b/kernel/sysctl.c index f5102fabef7f..725587f10667 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -126,6 +126,7 @@ static int __maybe_unused two = 2; static int __maybe_unused four = 4; static unsigned long one_ul = 1; static int one_hundred = 100; +static int one_thousand = 1000; #ifdef CONFIG_PRINTK static int ten_thousand = 10000; #endif @@ -1403,6 +1404,15 @@ static struct ctl_table vm_table[] = { .proc_handler = min_free_kbytes_sysctl_handler, .extra1 = &zero, }, + { + .procname = "watermark_scale_factor", + .data = &watermark_scale_factor, + .maxlen = sizeof(watermark_scale_factor), + .mode = 0644, + .proc_handler = watermark_scale_factor_sysctl_handler, + .extra1 = &one, + .extra2 = &one_thousand, + }, { .procname = "percpu_pagelist_fraction", .data = &percpu_pagelist_fraction, diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 941b802e11ec..d156310aedeb 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -249,6 +249,7 @@ compound_page_dtor * const compound_page_dtors[] = { int min_free_kbytes = 1024; int user_min_free_kbytes = -1; +int watermark_scale_factor = 10; static unsigned long __meminitdata nr_kernel_pages; static unsigned long __meminitdata nr_all_pages; @@ -6347,8 +6348,17 @@ static void __setup_per_zone_wmarks(void) zone->watermark[WMARK_MIN] = tmp; } - zone->watermark[WMARK_LOW] = min_wmark_pages(zone) + (tmp >> 2); - zone->watermark[WMARK_HIGH] = min_wmark_pages(zone) + (tmp >> 1); + /* + * Set the kswapd watermarks distance according to the + * scale factor in proportion to available memory, but + * ensure a minimum size on small systems. 
+ */ + tmp = max_t(u64, tmp >> 2, + mult_frac(zone->managed_pages, + watermark_scale_factor, 10000)); + + zone->watermark[WMARK_LOW] = min_wmark_pages(zone) + tmp; + zone->watermark[WMARK_HIGH] = min_wmark_pages(zone) + tmp * 2; __mod_zone_page_state(zone, NR_ALLOC_BATCH, high_wmark_pages(zone) - low_wmark_pages(zone) - @@ -6489,6 +6499,21 @@ int min_free_kbytes_sysctl_handler(struct ctl_table *table, int write, return 0; } +int watermark_scale_factor_sysctl_handler(struct ctl_table *table, int write, + void __user *buffer, size_t *length, loff_t *ppos) +{ + int rc; + + rc = proc_dointvec_minmax(table, write, buffer, length, ppos); + if (rc) + return rc; + + if (write) + setup_per_zone_wmarks(); + + return 0; +} + #ifdef CONFIG_NUMA int sysctl_min_unmapped_ratio_sysctl_handler(struct ctl_table *table, int write, void __user *buffer, size_t *length, loff_t *ppos) -- cgit v1.2.3 From b14a1ef58e8acb7fa64c1c8f745b21fd8a577aba Mon Sep 17 00:00:00 2001 From: Satoru Takeuchi Date: Thu, 17 Mar 2016 14:19:17 -0700 Subject: mm: remove unnecessary description about a non-existent gfp flag Since __GFP_NOACCOUNT was removed by commit 20b5c3039863 ("Revert 'gfp: add __GFP_NOACCOUNT'"), its description is no longer necessary. Signed-off-by: Satoru Takeuchi Acked-by: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/gfp.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/gfp.h b/include/linux/gfp.h index bb16dfeb917e..c083d0820a87 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -105,8 +105,6 @@ struct vm_area_struct; * * __GFP_NOMEMALLOC is used to explicitly forbid access to emergency reserves. * This takes precedence over the __GFP_MEMALLOC flag if both are set. - * - * __GFP_NOACCOUNT ignores the accounting for kmemcg limit enforcement. */ #define __GFP_ATOMIC ((__force gfp_t)___GFP_ATOMIC) #define __GFP_HIGH ((__force gfp_t)___GFP_HIGH) -- cgit v1.2.3 From 444eb2a449ef36fe115431ed7b71467c4563c7f1 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Thu, 17 Mar 2016 14:19:23 -0700 Subject: mm: thp: set THP defrag by default to madvise and add a stall-free defrag option THP defrag is enabled by default to direct reclaim/compact but not wake kswapd in the event of a THP allocation failure. The problem is that THP allocation requests potentially enter reclaim/compaction. This potentially incurs a severe stall that is not guaranteed to be offset by reduced TLB misses. While there has been considerable effort to reduce the impact of reclaim/compaction, it is still a high cost and workloads that should fit in memory fail to do so. Specifically, a simple anon/file streaming workload will enter direct reclaim on NUMA at least even though the working set size is 80% of RAM. It's been years and it's time to throw in the towel. First, this patch defines THP defrag as follows:

	madvise: A failed allocation will direct reclaim/compact if the
	         application requests it
	never:   Neither reclaim/compact nor wake kswapd
	defer:   A failed allocation will wake kswapd/kcompactd
	always:  A failed allocation will direct reclaim/compact
	         (historical behaviour)
	         khugepaged defrag will enter direct reclaim but not
	         wake kswapd.

Next it sets the default defrag option to be "madvise" to only enter direct reclaim/compaction for applications that specifically requested it. Lastly, it removes a check from the page allocator slowpath that is related to __GFP_THISNODE to allow "defer" to work. The callers that really care are slub/slab and they are updated accordingly. The slab one may be surprising because it also corrects a comment, as kswapd was never woken up by that path.
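The mode-to-GFP mapping defined above boils down to a small decision function at THP fault time. Below is a minimal sketch of that logic, assuming the flag names this patch introduces; the function name here is illustrative, and the authoritative version is alloc_hugepage_direct_gfpmask() in the mm/huge_memory.c hunk further down.

	/*
	 * Sketch: choose reclaim behaviour for a THP page fault
	 * according to the defrag mode (names follow the patch;
	 * this is illustration, not a verbatim copy).
	 */
	static inline gfp_t thp_fault_gfpmask(struct vm_area_struct *vma)
	{
		gfp_t reclaim_flags = 0;

		if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG,
			     &transparent_hugepage_flags) &&
		    (vma->vm_flags & VM_HUGEPAGE))
			reclaim_flags = __GFP_DIRECT_RECLAIM;	/* "madvise" + MADV_HUGEPAGE */
		else if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG,
				  &transparent_hugepage_flags))
			reclaim_flags = __GFP_KSWAPD_RECLAIM;	/* "defer": no stall */
		else if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG,
				  &transparent_hugepage_flags))
			reclaim_flags = __GFP_DIRECT_RECLAIM;	/* "always": historical */

		/* "never" sets no bits; GFP_TRANSHUGE itself masks out __GFP_RECLAIM */
		return GFP_TRANSHUGE | reclaim_flags;
	}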
This means that a THP fault will no longer stall for most applications by default, and this is the ideal for most users: they get THP if it is immediately available. There are still options for users that prefer a stall at startup of a new application, by either restoring historical behaviour with "always" or picking a half-way point with "defer", where kswapd does some of the work in the background and wakes kcompactd if necessary. THP defrag for khugepaged remains enabled and will enter direct reclaim but will not wake kswapd or kcompactd. After this patch a THP allocation failure will quickly fall back and rely on khugepaged to recover the situation at some time in the future. In some cases this will reduce THP usage, but the benefit of THP is hard to measure and not a universal win, whereas a stall to reclaim/compaction is definitely measurable and can be painful. The first test for this is using "usemem" to read a large file and write a large anonymous mapping (to avoid the zero page) multiple times. The total size of the mappings is 80% of RAM and the benchmark simply measures how long it takes to complete. It uses multiple threads to see if that is a factor. On UMA, the performance is almost identical so is not reported, but on NUMA we see this

usemem
                              4.4.0             4.4.0
                     kcompactd-v1r1     nodefrag-v1r3
Amean    System-1    102.86 (  0.00%)    46.81 ( 54.50%)
Amean    System-4     37.85 (  0.00%)    34.02 ( 10.12%)
Amean    System-7     48.12 (  0.00%)    46.89 (  2.56%)
Amean    System-12    51.98 (  0.00%)    56.96 ( -9.57%)
Amean    System-21    80.16 (  0.00%)    79.05 (  1.39%)
Amean    System-30   110.71 (  0.00%)   107.17 (  3.20%)
Amean    System-48   127.98 (  0.00%)   124.83 (  2.46%)
Amean    Elapsd-1    185.84 (  0.00%)   105.51 ( 43.23%)
Amean    Elapsd-4     26.19 (  0.00%)    25.58 (  2.33%)
Amean    Elapsd-7     21.65 (  0.00%)    21.62 (  0.16%)
Amean    Elapsd-12    18.58 (  0.00%)    17.94 (  3.43%)
Amean    Elapsd-21    17.53 (  0.00%)    16.60 (  5.33%)
Amean    Elapsd-30    17.45 (  0.00%)    17.13 (  1.84%)
Amean    Elapsd-48    15.40 (  0.00%)    15.27 (  0.82%)

For a single thread, the benchmark completes 43.23% faster with this patch applied, with smaller benefits as the thread count increases. Similarly, notice the large reduction in system CPU usage in most cases. The overall CPU time is

                   4.4.0           4.4.0
            kcompactd-v1r1   nodefrag-v1r3
User            10357.65        10438.33
System           3988.88         3543.94
Elapsed          2203.01         1634.41

Which is substantial. Now, the reclaim figures

                               4.4.0          4.4.0
                      kcompactd-v1r1  nodefrag-v1r3
Minor Faults               128458477      278352931
Major Faults                 2174976            225
Swap Ins                    16904701              0
Swap Outs                   17359627              0
Allocation stalls              43611              0
DMA allocs                         0              0
DMA32 allocs                19832646       19448017
Normal allocs              614488453      580941839
Movable allocs                     0              0
Direct pages scanned        24163800              0
Kswapd pages scanned               0              0
Kswapd pages reclaimed             0              0
Direct pages reclaimed      20691346              0
Compaction stalls              42263              0
Compaction success               938              0
Compaction failures            41325              0

This patch eliminates almost all swapping and direct reclaim activity. There is still overhead, but it's from NUMA balancing, which does not identify that it's pointless trying to do anything with this workload.
I also tried the thpscale benchmark, which forces a corner case where compaction can be used heavily and measures the latency of whether base or huge pages were used

thpscale Fault Latencies
                                 4.4.0                 4.4.0
                        kcompactd-v1r1         nodefrag-v1r3
Amean    fault-base-1    5288.84 (  0.00%)   2817.12 ( 46.73%)
Amean    fault-base-3    6365.53 (  0.00%)   3499.11 ( 45.03%)
Amean    fault-base-5    6526.19 (  0.00%)   4363.06 ( 33.15%)
Amean    fault-base-7    7142.25 (  0.00%)   4858.08 ( 31.98%)
Amean    fault-base-12  13827.64 (  0.00%)  10292.11 ( 25.57%)
Amean    fault-base-18  18235.07 (  0.00%)  13788.84 ( 24.38%)
Amean    fault-base-24  21597.80 (  0.00%)  24388.03 (-12.92%)
Amean    fault-base-30  26754.15 (  0.00%)  19700.55 ( 26.36%)
Amean    fault-base-32  26784.94 (  0.00%)  19513.57 ( 27.15%)
Amean    fault-huge-1    4223.96 (  0.00%)   2178.57 ( 48.42%)
Amean    fault-huge-3    2194.77 (  0.00%)   2149.74 (  2.05%)
Amean    fault-huge-5    2569.60 (  0.00%)   2346.95 (  8.66%)
Amean    fault-huge-7    3612.69 (  0.00%)   2997.70 ( 17.02%)
Amean    fault-huge-12   3301.75 (  0.00%)   6727.02 (-103.74%)
Amean    fault-huge-18   6696.47 (  0.00%)   6685.72 (  0.16%)
Amean    fault-huge-24   8000.72 (  0.00%)   9311.43 (-16.38%)
Amean    fault-huge-30  13305.55 (  0.00%)   9750.45 ( 26.72%)
Amean    fault-huge-32   9981.71 (  0.00%)  10316.06 ( -3.35%)

The average time to fault pages is substantially reduced in the majority of cases, but with the obvious caveat that fewer THPs are actually used in this adverse workload

                              4.4.0                 4.4.0
                     kcompactd-v1r1         nodefrag-v1r3
Percentage huge-1     0.71 (  0.00%)       14.04 (1865.22%)
Percentage huge-3    10.77 (  0.00%)       33.05 (206.85%)
Percentage huge-5    60.39 (  0.00%)       38.51 (-36.23%)
Percentage huge-7    45.97 (  0.00%)       34.57 (-24.79%)
Percentage huge-12   68.12 (  0.00%)       40.07 (-41.17%)
Percentage huge-18   64.93 (  0.00%)       47.82 (-26.35%)
Percentage huge-24   62.69 (  0.00%)       44.23 (-29.44%)
Percentage huge-30   43.49 (  0.00%)       55.38 ( 27.34%)
Percentage huge-32   50.72 (  0.00%)       51.90 (  2.35%)

                               4.4.0          4.4.0
                      kcompactd-v1r1  nodefrag-v1r3
Minor Faults                37429143       47564000
Major Faults                    1916           1558
Swap Ins                        1466           1079
Swap Outs                    2936863         149626
Allocation stalls              62510              3
DMA allocs                         0              0
DMA32 allocs                 6566458        6401314
Normal allocs              216361697      216538171
Movable allocs                     0              0
Direct pages scanned        25977580          17998
Kswapd pages scanned               0        3638931
Kswapd pages reclaimed             0         207236
Direct pages reclaimed       8833714             88
Compaction stalls             103349              5
Compaction success               270              4
Compaction failures           103079              1

Note again that while this does swap as it's an aggressive workload, the direct reclaim activity and allocation stalls are substantially reduced. There is some kswapd activity, but ftrace showed that the kswapd activity was due to normal wakeups from 4K pages being allocated. Compaction-related stalls and activity are almost eliminated. I also tried the stutter benchmark. For this, I do not have figures for NUMA, but it's something that does impact UMA so I'll report what is available

stutter
                              4.4.0                 4.4.0
                     kcompactd-v1r1         nodefrag-v1r3
Min         mmap      7.3571 (  0.00%)      7.3438 (  0.18%)
1st-qrtle   mmap      7.5278 (  0.00%)     17.9200 (-138.05%)
2nd-qrtle   mmap      7.6818 (  0.00%)     21.6055 (-181.25%)
3rd-qrtle   mmap     11.0889 (  0.00%)     21.8881 (-97.39%)
Max-90%     mmap     27.8978 (  0.00%)     22.1632 ( 20.56%)
Max-93%     mmap     28.3202 (  0.00%)     22.3044 ( 21.24%)
Max-95%     mmap     28.5600 (  0.00%)     22.4580 ( 21.37%)
Max-99%     mmap     29.6032 (  0.00%)     25.5216 ( 13.79%)
Max         mmap   4109.7289 (  0.00%)   4813.9832 (-17.14%)
Mean        mmap     12.4474 (  0.00%)     19.3027 (-55.07%)

This benchmark is trying to fault an anonymous mapping while there is a heavy IO load -- a scenario that desktop users used to complain about frequently.
This shows a mix because the ideal case of mapping with THP is not hit as often. However, note that 99% of the mappings complete 13.79% faster. The CPU usage here is particularly interesting

                   4.4.0           4.4.0
            kcompactd-v1r1   nodefrag-v1r3
User               67.50            0.99
System           1327.88           91.30
Elapsed          2079.00         2128.98

And once again we look at the reclaim figures

                               4.4.0          4.4.0
                      kcompactd-v1r1  nodefrag-v1r3
Minor Faults               335241922     1314582827
Major Faults                     715            819
Swap Ins                           0              0
Swap Outs                          0              0
Allocation stalls             532723              0
DMA allocs                         0              0
DMA32 allocs              1822364341     1177950222
Normal allocs             1815640808     1517844854
Movable allocs                     0              0
Direct pages scanned        21892772              0
Kswapd pages scanned        20015890       41879484
Kswapd pages reclaimed      19961986       41822072
Direct pages reclaimed      21892741              0
Compaction stalls            1065755              0
Compaction success               514              0
Compaction failures          1065241              0

Allocation stalls and all direct reclaim activity are eliminated, as are compaction-related stalls. THP gives impressive gains in some cases but only if they are quickly available. We're not going to reach the point where they are completely free, so let's take the costs out of the fast paths finally and defer the cost to kswapd, kcompactd and khugepaged where it belongs. Signed-off-by: Mel Gorman Acked-by: Rik van Riel Acked-by: Johannes Weiner Acked-by: Vlastimil Babka Cc: Andrea Arcangeli Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/vm/transhuge.txt | 17 +++++++++++++++++ include/linux/gfp.h | 2 +- include/linux/huge_mm.h | 9 +--- mm/huge_memory.c | 101 +++++++++++++++++++++++++++-------------- mm/page_alloc.c | 8 ---- mm/slab.c | 8 ++-- mm/slub.c | 2 +- 7 files changed, 91 insertions(+), 56 deletions(-) (limited to 'include/linux') diff --git a/Documentation/vm/transhuge.txt b/Documentation/vm/transhuge.txt index 0dc8632aa01e..d9cb65cf5cfd 100644 --- a/Documentation/vm/transhuge.txt +++ b/Documentation/vm/transhuge.txt @@ -113,9 +113,26 @@ guaranteed, but it may be more likely in case the allocation is for a MADV_HUGEPAGE region. echo always >/sys/kernel/mm/transparent_hugepage/defrag +echo defer >/sys/kernel/mm/transparent_hugepage/defrag echo madvise >/sys/kernel/mm/transparent_hugepage/defrag echo never >/sys/kernel/mm/transparent_hugepage/defrag +"always" means that an application requesting THP will stall on allocation +failure and directly reclaim pages and compact memory in an effort to +allocate a THP immediately. This may be desirable for virtual machines +that benefit heavily from THP use and are willing to delay the VM start +to utilise them. + +"defer" means that an application will wake kswapd in the background +to reclaim pages and wake kcompactd to compact memory so that THP is +available in the near future. It's the responsibility of khugepaged +to then install the THP pages later. + +"madvise" will enter direct reclaim like "always" but only for regions +that have used madvise(MADV_HUGEPAGE). This is the default behaviour. + +"never" should be self-explanatory. + By default kernel tries to use huge zero page on read page fault.
It's possible to disable huge zero page by writing 0 or enable it back by writing 1: diff --git a/include/linux/gfp.h b/include/linux/gfp.h index c083d0820a87..11d56c6e7ef2 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -257,7 +257,7 @@ struct vm_area_struct; #define GFP_HIGHUSER_MOVABLE (GFP_HIGHUSER | __GFP_MOVABLE) #define GFP_TRANSHUGE ((GFP_HIGHUSER_MOVABLE | __GFP_COMP | \ __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN) & \ - ~__GFP_KSWAPD_RECLAIM) + ~__GFP_RECLAIM) /* Convert GFP flags to their corresponding migrate type */ #define GFP_MOVABLE_MASK (__GFP_RECLAIMABLE|__GFP_MOVABLE) diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index 459fd25b378e..a4cecb4801ec 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -41,7 +41,8 @@ int vmf_insert_pfn_pmd(struct vm_area_struct *, unsigned long addr, pmd_t *, enum transparent_hugepage_flag { TRANSPARENT_HUGEPAGE_FLAG, TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG, - TRANSPARENT_HUGEPAGE_DEFRAG_FLAG, + TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, + TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG, TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG, TRANSPARENT_HUGEPAGE_DEFRAG_KHUGEPAGED_FLAG, TRANSPARENT_HUGEPAGE_USE_ZERO_PAGE_FLAG, @@ -71,12 +72,6 @@ extern bool is_vma_temporary_stack(struct vm_area_struct *vma); ((__vma)->vm_flags & VM_HUGEPAGE))) && \ !((__vma)->vm_flags & VM_NOHUGEPAGE) && \ !is_vma_temporary_stack(__vma)) -#define transparent_hugepage_defrag(__vma) \ - ((transparent_hugepage_flags & \ - (1<vm_flags & VM_HUGEPAGE)) #define transparent_hugepage_use_zero_page() \ (transparent_hugepage_flags & \ (1<vm_flags & VM_HUGEPAGE)) + reclaim_flags = __GFP_DIRECT_RECLAIM; + else if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG, &transparent_hugepage_flags)) + reclaim_flags = __GFP_KSWAPD_RECLAIM; + else if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, &transparent_hugepage_flags)) + reclaim_flags = __GFP_DIRECT_RECLAIM; + + return GFP_TRANSHUGE | reclaim_flags; +} + +/* Defrag for khugepaged will enter direct reclaim/compaction if necessary */ +static inline gfp_t alloc_hugepage_khugepaged_gfpmask(void) { - return (GFP_TRANSHUGE & ~(defrag ? 0 : __GFP_RECLAIM)) | extra_gfp; + return GFP_TRANSHUGE | (khugepaged_defrag() ? __GFP_DIRECT_RECLAIM : 0); } /* Caller must hold page table lock. 
*/ @@ -919,7 +950,7 @@ int do_huge_pmd_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma, } return ret; } - gfp = alloc_hugepage_gfpmask(transparent_hugepage_defrag(vma), 0); + gfp = alloc_hugepage_direct_gfpmask(vma); page = alloc_hugepage_vma(gfp, vma, haddr, HPAGE_PMD_ORDER); if (unlikely(!page)) { count_vm_event(THP_FAULT_FALLBACK); @@ -1279,7 +1310,7 @@ int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma, alloc: if (transparent_hugepage_enabled(vma) && !transparent_hugepage_debug_cow()) { - huge_gfp = alloc_hugepage_gfpmask(transparent_hugepage_defrag(vma), 0); + huge_gfp = alloc_hugepage_direct_gfpmask(vma); new_page = alloc_hugepage_vma(huge_gfp, vma, haddr, HPAGE_PMD_ORDER); } else new_page = NULL; @@ -2249,11 +2280,12 @@ static int khugepaged_find_target_node(void) return 0; } -static inline struct page *alloc_hugepage(int defrag) +static inline struct page *alloc_khugepaged_hugepage(void) { struct page *page; - page = alloc_pages(alloc_hugepage_gfpmask(defrag, 0), HPAGE_PMD_ORDER); + page = alloc_pages(alloc_hugepage_khugepaged_gfpmask(), + HPAGE_PMD_ORDER); if (page) prep_transhuge_page(page); return page; @@ -2264,7 +2296,7 @@ static struct page *khugepaged_alloc_hugepage(bool *wait) struct page *hpage; do { - hpage = alloc_hugepage(khugepaged_defrag()); + hpage = alloc_khugepaged_hugepage(); if (!hpage) { count_vm_event(THP_COLLAPSE_ALLOC_FAILED); if (!*wait) @@ -2335,8 +2367,7 @@ static void collapse_huge_page(struct mm_struct *mm, VM_BUG_ON(address & ~HPAGE_PMD_MASK); /* Only allocate from the target node */ - gfp = alloc_hugepage_gfpmask(khugepaged_defrag(), __GFP_OTHER_NODE) | - __GFP_THISNODE; + gfp = alloc_hugepage_khugepaged_gfpmask() | __GFP_OTHER_NODE | __GFP_THISNODE; /* release the mmap_sem read lock. */ new_page = khugepaged_alloc_page(hpage, gfp, mm, address, node); diff --git a/mm/page_alloc.c b/mm/page_alloc.c index d156310aedeb..096a00d98a45 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -3119,14 +3119,6 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order, (__GFP_ATOMIC|__GFP_DIRECT_RECLAIM))) gfp_mask &= ~__GFP_ATOMIC; - /* - * If this allocation cannot block and it is for a specific node, then - * fail early. There's no need to wakeup kswapd or retry for a - * speculative node-specific allocation. - */ - if (IS_ENABLED(CONFIG_NUMA) && (gfp_mask & __GFP_THISNODE) && !can_direct_reclaim) - goto nopage; - retry: if (gfp_mask & __GFP_KSWAPD_RECLAIM) wake_all_kswapds(order, ac); diff --git a/mm/slab.c b/mm/slab.c index 56dd0df2a8ce..e1f6c27c3db5 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -670,7 +670,7 @@ static inline void *____cache_alloc_node(struct kmem_cache *cachep, static inline gfp_t gfp_exact_node(gfp_t flags) { - return flags; + return flags & ~__GFP_NOFAIL; } #else /* CONFIG_NUMA */ @@ -841,12 +841,12 @@ static inline int cache_free_alien(struct kmem_cache *cachep, void *objp) } /* - * Construct gfp mask to allocate from a specific node but do not direct reclaim - * or warn about failures. kswapd may still wake to reclaim in the background. + * Construct gfp mask to allocate from a specific node but do not reclaim or + * warn about failures. 
*/ static inline gfp_t gfp_exact_node(gfp_t flags) { - return (flags | __GFP_THISNODE | __GFP_NOWARN) & ~__GFP_DIRECT_RECLAIM; + return (flags | __GFP_THISNODE | __GFP_NOWARN) & ~(__GFP_RECLAIM|__GFP_NOFAIL); } #endif diff --git a/mm/slub.c b/mm/slub.c index 2f2f04d39104..64ed5f3a3046 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -1426,7 +1426,7 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node) */ alloc_gfp = (flags | __GFP_NOWARN | __GFP_NORETRY) & ~__GFP_NOFAIL; if ((alloc_gfp & __GFP_DIRECT_RECLAIM) && oo_order(oo) > oo_order(s->min)) - alloc_gfp = (alloc_gfp | __GFP_NOMEMALLOC) & ~__GFP_DIRECT_RECLAIM; + alloc_gfp = (alloc_gfp | __GFP_NOMEMALLOC) & ~(__GFP_RECLAIM|__GFP_NOFAIL); page = alloc_slab_page(s, alloc_gfp, node, oo); if (unlikely(!page)) { -- cgit v1.2.3 From fe896d1878949ea92ba547587bc3075cc688fb8f Mon Sep 17 00:00:00 2001 From: Joonsoo Kim Date: Thu, 17 Mar 2016 14:19:26 -0700 Subject: mm: introduce page reference manipulation functions The success of CMA allocation largely depends on the success of migration, and a key factor in that is the page reference count. Until now, the page reference has been manipulated by directly calling atomic functions, so we cannot track who manipulates it and where. That makes it hard to find the actual reason for a CMA allocation failure. CMA allocation should be guaranteed to succeed, so finding the offending place is really important. In this patch, call sites where the page reference is manipulated are converted to the newly introduced wrapper functions. This is a preparation step for adding a tracepoint to each page reference manipulation function. With this facility, we can easily find the reason for a CMA allocation failure. There is no functional change in this patch. In addition, this patch also converts reference read sites. It will help a second step that renames page._count to something else and prevents later attempts to access it directly (Suggested by Andrew). Signed-off-by: Joonsoo Kim Acked-by: Michal Nazarewicz Acked-by: Vlastimil Babka Cc: Minchan Kim Cc: Mel Gorman Cc: "Kirill A.
Shutemov" Cc: Sergey Senozhatsky Cc: Steven Rostedt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/mips/mm/gup.c | 2 +- arch/powerpc/mm/mmu_context_hash64.c | 3 +- arch/powerpc/mm/pgtable_64.c | 2 +- arch/powerpc/platforms/512x/mpc512x_shared.c | 2 +- arch/x86/mm/gup.c | 2 +- drivers/block/aoe/aoecmd.c | 4 +- drivers/net/ethernet/freescale/gianfar.c | 2 +- drivers/net/ethernet/intel/fm10k/fm10k_main.c | 2 +- drivers/net/ethernet/intel/igb/igb_main.c | 2 +- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 2 +- drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c | 2 +- drivers/net/ethernet/mellanox/mlx4/en_rx.c | 9 ++- drivers/net/ethernet/sun/niu.c | 2 +- fs/nilfs2/page.c | 2 +- include/linux/mm.h | 25 ++----- include/linux/page_ref.h | 85 +++++++++++++++++++++++ include/linux/pagemap.h | 19 +---- mm/debug.c | 2 +- mm/huge_memory.c | 6 +- mm/internal.h | 7 +- mm/memory_hotplug.c | 4 +- mm/migrate.c | 10 +-- mm/page_alloc.c | 12 ++-- mm/vmscan.c | 6 +- net/core/sock.c | 2 +- 25 files changed, 134 insertions(+), 82 deletions(-) create mode 100644 include/linux/page_ref.h (limited to 'include/linux') diff --git a/arch/mips/mm/gup.c b/arch/mips/mm/gup.c index 1afd87c999b0..6cdffc76735c 100644 --- a/arch/mips/mm/gup.c +++ b/arch/mips/mm/gup.c @@ -64,7 +64,7 @@ static inline void get_head_page_multiple(struct page *page, int nr) { VM_BUG_ON(page != compound_head(page)); VM_BUG_ON(page_count(page) == 0); - atomic_add(nr, &page->_count); + page_ref_add(page, nr); SetPageReferenced(page); } diff --git a/arch/powerpc/mm/mmu_context_hash64.c b/arch/powerpc/mm/mmu_context_hash64.c index 4e4efbc2658e..9ca6fe16cb29 100644 --- a/arch/powerpc/mm/mmu_context_hash64.c +++ b/arch/powerpc/mm/mmu_context_hash64.c @@ -118,8 +118,7 @@ static void destroy_pagetable_page(struct mm_struct *mm) /* drop all the pending references */ count = ((unsigned long)pte_frag & ~PAGE_MASK) >> PTE_FRAG_SIZE_SHIFT; /* We allow PTE_FRAG_NR fragments from a PTE page */ - count = atomic_sub_return(PTE_FRAG_NR - count, &page->_count); - if (!count) { + if (page_ref_sub_and_test(page, PTE_FRAG_NR - count)) { pgtable_page_dtor(page); free_hot_cold_page(page, 0); } diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c index cdf2123d46db..d9cc66cbdbb7 100644 --- a/arch/powerpc/mm/pgtable_64.c +++ b/arch/powerpc/mm/pgtable_64.c @@ -403,7 +403,7 @@ static pte_t *__alloc_for_cache(struct mm_struct *mm, int kernel) * count. 
*/ if (likely(!mm->context.pte_frag)) { - atomic_set(&page->_count, PTE_FRAG_NR); + set_page_count(page, PTE_FRAG_NR); mm->context.pte_frag = ret + PTE_FRAG_SIZE; } spin_unlock(&mm->page_table_lock); diff --git a/arch/powerpc/platforms/512x/mpc512x_shared.c b/arch/powerpc/platforms/512x/mpc512x_shared.c index 711f3d352af7..452da2391153 100644 --- a/arch/powerpc/platforms/512x/mpc512x_shared.c +++ b/arch/powerpc/platforms/512x/mpc512x_shared.c @@ -188,7 +188,7 @@ static struct fsl_diu_shared_fb __attribute__ ((__aligned__(8))) diu_shared_fb; static inline void mpc512x_free_bootmem(struct page *page) { BUG_ON(PageTail(page)); - BUG_ON(atomic_read(&page->_count) > 1); + BUG_ON(page_ref_count(page) > 1); free_reserved_page(page); } diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c index d8a798d8bf50..f8d0b5e8bdfd 100644 --- a/arch/x86/mm/gup.c +++ b/arch/x86/mm/gup.c @@ -131,7 +131,7 @@ static inline void get_head_page_multiple(struct page *page, int nr) { VM_BUG_ON_PAGE(page != compound_head(page), page); VM_BUG_ON_PAGE(page_count(page) == 0, page); - atomic_add(nr, &page->_count); + page_ref_add(page, nr); SetPageReferenced(page); } diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c index d048d2009e89..437b3a822f44 100644 --- a/drivers/block/aoe/aoecmd.c +++ b/drivers/block/aoe/aoecmd.c @@ -875,7 +875,7 @@ bio_pageinc(struct bio *bio) * compound pages is no longer allowed by the kernel. */ page = compound_head(bv.bv_page); - atomic_inc(&page->_count); + page_ref_inc(page); } } @@ -888,7 +888,7 @@ bio_pagedec(struct bio *bio) bio_for_each_segment(bv, bio, iter) { page = compound_head(bv.bv_page); - atomic_dec(&page->_count); + page_ref_dec(page); } } diff --git a/drivers/net/ethernet/freescale/gianfar.c b/drivers/net/ethernet/freescale/gianfar.c index b9ecf197ad11..f21b2c479780 100644 --- a/drivers/net/ethernet/freescale/gianfar.c +++ b/drivers/net/ethernet/freescale/gianfar.c @@ -2944,7 +2944,7 @@ static bool gfar_add_rx_frag(struct gfar_rx_buff *rxb, u32 lstatus, /* change offset to the other half */ rxb->page_offset ^= GFAR_RXB_TRUESIZE; - atomic_inc(&page->_count); + page_ref_inc(page); return true; } diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_main.c b/drivers/net/ethernet/intel/fm10k/fm10k_main.c index b243c3cbe68f..b4547ebed774 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_main.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_main.c @@ -243,7 +243,7 @@ static bool fm10k_can_reuse_rx_page(struct fm10k_rx_buffer *rx_buffer, /* Even if we own the page, we are not allowed to use atomic_set() * This would break get_page_unless_zero() users. */ - atomic_inc(&page->_count); + page_ref_inc(page); return true; } diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index 31e5f3942839..5b4ad1ad4d5f 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -6630,7 +6630,7 @@ static bool igb_can_reuse_rx_page(struct igb_rx_buffer *rx_buffer, /* Even if we own the page, we are not allowed to use atomic_set() * This would break get_page_unless_zero() users. 
*/ - atomic_inc(&page->_count); + page_ref_inc(page); return true; } diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index c4003a88bbf6..e6035ff6b861 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -1942,7 +1942,7 @@ static bool ixgbe_add_rx_frag(struct ixgbe_ring *rx_ring, /* Even if we own the page, we are not allowed to use atomic_set() * This would break get_page_unless_zero() users. */ - atomic_inc(&page->_count); + page_ref_inc(page); return true; } diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index 3558f019b631..0ea14c0a2e74 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -837,7 +837,7 @@ add_tail_frag: /* Even if we own the page, we are not allowed to use atomic_set() * This would break get_page_unless_zero() users. */ - atomic_inc(&page->_count); + page_ref_inc(page); return true; } diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index 41440b2b20a3..86bcfe510e4e 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -82,8 +82,7 @@ static int mlx4_alloc_pages(struct mlx4_en_priv *priv, /* Not doing get_page() for each frag is a big win * on asymetric workloads. Note we can not use atomic_set(). */ - atomic_add(page_alloc->page_size / frag_info->frag_stride - 1, - &page->_count); + page_ref_add(page, page_alloc->page_size / frag_info->frag_stride - 1); return 0; } @@ -127,7 +126,7 @@ out: dma_unmap_page(priv->ddev, page_alloc[i].dma, page_alloc[i].page_size, PCI_DMA_FROMDEVICE); page = page_alloc[i].page; - atomic_set(&page->_count, 1); + set_page_count(page, 1); put_page(page); } } @@ -165,7 +164,7 @@ static int mlx4_en_init_allocator(struct mlx4_en_priv *priv, en_dbg(DRV, priv, " frag %d allocator: - size:%d frags:%d\n", i, ring->page_alloc[i].page_size, - atomic_read(&ring->page_alloc[i].page->_count)); + page_ref_count(ring->page_alloc[i].page)); } return 0; @@ -177,7 +176,7 @@ out: dma_unmap_page(priv->ddev, page_alloc->dma, page_alloc->page_size, PCI_DMA_FROMDEVICE); page = page_alloc->page; - atomic_set(&page->_count, 1); + set_page_count(page, 1); put_page(page); page_alloc->page = NULL; } diff --git a/drivers/net/ethernet/sun/niu.c b/drivers/net/ethernet/sun/niu.c index ab6051a43134..9cc45649f477 100644 --- a/drivers/net/ethernet/sun/niu.c +++ b/drivers/net/ethernet/sun/niu.c @@ -3341,7 +3341,7 @@ static int niu_rbr_add_page(struct niu *np, struct rx_ring_info *rp, niu_hash_page(rp, page, addr); if (rp->rbr_blocks_per_page > 1) - atomic_add(rp->rbr_blocks_per_page - 1, &page->_count); + page_ref_add(page, rp->rbr_blocks_per_page - 1); for (i = 0; i < rp->rbr_blocks_per_page; i++) { __le32 *rbr = &rp->rbr[start_index + i]; diff --git a/fs/nilfs2/page.c b/fs/nilfs2/page.c index 45d650addd56..c20df77eff99 100644 --- a/fs/nilfs2/page.c +++ b/fs/nilfs2/page.c @@ -180,7 +180,7 @@ void nilfs_page_bug(struct page *page) printk(KERN_CRIT "NILFS_PAGE_BUG(%p): cnt=%d index#=%llu flags=0x%lx " "mapping=%p ino=%lu\n", - page, atomic_read(&page->_count), + page, page_ref_count(page), (unsigned long long)page->index, page->flags, m, ino); if (page_has_buffers(page)) { diff --git a/include/linux/mm.h b/include/linux/mm.h index f7fd64227d3a..997fc2e5d9d8 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -22,6 
+22,7 @@ #include #include #include +#include struct mempolicy; struct anon_vma; @@ -386,8 +387,8 @@ static inline int pmd_devmap(pmd_t pmd) */ static inline int put_page_testzero(struct page *page) { - VM_BUG_ON_PAGE(atomic_read(&page->_count) == 0, page); - return atomic_dec_and_test(&page->_count); + VM_BUG_ON_PAGE(page_ref_count(page) == 0, page); + return page_ref_dec_and_test(page); } /* @@ -398,7 +399,7 @@ static inline int put_page_testzero(struct page *page) */ static inline int get_page_unless_zero(struct page *page) { - return atomic_inc_not_zero(&page->_count); + return page_ref_add_unless(page, 1, 0); } extern int page_is_ram(unsigned long pfn); @@ -486,11 +487,6 @@ static inline int total_mapcount(struct page *page) } #endif -static inline int page_count(struct page *page) -{ - return atomic_read(&compound_head(page)->_count); -} - static inline struct page *virt_to_head_page(const void *x) { struct page *page = virt_to_page(x); @@ -498,15 +494,6 @@ static inline struct page *virt_to_head_page(const void *x) return compound_head(page); } -/* - * Setup the page count before being freed into the page allocator for - * the first time (boot or memory hotplug) - */ -static inline void init_page_count(struct page *page) -{ - atomic_set(&page->_count, 1); -} - void __put_page(struct page *page); void put_pages_list(struct list_head *pages); @@ -716,8 +703,8 @@ static inline void get_page(struct page *page) * Getting a normal page or the head of a compound page * requires to already have an elevated page->_count. */ - VM_BUG_ON_PAGE(atomic_read(&page->_count) <= 0, page); - atomic_inc(&page->_count); + VM_BUG_ON_PAGE(page_ref_count(page) <= 0, page); + page_ref_inc(page); if (unlikely(is_zone_device_page(page))) get_zone_device_page(page); diff --git a/include/linux/page_ref.h b/include/linux/page_ref.h new file mode 100644 index 000000000000..30f5817f6b8e --- /dev/null +++ b/include/linux/page_ref.h @@ -0,0 +1,85 @@ +#ifndef _LINUX_PAGE_REF_H +#define _LINUX_PAGE_REF_H + +#include +#include +#include + +static inline int page_ref_count(struct page *page) +{ + return atomic_read(&page->_count); +} + +static inline int page_count(struct page *page) +{ + return atomic_read(&compound_head(page)->_count); +} + +static inline void set_page_count(struct page *page, int v) +{ + atomic_set(&page->_count, v); +} + +/* + * Setup the page count before being freed into the page allocator for + * the first time (boot or memory hotplug) + */ +static inline void init_page_count(struct page *page) +{ + set_page_count(page, 1); +} + +static inline void page_ref_add(struct page *page, int nr) +{ + atomic_add(nr, &page->_count); +} + +static inline void page_ref_sub(struct page *page, int nr) +{ + atomic_sub(nr, &page->_count); +} + +static inline void page_ref_inc(struct page *page) +{ + atomic_inc(&page->_count); +} + +static inline void page_ref_dec(struct page *page) +{ + atomic_dec(&page->_count); +} + +static inline int page_ref_sub_and_test(struct page *page, int nr) +{ + return atomic_sub_and_test(nr, &page->_count); +} + +static inline int page_ref_dec_and_test(struct page *page) +{ + return atomic_dec_and_test(&page->_count); +} + +static inline int page_ref_dec_return(struct page *page) +{ + return atomic_dec_return(&page->_count); +} + +static inline int page_ref_add_unless(struct page *page, int nr, int u) +{ + return atomic_add_unless(&page->_count, nr, u); +} + +static inline int page_ref_freeze(struct page *page, int count) +{ + return likely(atomic_cmpxchg(&page->_count, count, 0) == 
count); +} + +static inline void page_ref_unfreeze(struct page *page, int count) +{ + VM_BUG_ON_PAGE(page_count(page) != 0, page); + VM_BUG_ON(count == 0); + + atomic_set(&page->_count, count); +} + +#endif diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 183b15ea052b..1ebd65c91422 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -165,7 +165,7 @@ static inline int page_cache_get_speculative(struct page *page) * SMP requires. */ VM_BUG_ON_PAGE(page_count(page) == 0, page); - atomic_inc(&page->_count); + page_ref_inc(page); #else if (unlikely(!get_page_unless_zero(page))) { @@ -194,10 +194,10 @@ static inline int page_cache_add_speculative(struct page *page, int count) VM_BUG_ON(!in_atomic()); # endif VM_BUG_ON_PAGE(page_count(page) == 0, page); - atomic_add(count, &page->_count); + page_ref_add(page, count); #else - if (unlikely(!atomic_add_unless(&page->_count, count, 0))) + if (unlikely(!page_ref_add_unless(page, count, 0))) return 0; #endif VM_BUG_ON_PAGE(PageCompound(page) && page != compound_head(page), page); @@ -205,19 +205,6 @@ static inline int page_cache_add_speculative(struct page *page, int count) return 1; } -static inline int page_freeze_refs(struct page *page, int count) -{ - return likely(atomic_cmpxchg(&page->_count, count, 0) == count); -} - -static inline void page_unfreeze_refs(struct page *page, int count) -{ - VM_BUG_ON_PAGE(page_count(page) != 0, page); - VM_BUG_ON(count == 0); - - atomic_set(&page->_count, count); -} - #ifdef CONFIG_NUMA extern struct page *__page_cache_alloc(gfp_t gfp); #else diff --git a/mm/debug.c b/mm/debug.c index df7247b0b532..8865bfb41b0b 100644 --- a/mm/debug.c +++ b/mm/debug.c @@ -43,7 +43,7 @@ const struct trace_print_flags vmaflag_names[] = { void __dump_page(struct page *page, const char *reason) { pr_emerg("page:%p count:%d mapcount:%d mapping:%p index:%#lx", - page, atomic_read(&page->_count), page_mapcount(page), + page, page_ref_count(page), page_mapcount(page), page->mapping, page->index); if (PageCompound(page)) pr_cont(" compound_mapcount: %d", compound_mapcount(page)); diff --git a/mm/huge_memory.c b/mm/huge_memory.c index e08b1659ff19..bb944c771c82 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -2888,7 +2888,7 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd, page = pmd_page(*pmd); VM_BUG_ON_PAGE(!page_count(page), page); - atomic_add(HPAGE_PMD_NR - 1, &page->_count); + page_ref_add(page, HPAGE_PMD_NR - 1); write = pmd_write(*pmd); young = pmd_young(*pmd); dirty = pmd_dirty(*pmd); @@ -3257,7 +3257,7 @@ static void __split_huge_page_tail(struct page *head, int tail, struct page *page_tail = head + tail; VM_BUG_ON_PAGE(atomic_read(&page_tail->_mapcount) != -1, page_tail); - VM_BUG_ON_PAGE(atomic_read(&page_tail->_count) != 0, page_tail); + VM_BUG_ON_PAGE(page_ref_count(page_tail) != 0, page_tail); /* * tail_page->_count is zero and not changing from under us. But @@ -3270,7 +3270,7 @@ static void __split_huge_page_tail(struct page *head, int tail, * atomic_set() here would be safe on all archs (and not only on x86), * it's safer to use atomic_inc(). 
*/ - atomic_inc(&page_tail->_count); + page_ref_inc(page_tail); page_tail->flags &= ~PAGE_FLAGS_CHECK_AT_PREP; page_tail->flags |= (head->flags & diff --git a/mm/internal.h b/mm/internal.h index 4042a8a05672..57d7b0e839f0 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -38,11 +38,6 @@ void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *start_vma, unsigned long floor, unsigned long ceiling); -static inline void set_page_count(struct page *page, int v) -{ - atomic_set(&page->_count, v); -} - extern int __do_page_cache_readahead(struct address_space *mapping, struct file *filp, pgoff_t offset, unsigned long nr_to_read, unsigned long lookahead_size); @@ -64,7 +59,7 @@ static inline unsigned long ra_submit(struct file_ra_state *ra, static inline void set_page_refcounted(struct page *page) { VM_BUG_ON_PAGE(PageTail(page), page); - VM_BUG_ON_PAGE(atomic_read(&page->_count), page); + VM_BUG_ON_PAGE(page_ref_count(page), page); set_page_count(page, 1); } diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index c832ef3565cc..e62aa078f5c9 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -167,7 +167,7 @@ void get_page_bootmem(unsigned long info, struct page *page, page->lru.next = (struct list_head *) type; SetPagePrivate(page); set_page_private(page, info); - atomic_inc(&page->_count); + page_ref_inc(page); } void put_page_bootmem(struct page *page) @@ -178,7 +178,7 @@ void put_page_bootmem(struct page *page) BUG_ON(type < MEMORY_HOTPLUG_MIN_BOOTMEM_TYPE || type > MEMORY_HOTPLUG_MAX_BOOTMEM_TYPE); - if (atomic_dec_return(&page->_count) == 1) { + if (page_ref_dec_return(page) == 1) { ClearPagePrivate(page); set_page_private(page, 0); INIT_LIST_HEAD(&page->lru); diff --git a/mm/migrate.c b/mm/migrate.c index fdaf0818fb30..577c94b8e959 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -349,7 +349,7 @@ int migrate_page_move_mapping(struct address_space *mapping, return -EAGAIN; } - if (!page_freeze_refs(page, expected_count)) { + if (!page_ref_freeze(page, expected_count)) { spin_unlock_irq(&mapping->tree_lock); return -EAGAIN; } @@ -363,7 +363,7 @@ int migrate_page_move_mapping(struct address_space *mapping, */ if (mode == MIGRATE_ASYNC && head && !buffer_migrate_lock_buffers(head, mode)) { - page_unfreeze_refs(page, expected_count); + page_ref_unfreeze(page, expected_count); spin_unlock_irq(&mapping->tree_lock); return -EAGAIN; } @@ -397,7 +397,7 @@ int migrate_page_move_mapping(struct address_space *mapping, * to one less reference. * We know this isn't the last reference. 
*/ - page_unfreeze_refs(page, expected_count - 1); + page_ref_unfreeze(page, expected_count - 1); spin_unlock(&mapping->tree_lock); /* Leave irq disabled to prevent preemption while updating stats */ @@ -451,7 +451,7 @@ int migrate_huge_page_move_mapping(struct address_space *mapping, return -EAGAIN; } - if (!page_freeze_refs(page, expected_count)) { + if (!page_ref_freeze(page, expected_count)) { spin_unlock_irq(&mapping->tree_lock); return -EAGAIN; } @@ -463,7 +463,7 @@ int migrate_huge_page_move_mapping(struct address_space *mapping, radix_tree_replace_slot(pslot, newpage); - page_unfreeze_refs(page, expected_count - 1); + page_ref_unfreeze(page, expected_count - 1); spin_unlock_irq(&mapping->tree_lock); diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 096a00d98a45..30134a8f7cc8 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -766,7 +766,7 @@ static inline int free_pages_check(struct page *page) bad_reason = "nonzero mapcount"; if (unlikely(page->mapping != NULL)) bad_reason = "non-NULL mapping"; - if (unlikely(atomic_read(&page->_count) != 0)) + if (unlikely(page_ref_count(page) != 0)) bad_reason = "nonzero _count"; if (unlikely(page->flags & PAGE_FLAGS_CHECK_AT_FREE)) { bad_reason = "PAGE_FLAGS_CHECK_AT_FREE flag(s) set"; @@ -1462,7 +1462,7 @@ static inline int check_new_page(struct page *page) bad_reason = "nonzero mapcount"; if (unlikely(page->mapping != NULL)) bad_reason = "non-NULL mapping"; - if (unlikely(atomic_read(&page->_count) != 0)) + if (unlikely(page_ref_count(page) != 0)) bad_reason = "nonzero _count"; if (unlikely(page->flags & __PG_HWPOISON)) { bad_reason = "HWPoisoned (hardware-corrupted)"; @@ -3475,7 +3475,7 @@ refill: /* Even if we own the page, we do not use atomic_set(). * This would break get_page_unless_zero() users. */ - atomic_add(size - 1, &page->_count); + page_ref_add(page, size - 1); /* reset page count bias and offset to start of new frag */ nc->pfmemalloc = page_is_pfmemalloc(page); @@ -3487,7 +3487,7 @@ refill: if (unlikely(offset < 0)) { page = virt_to_page(nc->va); - if (!atomic_sub_and_test(nc->pagecnt_bias, &page->_count)) + if (!page_ref_sub_and_test(page, nc->pagecnt_bias)) goto refill; #if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE) @@ -3495,7 +3495,7 @@ refill: size = nc->size; #endif /* OK, page count is 0, we can safely set it */ - atomic_set(&page->_count, size); + set_page_count(page, size); /* reset page count bias and offset to start of new frag */ nc->pagecnt_bias = size; @@ -6852,7 +6852,7 @@ bool has_unmovable_pages(struct zone *zone, struct page *page, int count, * This check already skips compound tails of THP * because their page->_count is zero at all time. */ - if (!atomic_read(&page->_count)) { + if (!page_ref_count(page)) { if (PageBuddy(page)) iter += (1 << page_order(page)) - 1; continue; diff --git a/mm/vmscan.c b/mm/vmscan.c index b41b82d4bab1..b934223eaa45 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -638,11 +638,11 @@ static int __remove_mapping(struct address_space *mapping, struct page *page, * Note that if SetPageDirty is always performed via set_page_dirty, * and thus under tree_lock, then this ordering is not required. 
*/ - if (!page_freeze_refs(page, 2)) + if (!page_ref_freeze(page, 2)) goto cannot_free; /* note: atomic_cmpxchg in page_freeze_refs provides the smp_rmb */ if (unlikely(PageDirty(page))) { - page_unfreeze_refs(page, 2); + page_ref_unfreeze(page, 2); goto cannot_free; } @@ -704,7 +704,7 @@ int remove_mapping(struct address_space *mapping, struct page *page) * drops the pagecache ref for us without requiring another * atomic operation. */ - page_unfreeze_refs(page, 1); + page_ref_unfreeze(page, 1); return 1; } return 0; } diff --git a/net/core/sock.c b/net/core/sock.c index 6c1c8bc93412..67e7efe12ff7 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1903,7 +1903,7 @@ EXPORT_SYMBOL(sock_cmsg_send); bool skb_page_frag_refill(unsigned int sz, struct page_frag *pfrag, gfp_t gfp) { if (pfrag->page) { - if (atomic_read(&pfrag->page->_count) == 1) { + if (page_ref_count(pfrag->page) == 1) { pfrag->offset = 0; return true; } -- cgit v1.2.3 From 95813b8faa0cd315f61a8b9d9c523792370b693e Mon Sep 17 00:00:00 2001 From: Joonsoo Kim Date: Thu, 17 Mar 2016 14:19:29 -0700 Subject: mm/page_ref: add tracepoint to track down page reference manipulation CMA allocation should be guaranteed to succeed by definition, but, unfortunately, it sometimes fails. The problem is hard to track down, because it is related to page reference manipulation and we have no facility to analyze it. This patch adds tracepoints to track down page reference manipulation. With them, we can find the exact reason for a failure and fix the problem. The following is an example of the tracepoint output. (Note: this example is from a stale version that printed the flags as a raw number; recent versions print them as a human-readable string.)
<...>-9018 [004] 92.678375: page_ref_set: pfn=0x17ac9 flags=0x0 count=1 mapcount=0 mapping=(nil) mt=4 val=1
<...>-9018 [004] 92.678378: kernel_stack:
=> get_page_from_freelist (ffffffff81176659)
=> __alloc_pages_nodemask (ffffffff81176d22)
=> alloc_pages_vma (ffffffff811bf675)
=> handle_mm_fault (ffffffff8119e693)
=> __do_page_fault (ffffffff810631ea)
=> trace_do_page_fault (ffffffff81063543)
=> do_async_page_fault (ffffffff8105c40a)
=> async_page_fault (ffffffff817581d8)
[snip]
<...>-9018 [004] 92.678379: page_ref_mod: pfn=0x17ac9 flags=0x40048 count=2 mapcount=1 mapping=0xffff880015a78dc1 mt=4 val=1
[snip]
...
...
<...>-9131 [001] 93.174468: test_pages_isolated: start_pfn=0x17800 end_pfn=0x17c00 fin_pfn=0x17ac9 ret=fail
[snip]
<...>-9018 [004] 93.174843: page_ref_mod_and_test: pfn=0x17ac9 flags=0x40068 count=0 mapcount=0 mapping=0xffff880015a78dc1 mt=4 val=-1 ret=1
=> release_pages (ffffffff8117c9e4)
=> free_pages_and_swap_cache (ffffffff811b0697)
=> tlb_flush_mmu_free (ffffffff81199616)
=> tlb_finish_mmu (ffffffff8119a62c)
=> exit_mmap (ffffffff811a53f7)
=> mmput (ffffffff81073f47)
=> do_exit (ffffffff810794e9)
=> do_group_exit (ffffffff81079def)
=> SyS_exit_group (ffffffff81079e74)
=> entry_SYSCALL_64_fastpath (ffffffff817560b6)
This output shows that the problem comes from the exit path. In the exit path, to improve performance, pages are not freed immediately; they are gathered and processed in batches. During this window, migration is not possible, so the CMA allocation fails. This problem would be hard to find without the page reference tracepoint facility. Enabling this feature bloats the kernel text by about 30 KB in my configuration; the exact numbers are in the size table below, after the illustrative sketch.
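A distilled sketch of the shape shared by all the wrappers this patch touches (an illustration, not a hunk from the patch; the real definitions follow in the page_ref.h diff below). Each helper keeps its atomic operation and only then, behind a static key, calls the out-of-line tracepoint stub, so the disabled-tracing fast path costs a single patched-out branch:

	/* Sketch: page_ref_tracepoint_active() open codes the static-key
	 * test that trace_page_ref_mod_enabled() would normally provide,
	 * because page_ref.h cannot pull in tracepoint.h directly. */
	static inline void page_ref_dec(struct page *page)
	{
		atomic_dec(&page->_count);
		if (page_ref_tracepoint_active(__tracepoint_page_ref_mod))
			__page_ref_mod(page, -1);
	}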
    text    data     bss      dec     hex filename
12127327 2243616 1507328 15878271  f2487f vmlinux_disabled
12157208 2258880 1507328 15923416  f2f8d8 vmlinux_enabled
Note that, due to a header file dependency problem between mm.h and tracepoint.h, this feature has to open code the static key functions for tracepoints. This approach was proposed by Steven Rostedt in the following link: https://lkml.org/lkml/2015/12/9/699 [arnd@arndb.de: crypto/async_pq: use __free_page() instead of put_page()] [iamjoonsoo.kim@lge.com: fix build failure for xtensa] [akpm@linux-foundation.org: tweak Kconfig text, per Vlastimil] Signed-off-by: Joonsoo Kim Acked-by: Michal Nazarewicz Acked-by: Vlastimil Babka Cc: Minchan Kim Cc: Mel Gorman Cc: "Kirill A. Shutemov" Cc: Sergey Senozhatsky Acked-by: Steven Rostedt Signed-off-by: Arnd Bergmann Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- crypto/async_tx/async_pq.c | 2 +- include/linux/page_ref.h | 98 +++++++++++++++++++++++++++-- include/trace/events/page_ref.h | 134 ++++++++++++++++++++++++++++++++++++++++ mm/Kconfig.debug | 13 ++++ mm/Makefile | 1 + mm/debug_page_ref.c | 54 ++++++++++++++++ 6 files changed, 296 insertions(+), 6 deletions(-) create mode 100644 include/trace/events/page_ref.h create mode 100644 mm/debug_page_ref.c (limited to 'include/linux') diff --git a/crypto/async_tx/async_pq.c b/crypto/async_tx/async_pq.c index c0748bbd4c08..08b3ac68952b 100644 --- a/crypto/async_tx/async_pq.c +++ b/crypto/async_tx/async_pq.c @@ -444,7 +444,7 @@ static int __init async_pq_init(void) static void __exit async_pq_exit(void) { - put_page(pq_scribble_page); + __free_page(pq_scribble_page); } module_init(async_pq_init); diff --git a/include/linux/page_ref.h b/include/linux/page_ref.h index 30f5817f6b8e..e596d5d9540e 100644 --- a/include/linux/page_ref.h +++ b/include/linux/page_ref.h @@ -4,6 +4,62 @@ #include #include #include +#include + +extern struct tracepoint __tracepoint_page_ref_set; +extern struct tracepoint __tracepoint_page_ref_mod; +extern struct tracepoint __tracepoint_page_ref_mod_and_test; +extern struct tracepoint __tracepoint_page_ref_mod_and_return; +extern struct tracepoint __tracepoint_page_ref_mod_unless; +extern struct tracepoint __tracepoint_page_ref_freeze; +extern struct tracepoint __tracepoint_page_ref_unfreeze; + +#ifdef CONFIG_DEBUG_PAGE_REF + +/* + * Ideally we would want to use the trace__enabled() helper + * functions. But due to include header file issues, that is not + * feasible. Instead we have to open code the static key functions.
+ * + * See trace_##name##_enabled(void) in include/linux/tracepoint.h + */ +#define page_ref_tracepoint_active(t) static_key_false(&(t).key) + +extern void __page_ref_set(struct page *page, int v); +extern void __page_ref_mod(struct page *page, int v); +extern void __page_ref_mod_and_test(struct page *page, int v, int ret); +extern void __page_ref_mod_and_return(struct page *page, int v, int ret); +extern void __page_ref_mod_unless(struct page *page, int v, int u); +extern void __page_ref_freeze(struct page *page, int v, int ret); +extern void __page_ref_unfreeze(struct page *page, int v); + +#else + +#define page_ref_tracepoint_active(t) false + +static inline void __page_ref_set(struct page *page, int v) +{ +} +static inline void __page_ref_mod(struct page *page, int v) +{ +} +static inline void __page_ref_mod_and_test(struct page *page, int v, int ret) +{ +} +static inline void __page_ref_mod_and_return(struct page *page, int v, int ret) +{ +} +static inline void __page_ref_mod_unless(struct page *page, int v, int u) +{ +} +static inline void __page_ref_freeze(struct page *page, int v, int ret) +{ +} +static inline void __page_ref_unfreeze(struct page *page, int v) +{ +} + +#endif static inline int page_ref_count(struct page *page) { @@ -18,6 +74,8 @@ static inline int page_count(struct page *page) static inline void set_page_count(struct page *page, int v) { atomic_set(&page->_count, v); + if (page_ref_tracepoint_active(__tracepoint_page_ref_set)) + __page_ref_set(page, v); } /* @@ -32,46 +90,74 @@ static inline void init_page_count(struct page *page) static inline void page_ref_add(struct page *page, int nr) { atomic_add(nr, &page->_count); + if (page_ref_tracepoint_active(__tracepoint_page_ref_mod)) + __page_ref_mod(page, nr); } static inline void page_ref_sub(struct page *page, int nr) { atomic_sub(nr, &page->_count); + if (page_ref_tracepoint_active(__tracepoint_page_ref_mod)) + __page_ref_mod(page, -nr); } static inline void page_ref_inc(struct page *page) { atomic_inc(&page->_count); + if (page_ref_tracepoint_active(__tracepoint_page_ref_mod)) + __page_ref_mod(page, 1); } static inline void page_ref_dec(struct page *page) { atomic_dec(&page->_count); + if (page_ref_tracepoint_active(__tracepoint_page_ref_mod)) + __page_ref_mod(page, -1); } static inline int page_ref_sub_and_test(struct page *page, int nr) { - return atomic_sub_and_test(nr, &page->_count); + int ret = atomic_sub_and_test(nr, &page->_count); + + if (page_ref_tracepoint_active(__tracepoint_page_ref_mod_and_test)) + __page_ref_mod_and_test(page, -nr, ret); + return ret; } static inline int page_ref_dec_and_test(struct page *page) { - return atomic_dec_and_test(&page->_count); + int ret = atomic_dec_and_test(&page->_count); + + if (page_ref_tracepoint_active(__tracepoint_page_ref_mod_and_test)) + __page_ref_mod_and_test(page, -1, ret); + return ret; } static inline int page_ref_dec_return(struct page *page) { - return atomic_dec_return(&page->_count); + int ret = atomic_dec_return(&page->_count); + + if (page_ref_tracepoint_active(__tracepoint_page_ref_mod_and_return)) + __page_ref_mod_and_return(page, -1, ret); + return ret; } static inline int page_ref_add_unless(struct page *page, int nr, int u) { - return atomic_add_unless(&page->_count, nr, u); + int ret = atomic_add_unless(&page->_count, nr, u); + + if (page_ref_tracepoint_active(__tracepoint_page_ref_mod_unless)) + __page_ref_mod_unless(page, nr, ret); + return ret; } static inline int page_ref_freeze(struct page *page, int count) { - return 
likely(atomic_cmpxchg(&page->_count, count, 0) == count); + int ret = likely(atomic_cmpxchg(&page->_count, count, 0) == count); + + if (page_ref_tracepoint_active(__tracepoint_page_ref_freeze)) + __page_ref_freeze(page, count, ret); + return ret; } static inline void page_ref_unfreeze(struct page *page, int count) @@ -80,6 +166,8 @@ static inline void page_ref_unfreeze(struct page *page, int count) VM_BUG_ON(count == 0); atomic_set(&page->_count, count); + if (page_ref_tracepoint_active(__tracepoint_page_ref_unfreeze)) + __page_ref_unfreeze(page, count); } #endif diff --git a/include/trace/events/page_ref.h b/include/trace/events/page_ref.h new file mode 100644 index 000000000000..81001f8b0db4 --- /dev/null +++ b/include/trace/events/page_ref.h @@ -0,0 +1,134 @@ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM page_ref + +#if !defined(_TRACE_PAGE_REF_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_PAGE_REF_H + +#include +#include +#include +#include + +DECLARE_EVENT_CLASS(page_ref_mod_template, + + TP_PROTO(struct page *page, int v), + + TP_ARGS(page, v), + + TP_STRUCT__entry( + __field(unsigned long, pfn) + __field(unsigned long, flags) + __field(int, count) + __field(int, mapcount) + __field(void *, mapping) + __field(int, mt) + __field(int, val) + ), + + TP_fast_assign( + __entry->pfn = page_to_pfn(page); + __entry->flags = page->flags; + __entry->count = page_ref_count(page); + __entry->mapcount = page_mapcount(page); + __entry->mapping = page->mapping; + __entry->mt = get_pageblock_migratetype(page); + __entry->val = v; + ), + + TP_printk("pfn=0x%lx flags=%s count=%d mapcount=%d mapping=%p mt=%d val=%d", + __entry->pfn, + show_page_flags(__entry->flags & ((1UL << NR_PAGEFLAGS) - 1)), + __entry->count, + __entry->mapcount, __entry->mapping, __entry->mt, + __entry->val) +); + +DEFINE_EVENT(page_ref_mod_template, page_ref_set, + + TP_PROTO(struct page *page, int v), + + TP_ARGS(page, v) +); + +DEFINE_EVENT(page_ref_mod_template, page_ref_mod, + + TP_PROTO(struct page *page, int v), + + TP_ARGS(page, v) +); + +DECLARE_EVENT_CLASS(page_ref_mod_and_test_template, + + TP_PROTO(struct page *page, int v, int ret), + + TP_ARGS(page, v, ret), + + TP_STRUCT__entry( + __field(unsigned long, pfn) + __field(unsigned long, flags) + __field(int, count) + __field(int, mapcount) + __field(void *, mapping) + __field(int, mt) + __field(int, val) + __field(int, ret) + ), + + TP_fast_assign( + __entry->pfn = page_to_pfn(page); + __entry->flags = page->flags; + __entry->count = page_ref_count(page); + __entry->mapcount = page_mapcount(page); + __entry->mapping = page->mapping; + __entry->mt = get_pageblock_migratetype(page); + __entry->val = v; + __entry->ret = ret; + ), + + TP_printk("pfn=0x%lx flags=%s count=%d mapcount=%d mapping=%p mt=%d val=%d ret=%d", + __entry->pfn, + show_page_flags(__entry->flags & ((1UL << NR_PAGEFLAGS) - 1)), + __entry->count, + __entry->mapcount, __entry->mapping, __entry->mt, + __entry->val, __entry->ret) +); + +DEFINE_EVENT(page_ref_mod_and_test_template, page_ref_mod_and_test, + + TP_PROTO(struct page *page, int v, int ret), + + TP_ARGS(page, v, ret) +); + +DEFINE_EVENT(page_ref_mod_and_test_template, page_ref_mod_and_return, + + TP_PROTO(struct page *page, int v, int ret), + + TP_ARGS(page, v, ret) +); + +DEFINE_EVENT(page_ref_mod_and_test_template, page_ref_mod_unless, + + TP_PROTO(struct page *page, int v, int ret), + + TP_ARGS(page, v, ret) +); + +DEFINE_EVENT(page_ref_mod_and_test_template, page_ref_freeze, + + TP_PROTO(struct page *page, int v, int ret), + + 
TP_ARGS(page, v, ret) +); + +DEFINE_EVENT(page_ref_mod_template, page_ref_unfreeze, + + TP_PROTO(struct page *page, int v), + + TP_ARGS(page, v) +); + +#endif /* _TRACE_PAGE_COUNT_H */ + +/* This part must be outside protection */ +#include diff --git a/mm/Kconfig.debug b/mm/Kconfig.debug index 5c50b238b770..22f4cd96acb0 100644 --- a/mm/Kconfig.debug +++ b/mm/Kconfig.debug @@ -79,3 +79,16 @@ config PAGE_POISONING_ZERO Enabling page poisoning with this option will disable hibernation If unsure, say N + bool + +config DEBUG_PAGE_REF + bool "Enable tracepoint to track down page reference manipulation" + depends on DEBUG_KERNEL + depends on TRACEPOINTS + ---help--- + This is a feature to add tracepoint for tracking down page reference + manipulation. This tracking is useful to diagnose functional failure + due to migration failures caused by page reference mismatches. Be + careful when enabling this feature because it adds about 30 KB to the + kernel code. However the runtime performance overhead is virtually + nil until the tracepoints are actually enabled. diff --git a/mm/Makefile b/mm/Makefile index cfdd481d27a5..6da300a1414b 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -81,3 +81,4 @@ obj-$(CONFIG_CMA_DEBUGFS) += cma_debug.o obj-$(CONFIG_USERFAULTFD) += userfaultfd.o obj-$(CONFIG_IDLE_PAGE_TRACKING) += page_idle.o obj-$(CONFIG_FRAME_VECTOR) += frame_vector.o +obj-$(CONFIG_DEBUG_PAGE_REF) += debug_page_ref.o diff --git a/mm/debug_page_ref.c b/mm/debug_page_ref.c new file mode 100644 index 000000000000..1aef3d562e52 --- /dev/null +++ b/mm/debug_page_ref.c @@ -0,0 +1,54 @@ +#include +#include + +#define CREATE_TRACE_POINTS +#include + +void __page_ref_set(struct page *page, int v) +{ + trace_page_ref_set(page, v); +} +EXPORT_SYMBOL(__page_ref_set); +EXPORT_TRACEPOINT_SYMBOL(page_ref_set); + +void __page_ref_mod(struct page *page, int v) +{ + trace_page_ref_mod(page, v); +} +EXPORT_SYMBOL(__page_ref_mod); +EXPORT_TRACEPOINT_SYMBOL(page_ref_mod); + +void __page_ref_mod_and_test(struct page *page, int v, int ret) +{ + trace_page_ref_mod_and_test(page, v, ret); +} +EXPORT_SYMBOL(__page_ref_mod_and_test); +EXPORT_TRACEPOINT_SYMBOL(page_ref_mod_and_test); + +void __page_ref_mod_and_return(struct page *page, int v, int ret) +{ + trace_page_ref_mod_and_return(page, v, ret); +} +EXPORT_SYMBOL(__page_ref_mod_and_return); +EXPORT_TRACEPOINT_SYMBOL(page_ref_mod_and_return); + +void __page_ref_mod_unless(struct page *page, int v, int u) +{ + trace_page_ref_mod_unless(page, v, u); +} +EXPORT_SYMBOL(__page_ref_mod_unless); +EXPORT_TRACEPOINT_SYMBOL(page_ref_mod_unless); + +void __page_ref_freeze(struct page *page, int v, int ret) +{ + trace_page_ref_freeze(page, v, ret); +} +EXPORT_SYMBOL(__page_ref_freeze); +EXPORT_TRACEPOINT_SYMBOL(page_ref_freeze); + +void __page_ref_unfreeze(struct page *page, int v) +{ + trace_page_ref_unfreeze(page, v); +} +EXPORT_SYMBOL(__page_ref_unfreeze); +EXPORT_TRACEPOINT_SYMBOL(page_ref_unfreeze); -- cgit v1.2.3 From b11a7b94100cba5ec926a181894c2897a22651b9 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 17 Mar 2016 14:19:41 -0700 Subject: mm: exclude ZONE_DEVICE from GFP_ZONE_TABLE ZONE_DEVICE (merged in 4.3) and ZONE_CMA (proposed) are examples of new mm zones that are bumping up against the current maximum limit of 4 zones, i.e. 2 bits in page->flags for the GFP_ZONE_TABLE. The GFP_ZONE_TABLE poses an interesting constraint since include/linux/gfp.h gets included by the 32-bit portion of a 64-bit build. 
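To make the table mechanics concrete, here is a distilled sketch of the lookup (not part of the patch; gfp_zone_sketch is a made-up name, and the body mirrors gfp_zone() as changed by the hunk below, minus the GFP_ZONE_BAD sanity check):

	static inline enum zone_type gfp_zone_sketch(gfp_t flags)
	{
		int bit = (__force int)(flags & GFP_ZONEMASK);

		/* e.g. __GFP_HIGHMEM alone: bit == 0x2, so with
		 * GFP_ZONES_SHIFT == 2 we shift by 4 and mask off two
		 * bits, yielding OPT_ZONE_HIGHMEM. */
		return (GFP_ZONE_TABLE >> (bit * GFP_ZONES_SHIFT)) &
			((1 << GFP_ZONES_SHIFT) - 1);
	}

Each of the 16 possible GFP_ZONEMASK values selects one GFP_ZONES_SHIFT-bit entry packed into a single long, which is why the table only fits while 16 * GFP_ZONES_SHIFT <= BITS_PER_LONG.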
We need to be careful to only build the table for zones that have a corresponding gfp_t flag. GFP_ZONES_SHIFT is introduced for this purpose. This patch does not attempt to solve the problem of adding a new zone that also has a corresponding GFP_ flag. Vlastimil points out that ZONE_DEVICE, by depending on x86_64 and SPARSEMEM_VMEMMAP implies that SECTIONS_WIDTH is zero. In other words even though ZONE_DEVICE does not fit in GFP_ZONE_TABLE it is free to consume another bit in page->flags (expand ZONES_WIDTH) with room to spare. Link: https://bugzilla.kernel.org/show_bug.cgi?id=110931 Fixes: 033fbae988fc ("mm: ZONE_DEVICE for "device memory"") Signed-off-by: Dan Williams Reported-by: Mark Reported-by: Vlastimil Babka Cc: Mel Gorman Cc: Rik van Riel Cc: Joonsoo Kim Cc: Dave Hansen Cc: Sudip Mukherjee Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/gfp.h | 33 ++++++++++++++++++++------------- include/linux/page-flags-layout.h | 2 ++ mm/Kconfig | 2 -- 3 files changed, 22 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 11d56c6e7ef2..570383a41853 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -331,22 +331,29 @@ static inline bool gfpflags_allow_blocking(const gfp_t gfp_flags) * 0xe => BAD (MOVABLE+DMA32+HIGHMEM) * 0xf => BAD (MOVABLE+DMA32+HIGHMEM+DMA) * - * ZONES_SHIFT must be <= 2 on 32 bit platforms. + * GFP_ZONES_SHIFT must be <= 2 on 32 bit platforms. */ -#if 16 * ZONES_SHIFT > BITS_PER_LONG -#error ZONES_SHIFT too large to create GFP_ZONE_TABLE integer +#if defined(CONFIG_ZONE_DEVICE) && (MAX_NR_ZONES-1) <= 4 +/* ZONE_DEVICE is not a valid GFP zone specifier */ +#define GFP_ZONES_SHIFT 2 +#else +#define GFP_ZONES_SHIFT ZONES_SHIFT +#endif + +#if 16 * GFP_ZONES_SHIFT > BITS_PER_LONG +#error GFP_ZONES_SHIFT too large to create GFP_ZONE_TABLE integer #endif #define GFP_ZONE_TABLE ( \ - (ZONE_NORMAL << 0 * ZONES_SHIFT) \ - | (OPT_ZONE_DMA << ___GFP_DMA * ZONES_SHIFT) \ - | (OPT_ZONE_HIGHMEM << ___GFP_HIGHMEM * ZONES_SHIFT) \ - | (OPT_ZONE_DMA32 << ___GFP_DMA32 * ZONES_SHIFT) \ - | (ZONE_NORMAL << ___GFP_MOVABLE * ZONES_SHIFT) \ - | (OPT_ZONE_DMA << (___GFP_MOVABLE | ___GFP_DMA) * ZONES_SHIFT) \ - | (ZONE_MOVABLE << (___GFP_MOVABLE | ___GFP_HIGHMEM) * ZONES_SHIFT) \ - | (OPT_ZONE_DMA32 << (___GFP_MOVABLE | ___GFP_DMA32) * ZONES_SHIFT) \ + (ZONE_NORMAL << 0 * GFP_ZONES_SHIFT) \ + | (OPT_ZONE_DMA << ___GFP_DMA * GFP_ZONES_SHIFT) \ + | (OPT_ZONE_HIGHMEM << ___GFP_HIGHMEM * GFP_ZONES_SHIFT) \ + | (OPT_ZONE_DMA32 << ___GFP_DMA32 * GFP_ZONES_SHIFT) \ + | (ZONE_NORMAL << ___GFP_MOVABLE * GFP_ZONES_SHIFT) \ + | (OPT_ZONE_DMA << (___GFP_MOVABLE | ___GFP_DMA) * GFP_ZONES_SHIFT) \ + | (ZONE_MOVABLE << (___GFP_MOVABLE | ___GFP_HIGHMEM) * GFP_ZONES_SHIFT)\ + | (OPT_ZONE_DMA32 << (___GFP_MOVABLE | ___GFP_DMA32) * GFP_ZONES_SHIFT)\ ) /* @@ -371,8 +378,8 @@ static inline enum zone_type gfp_zone(gfp_t flags) enum zone_type z; int bit = (__force int) (flags & GFP_ZONEMASK); - z = (GFP_ZONE_TABLE >> (bit * ZONES_SHIFT)) & - ((1 << ZONES_SHIFT) - 1); + z = (GFP_ZONE_TABLE >> (bit * GFP_ZONES_SHIFT)) & + ((1 << GFP_ZONES_SHIFT) - 1); VM_BUG_ON((GFP_ZONE_BAD >> bit) & 1); return z; } diff --git a/include/linux/page-flags-layout.h b/include/linux/page-flags-layout.h index da523661500a..77b078c103b2 100644 --- a/include/linux/page-flags-layout.h +++ b/include/linux/page-flags-layout.h @@ -17,6 +17,8 @@ #define ZONES_SHIFT 1 #elif MAX_NR_ZONES <= 4 #define ZONES_SHIFT 2 +#elif MAX_NR_ZONES <= 8 +#define 
ZONES_SHIFT 3 #else #error ZONES_SHIFT -- too many zones configured adjust calculation #endif diff --git a/mm/Kconfig b/mm/Kconfig index c07776503708..520dae6e6ed0 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -651,8 +651,6 @@ config IDLE_PAGE_TRACKING config ZONE_DEVICE bool "Device memory (pmem, etc...) hotplug support" if EXPERT - default !ZONE_DMA - depends on !ZONE_DMA depends on MEMORY_HOTPLUG depends on MEMORY_HOTREMOVE depends on X86_64 #arch_add_memory() comprehends device memory -- cgit v1.2.3 From 0e8fb9312fbaf1a687dd731b04d8ab3121c4ff5a Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 17 Mar 2016 14:19:55 -0700 Subject: mm: remove VM_FAULT_MINOR The define has a comment from Nick Piggin from 2007: /* For backwards compat. Remove me quickly. */ I guess 9 years is not too hurried an interpretation of 'quickly', even by kernel standards. Signed-off-by: Jan Kara Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm/mm/fault.c | 2 +- arch/arm64/mm/fault.c | 2 +- arch/unicore32/mm/fault.c | 2 +- arch/xtensa/mm/fault.c | 2 +- include/linux/mm.h | 2 -- 5 files changed, 4 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c index daafcf121ce0..ad5841856007 100644 --- a/arch/arm/mm/fault.c +++ b/arch/arm/mm/fault.c @@ -346,7 +346,7 @@ retry: up_read(&mm->mmap_sem); /* - * Handle the "normal" case first - VM_FAULT_MAJOR / VM_FAULT_MINOR + * Handle the "normal" case first - VM_FAULT_MAJOR */ if (likely(!(fault & (VM_FAULT_ERROR | VM_FAULT_BADMAP | VM_FAULT_BADACCESS)))) return 0; diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index abe2a9542b3a..97135b61b32a 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -295,7 +295,7 @@ retry: up_read(&mm->mmap_sem); /* - * Handle the "normal" case first - VM_FAULT_MAJOR / VM_FAULT_MINOR + * Handle the "normal" case first - VM_FAULT_MAJOR */ if (likely(!(fault & (VM_FAULT_ERROR | VM_FAULT_BADMAP | VM_FAULT_BADACCESS)))) diff --git a/arch/unicore32/mm/fault.c b/arch/unicore32/mm/fault.c index afccef5529cc..2ec3d3adcefc 100644 --- a/arch/unicore32/mm/fault.c +++ b/arch/unicore32/mm/fault.c @@ -276,7 +276,7 @@ retry: up_read(&mm->mmap_sem); /* - * Handle the "normal" case first - VM_FAULT_MAJOR / VM_FAULT_MINOR + * Handle the "normal" case first - VM_FAULT_MAJOR */ if (likely(!(fault & (VM_FAULT_ERROR | VM_FAULT_BADMAP | VM_FAULT_BADACCESS)))) diff --git a/arch/xtensa/mm/fault.c b/arch/xtensa/mm/fault.c index c9784c1b18d8..7f4a1fdb1502 100644 --- a/arch/xtensa/mm/fault.c +++ b/arch/xtensa/mm/fault.c @@ -146,7 +146,7 @@ good_area: perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address); if (flags & VM_FAULT_MAJOR) perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, regs, address); - else if (flags & VM_FAULT_MINOR) + else perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs, address); return; diff --git a/include/linux/mm.h b/include/linux/mm.h index 997fc2e5d9d8..7d42501c8bb4 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1052,8 +1052,6 @@ static inline void clear_page_pfmemalloc(struct page *page) * just gets major/minor fault counters bumped up. */ -#define VM_FAULT_MINOR 0 /* For backwards compat. Remove me quickly. */ - #define VM_FAULT_OOM 0x0001 #define VM_FAULT_SIGBUS 0x0002 #define VM_FAULT_MAJOR 0x0004 -- cgit v1.2.3 From b97731992d00f09456726bfc5ab6641c07773038 Mon Sep 17 00:00:00 2001 From: "Kirill A.
Shutemov" Date: Thu, 17 Mar 2016 14:20:01 -0700 Subject: rmap: introduce rmap_walk_locked() This patchset rewrites freeze_page() and unfreeze_page() using try_to_unmap() and remove_migration_ptes(). Result is much simpler, but somewhat slower. Migration 8GiB worth of PMD-mapped THP: Baseline 20.21 +/- 0.393 Patched 20.73 +/- 0.082 Slowdown 1.03x It's 3% slower, comparing to 14% in v1. I don't it should be a stopper. Splitting of PTE-mapped pages slowed more. But this is not a common case. Migration 8GiB worth of PMD-mapped THP: Baseline 20.39 +/- 0.225 Patched 22.43 +/- 0.496 Slowdown 1.10x rmap_walk_locked() is the same as rmap_walk(), but the caller takes care of the relevant rmap lock. This is preparation for switching THP splitting from custom rmap walk in freeze_page()/unfreeze_page() to the generic one. There is no support for KSM pages for now: not clear which lock is implied. Signed-off-by: Kirill A. Shutemov Cc: Andrea Arcangeli Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/rmap.h | 1 + mm/rmap.c | 41 ++++++++++++++++++++++++++++++++--------- 2 files changed, 33 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rmap.h b/include/linux/rmap.h index a07f42bedda3..a5875e9b4a27 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -266,6 +266,7 @@ struct rmap_walk_control { }; int rmap_walk(struct page *page, struct rmap_walk_control *rwc); +int rmap_walk_locked(struct page *page, struct rmap_walk_control *rwc); #else /* !CONFIG_MMU */ diff --git a/mm/rmap.c b/mm/rmap.c index 02f0bfc3c80a..30b739ce0ffa 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -1715,14 +1715,21 @@ static struct anon_vma *rmap_walk_anon_lock(struct page *page, * vm_flags for that VMA. That should be OK, because that vma shouldn't be * LOCKED. */ -static int rmap_walk_anon(struct page *page, struct rmap_walk_control *rwc) +static int rmap_walk_anon(struct page *page, struct rmap_walk_control *rwc, + bool locked) { struct anon_vma *anon_vma; pgoff_t pgoff; struct anon_vma_chain *avc; int ret = SWAP_AGAIN; - anon_vma = rmap_walk_anon_lock(page, rwc); + if (locked) { + anon_vma = page_anon_vma(page); + /* anon_vma disappear under us? */ + VM_BUG_ON_PAGE(!anon_vma, page); + } else { + anon_vma = rmap_walk_anon_lock(page, rwc); + } if (!anon_vma) return ret; @@ -1742,7 +1749,9 @@ static int rmap_walk_anon(struct page *page, struct rmap_walk_control *rwc) if (rwc->done && rwc->done(page)) break; } - anon_vma_unlock_read(anon_vma); + + if (!locked) + anon_vma_unlock_read(anon_vma); return ret; } @@ -1759,9 +1768,10 @@ static int rmap_walk_anon(struct page *page, struct rmap_walk_control *rwc) * vm_flags for that VMA. That should be OK, because that vma shouldn't be * LOCKED. 
*/ -static int rmap_walk_file(struct page *page, struct rmap_walk_control *rwc) +static int rmap_walk_file(struct page *page, struct rmap_walk_control *rwc, + bool locked) { - struct address_space *mapping = page->mapping; + struct address_space *mapping = page_mapping(page); pgoff_t pgoff; struct vm_area_struct *vma; int ret = SWAP_AGAIN; @@ -1778,7 +1788,8 @@ static int rmap_walk_file(struct page *page, struct rmap_walk_control *rwc) return ret; pgoff = page_to_pgoff(page); - i_mmap_lock_read(mapping); + if (!locked) + i_mmap_lock_read(mapping); vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, pgoff) { unsigned long address = vma_address(page, vma); @@ -1795,7 +1806,8 @@ static int rmap_walk_file(struct page *page, struct rmap_walk_control *rwc) } done: - i_mmap_unlock_read(mapping); + if (!locked) + i_mmap_unlock_read(mapping); return ret; } @@ -1804,9 +1816,20 @@ int rmap_walk(struct page *page, struct rmap_walk_control *rwc) if (unlikely(PageKsm(page))) return rmap_walk_ksm(page, rwc); else if (PageAnon(page)) - return rmap_walk_anon(page, rwc); + return rmap_walk_anon(page, rwc, false); + else + return rmap_walk_file(page, rwc, false); +} + +/* Like rmap_walk, but caller holds relevant rmap lock */ +int rmap_walk_locked(struct page *page, struct rmap_walk_control *rwc) +{ + /* no ksm support for now */ + VM_BUG_ON_PAGE(PageKsm(page), page); + if (PageAnon(page)) + return rmap_walk_anon(page, rwc, true); else - return rmap_walk_file(page, rwc); + return rmap_walk_file(page, rwc, true); } #ifdef CONFIG_HUGETLB_PAGE -- cgit v1.2.3 From 2a52bcbcc688eecead2953143f7ef695b8e44575 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Thu, 17 Mar 2016 14:20:04 -0700 Subject: rmap: extend try_to_unmap() to be usable by split_huge_page() Add support for two ttu_flags: - TTU_SPLIT_HUGE_PMD splits the PMD if it is there, before trying to unmap the page; - TTU_RMAP_LOCKED indicates that the caller holds the relevant rmap lock. Also, change rwc->done to !page_mapcount() instead of !page_mapped(). try_to_unmap() works on the pte level, so we are really interested in the mappedness of this small page rather than of the compound page it's a part of. Signed-off-by: Kirill A.
Shutemov Cc: Andrea Arcangeli Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/huge_mm.h | 7 +++++++ include/linux/rmap.h | 3 +++ mm/huge_memory.c | 5 +---- mm/rmap.c | 24 ++++++++++++++++-------- 4 files changed, 27 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index a4cecb4801ec..01ad22e938b0 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -106,6 +106,9 @@ void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, __split_huge_pmd(__vma, __pmd, __address); \ } while (0) + +void split_huge_pmd_address(struct vm_area_struct *vma, unsigned long address); + #if HPAGE_PMD_ORDER >= MAX_ORDER #error "hugepages can't be allocated by the buddy allocator" #endif @@ -173,6 +176,10 @@ static inline int split_huge_page(struct page *page) static inline void deferred_split_huge_page(struct page *page) {} #define split_huge_pmd(__vma, __pmd, __address) \ do { } while (0) + +static inline void split_huge_pmd_address(struct vm_area_struct *vma, + unsigned long address) {} + static inline int hugepage_madvise(struct vm_area_struct *vma, unsigned long *vm_flags, int advice) { diff --git a/include/linux/rmap.h b/include/linux/rmap.h index a5875e9b4a27..3d975e2252d4 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -86,6 +86,7 @@ enum ttu_flags { TTU_MIGRATION = 2, /* migration mode */ TTU_MUNLOCK = 4, /* munlock mode */ TTU_LZFREE = 8, /* lazy free mode */ + TTU_SPLIT_HUGE_PMD = 16, /* split huge PMD if any */ TTU_IGNORE_MLOCK = (1 << 8), /* ignore mlock */ TTU_IGNORE_ACCESS = (1 << 9), /* don't age */ @@ -93,6 +94,8 @@ enum ttu_flags { TTU_BATCH_FLUSH = (1 << 11), /* Batch TLB flushes where possible * and caller guarantees they will * do a final flush if necessary */ + TTU_RMAP_LOCKED = (1 << 12) /* do not grab rmap lock: + * caller holds it */ }; #ifdef CONFIG_MMU diff --git a/mm/huge_memory.c b/mm/huge_memory.c index e1a177c20791..11d15674ff38 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -3006,15 +3006,12 @@ out: } } -static void split_huge_pmd_address(struct vm_area_struct *vma, - unsigned long address) +void split_huge_pmd_address(struct vm_area_struct *vma, unsigned long address) { pgd_t *pgd; pud_t *pud; pmd_t *pmd; - VM_BUG_ON(!(address & ~HPAGE_PMD_MASK)); - pgd = pgd_offset(vma->vm_mm, address); if (!pgd_present(*pgd)) return; diff --git a/mm/rmap.c b/mm/rmap.c index 30b739ce0ffa..945933a01010 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -1431,6 +1431,8 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma, if ((flags & TTU_MUNLOCK) && !(vma->vm_flags & VM_LOCKED)) goto out; + if (flags & TTU_SPLIT_HUGE_PMD) + split_huge_pmd_address(vma, address); pte = page_check_address(page, mm, address, &ptl, 0); if (!pte) goto out; @@ -1576,10 +1578,10 @@ static bool invalid_migration_vma(struct vm_area_struct *vma, void *arg) return is_vma_temporary_stack(vma); } -static int page_not_mapped(struct page *page) +static int page_mapcount_is_zero(struct page *page) { - return !page_mapped(page); -}; + return !page_mapcount(page); +} /** * try_to_unmap - try to remove all page table mappings to a page @@ -1606,12 +1608,10 @@ int try_to_unmap(struct page *page, enum ttu_flags flags) struct rmap_walk_control rwc = { .rmap_one = try_to_unmap_one, .arg = &rp, - .done = page_not_mapped, + .done = page_mapcount_is_zero, .anon_lock = page_lock_anon_vma_read, }; - VM_BUG_ON_PAGE(!PageHuge(page) && PageTransHuge(page), page); - /* * During exec, a 
temporary VMA is setup and later moved. * The VMA is moved under the anon_vma lock but not the @@ -1623,9 +1623,12 @@ int try_to_unmap(struct page *page, enum ttu_flags flags) if ((flags & TTU_MIGRATION) && !PageKsm(page) && PageAnon(page)) rwc.invalid_vma = invalid_migration_vma; - ret = rmap_walk(page, &rwc); + if (flags & TTU_RMAP_LOCKED) + ret = rmap_walk_locked(page, &rwc); + else + ret = rmap_walk(page, &rwc); - if (ret != SWAP_MLOCK && !page_mapped(page)) { + if (ret != SWAP_MLOCK && !page_mapcount(page)) { ret = SWAP_SUCCESS; if (rp.lazyfreed && !PageDirty(page)) ret = SWAP_LZFREE; @@ -1633,6 +1636,11 @@ int try_to_unmap(struct page *page, enum ttu_flags flags) return ret; } +static int page_not_mapped(struct page *page) +{ + return !page_mapped(page); +}; + /** * try_to_munlock - try to munlock a page * @page: the page to be munlocked -- cgit v1.2.3 From e388466de4a2a1a50c43bfaeacc0c8254d9e7cb2 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Thu, 17 Mar 2016 14:20:07 -0700 Subject: mm: make remove_migration_ptes() available beyond mm/migration.c Make remove_migration_ptes() available to be used in split_huge_page(). A new parameter, 'locked', is added: as with try_to_unmap(), we need a way to indicate that the caller holds the rmap lock. We also shouldn't try to mlock() pte-mapped huge pages: pte-mapped THP pages are never mlocked. Signed-off-by: Kirill A. Shutemov Cc: Andrea Arcangeli Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/rmap.h | 2 ++ mm/migrate.c | 15 +++++++++------ 2 files changed, 11 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rmap.h b/include/linux/rmap.h index 3d975e2252d4..49eb4f8ebac9 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -243,6 +243,8 @@ int page_mkclean(struct page *); */ int try_to_munlock(struct page *); +void remove_migration_ptes(struct page *old, struct page *new, bool locked); + /* * Called by memory-failure.c to kill processes. */ diff --git a/mm/migrate.c b/mm/migrate.c index 577c94b8e959..6c822a7b27e0 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -172,7 +172,7 @@ static int remove_migration_pte(struct page *new, struct vm_area_struct *vma, else page_add_file_rmap(new); - if (vma->vm_flags & VM_LOCKED) + if (vma->vm_flags & VM_LOCKED && !PageTransCompound(new)) mlock_vma_page(new); /* No need to invalidate - it was non-present before */ @@ -187,14 +187,17 @@ out: * Get rid of all migration entries and replace them by * references to the indicated page. */ -static void remove_migration_ptes(struct page *old, struct page *new) +void remove_migration_ptes(struct page *old, struct page *new, bool locked) { struct rmap_walk_control rwc = { .rmap_one = remove_migration_pte, .arg = old, }; - rmap_walk(new, &rwc); + if (locked) + rmap_walk_locked(new, &rwc); + else + rmap_walk(new, &rwc); } /* @@ -702,7 +705,7 @@ static int writeout(struct address_space *mapping, struct page *page) * At this point we know that the migration attempt cannot * be successful. */ - remove_migration_ptes(page, page); + remove_migration_ptes(page, page, false); rc = mapping->a_ops->writepage(page, &wbc); @@ -900,7 +903,7 @@ static int __unmap_and_move(struct page *page, struct page *newpage, if (page_was_mapped) remove_migration_ptes(page, - rc == MIGRATEPAGE_SUCCESS ?
newpage : page); + rc == MIGRATEPAGE_SUCCESS ? newpage : page, false); out_unlock_both: unlock_page(newpage); @@ -1070,7 +1073,7 @@ static int unmap_and_move_huge_page(new_page_t get_new_page, if (page_was_mapped) remove_migration_ptes(hpage, - rc == MIGRATEPAGE_SUCCESS ? new_hpage : hpage); + rc == MIGRATEPAGE_SUCCESS ? new_hpage : hpage, false); unlock_page(new_hpage); -- cgit v1.2.3 From fec89c109f3a7737fe3a7bf0f40d1fb7709d353b Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Thu, 17 Mar 2016 14:20:10 -0700 Subject: thp: rewrite freeze_page()/unfreeze_page() with generic rmap walkers The freeze_page() and unfreeze_page() helpers evolved into rather complex beasts. It would be nice to cut the complexity of this code. This patch rewrites freeze_page() using standard try_to_unmap(). unfreeze_page() is rewritten with remove_migration_ptes(). The result is much simpler. But the new variant is somewhat slower for PTE-mapped THPs: the current helpers iterate over the VMAs the compound page is mapped to, and then over the ptes within each VMA; the new helpers iterate over each small page, then over the VMAs that small page is mapped to, and only then find the relevant pte. We have a shortcut for PMD-mapped THP: migration entries are installed directly when the PMD is split. I don't think the slowdown is critical, considering how much simpler the result is and that split_huge_page() is quite rare nowadays. It only happens due to memory pressure or migration. Signed-off-by: Kirill A. Shutemov Cc: Andrea Arcangeli Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/huge_mm.h | 10 ++- mm/huge_memory.c | 210 +++++++++--------------------------------- mm/rmap.c | 10 ++- 3 files changed, 50 insertions(+), 180 deletions(-) (limited to 'include/linux') diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index 01ad22e938b0..5307dfb3f8ec 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -96,18 +96,20 @@ static inline int split_huge_page(struct page *page) void deferred_split_huge_page(struct page *page); void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, - unsigned long address); + unsigned long address, bool freeze); #define split_huge_pmd(__vma, __pmd, __address) \ do { \ pmd_t *____pmd = (__pmd); \ if (pmd_trans_huge(*____pmd) \ || pmd_devmap(*____pmd)) \ - __split_huge_pmd(__vma, __pmd, __address); \ + __split_huge_pmd(__vma, __pmd, __address, \ + false); \ } while (0) -void split_huge_pmd_address(struct vm_area_struct *vma, unsigned long address); +void split_huge_pmd_address(struct vm_area_struct *vma, unsigned long address, + bool freeze, struct page *page); #if HPAGE_PMD_ORDER >= MAX_ORDER #error "hugepages can't be allocated by the buddy allocator" @@ -178,7 +180,7 @@ static inline void deferred_split_huge_page(struct page *page) {} do { } while (0) static inline void split_huge_pmd_address(struct vm_area_struct *vma, - unsigned long address) {} + unsigned long address, bool freeze, struct page *page) {} static inline int hugepage_madvise(struct vm_area_struct *vma, unsigned long *vm_flags, int advice) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 11d15674ff38..4a58fa19afac 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -2977,7 +2977,7 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd, } void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, - unsigned long address) + unsigned long address, bool freeze) { spinlock_t *ptl; struct mm_struct *mm = vma->vm_mm; @@ -2994,7 +2994,7 @@ void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, page = NULL; } else if
(!pmd_devmap(*pmd)) goto out; - __split_huge_pmd_locked(vma, pmd, haddr, false); + __split_huge_pmd_locked(vma, pmd, haddr, freeze); out: spin_unlock(ptl); mmu_notifier_invalidate_range_end(mm, haddr, haddr + HPAGE_PMD_SIZE); @@ -3006,7 +3006,8 @@ out: } } -void split_huge_pmd_address(struct vm_area_struct *vma, unsigned long address) +void split_huge_pmd_address(struct vm_area_struct *vma, unsigned long address, + bool freeze, struct page *page) { pgd_t *pgd; pud_t *pud; @@ -3023,11 +3024,20 @@ void split_huge_pmd_address(struct vm_area_struct *vma, unsigned long address) pmd = pmd_offset(pud, address); if (!pmd_present(*pmd) || (!pmd_trans_huge(*pmd) && !pmd_devmap(*pmd))) return; + + /* + * If caller asks to setup a migration entries, we need a page to check + * pmd against. Otherwise we can end up replacing wrong page. + */ + VM_BUG_ON(freeze && !page); + if (page && page != pmd_page(*pmd)) + return; + /* * Caller holds the mmap_sem write mode, so a huge pmd cannot * materialize from under us. */ - split_huge_pmd(vma, pmd, address); + __split_huge_pmd(vma, pmd, address, freeze); } void vma_adjust_trans_huge(struct vm_area_struct *vma, @@ -3043,7 +3053,7 @@ void vma_adjust_trans_huge(struct vm_area_struct *vma, if (start & ~HPAGE_PMD_MASK && (start & HPAGE_PMD_MASK) >= vma->vm_start && (start & HPAGE_PMD_MASK) + HPAGE_PMD_SIZE <= vma->vm_end) - split_huge_pmd_address(vma, start); + split_huge_pmd_address(vma, start, false, NULL); /* * If the new end address isn't hpage aligned and it could @@ -3053,7 +3063,7 @@ void vma_adjust_trans_huge(struct vm_area_struct *vma, if (end & ~HPAGE_PMD_MASK && (end & HPAGE_PMD_MASK) >= vma->vm_start && (end & HPAGE_PMD_MASK) + HPAGE_PMD_SIZE <= vma->vm_end) - split_huge_pmd_address(vma, end); + split_huge_pmd_address(vma, end, false, NULL); /* * If we're also updating the vma->vm_next->vm_start, if the new @@ -3067,184 +3077,36 @@ void vma_adjust_trans_huge(struct vm_area_struct *vma, if (nstart & ~HPAGE_PMD_MASK && (nstart & HPAGE_PMD_MASK) >= next->vm_start && (nstart & HPAGE_PMD_MASK) + HPAGE_PMD_SIZE <= next->vm_end) - split_huge_pmd_address(next, nstart); + split_huge_pmd_address(next, nstart, false, NULL); } } -static void freeze_page_vma(struct vm_area_struct *vma, struct page *page, - unsigned long address) +static void freeze_page(struct page *page) { - unsigned long haddr = address & HPAGE_PMD_MASK; - spinlock_t *ptl; - pgd_t *pgd; - pud_t *pud; - pmd_t *pmd; - pte_t *pte; - int i, nr = HPAGE_PMD_NR; - - /* Skip pages which doesn't belong to the VMA */ - if (address < vma->vm_start) { - int off = (vma->vm_start - address) >> PAGE_SHIFT; - page += off; - nr -= off; - address = vma->vm_start; - } - - pgd = pgd_offset(vma->vm_mm, address); - if (!pgd_present(*pgd)) - return; - pud = pud_offset(pgd, address); - if (!pud_present(*pud)) - return; - pmd = pmd_offset(pud, address); - ptl = pmd_lock(vma->vm_mm, pmd); - if (!pmd_present(*pmd)) { - spin_unlock(ptl); - return; - } - if (pmd_trans_huge(*pmd)) { - if (page == pmd_page(*pmd)) - __split_huge_pmd_locked(vma, pmd, haddr, true); - spin_unlock(ptl); - return; - } - spin_unlock(ptl); - - pte = pte_offset_map_lock(vma->vm_mm, pmd, address, &ptl); - for (i = 0; i < nr; i++, address += PAGE_SIZE, page++, pte++) { - pte_t entry, swp_pte; - swp_entry_t swp_entry; - - /* - * We've just crossed page table boundary: need to map next one. - * It can happen if THP was mremaped to non PMD-aligned address. 
- */ - if (unlikely(address == haddr + HPAGE_PMD_SIZE)) { - pte_unmap_unlock(pte - 1, ptl); - pmd = mm_find_pmd(vma->vm_mm, address); - if (!pmd) - return; - pte = pte_offset_map_lock(vma->vm_mm, pmd, - address, &ptl); - } - - if (!pte_present(*pte)) - continue; - if (page_to_pfn(page) != pte_pfn(*pte)) - continue; - flush_cache_page(vma, address, page_to_pfn(page)); - entry = ptep_clear_flush(vma, address, pte); - if (pte_dirty(entry)) - SetPageDirty(page); - swp_entry = make_migration_entry(page, pte_write(entry)); - swp_pte = swp_entry_to_pte(swp_entry); - if (pte_soft_dirty(entry)) - swp_pte = pte_swp_mksoft_dirty(swp_pte); - set_pte_at(vma->vm_mm, address, pte, swp_pte); - page_remove_rmap(page, false); - put_page(page); - } - pte_unmap_unlock(pte - 1, ptl); -} - -static void freeze_page(struct anon_vma *anon_vma, struct page *page) -{ - struct anon_vma_chain *avc; - pgoff_t pgoff = page_to_pgoff(page); + enum ttu_flags ttu_flags = TTU_MIGRATION | TTU_IGNORE_MLOCK | + TTU_IGNORE_ACCESS | TTU_RMAP_LOCKED; + int i, ret; VM_BUG_ON_PAGE(!PageHead(page), page); - anon_vma_interval_tree_foreach(avc, &anon_vma->rb_root, pgoff, - pgoff + HPAGE_PMD_NR - 1) { - unsigned long address = __vma_address(page, avc->vma); - - mmu_notifier_invalidate_range_start(avc->vma->vm_mm, - address, address + HPAGE_PMD_SIZE); - freeze_page_vma(avc->vma, page, address); - mmu_notifier_invalidate_range_end(avc->vma->vm_mm, - address, address + HPAGE_PMD_SIZE); - } -} - -static void unfreeze_page_vma(struct vm_area_struct *vma, struct page *page, - unsigned long address) -{ - spinlock_t *ptl; - pmd_t *pmd; - pte_t *pte, entry; - swp_entry_t swp_entry; - unsigned long haddr = address & HPAGE_PMD_MASK; - int i, nr = HPAGE_PMD_NR; - - /* Skip pages which doesn't belong to the VMA */ - if (address < vma->vm_start) { - int off = (vma->vm_start - address) >> PAGE_SHIFT; - page += off; - nr -= off; - address = vma->vm_start; - } - - pmd = mm_find_pmd(vma->vm_mm, address); - if (!pmd) - return; - - pte = pte_offset_map_lock(vma->vm_mm, pmd, address, &ptl); - for (i = 0; i < nr; i++, address += PAGE_SIZE, page++, pte++) { - /* - * We've just crossed page table boundary: need to map next one. - * It can happen if THP was mremaped to non-PMD aligned address. 
- */ - if (unlikely(address == haddr + HPAGE_PMD_SIZE)) { - pte_unmap_unlock(pte - 1, ptl); - pmd = mm_find_pmd(vma->vm_mm, address); - if (!pmd) - return; - pte = pte_offset_map_lock(vma->vm_mm, pmd, - address, &ptl); - } - - if (!is_swap_pte(*pte)) - continue; - - swp_entry = pte_to_swp_entry(*pte); - if (!is_migration_entry(swp_entry)) - continue; - if (migration_entry_to_page(swp_entry) != page) - continue; - - get_page(page); - page_add_anon_rmap(page, vma, address, false); - - entry = pte_mkold(mk_pte(page, vma->vm_page_prot)); - if (PageDirty(page)) - entry = pte_mkdirty(entry); - if (is_write_migration_entry(swp_entry)) - entry = maybe_mkwrite(entry, vma); - - flush_dcache_page(page); - set_pte_at(vma->vm_mm, address, pte, entry); + /* We only need TTU_SPLIT_HUGE_PMD once */ + ret = try_to_unmap(page, ttu_flags | TTU_SPLIT_HUGE_PMD); + for (i = 1; !ret && i < HPAGE_PMD_NR; i++) { + /* Cut short if the page is unmapped */ + if (page_count(page) == 1) + return; - /* No need to invalidate - it was non-present before */ - update_mmu_cache(vma, address, pte); + ret = try_to_unmap(page + i, ttu_flags); } - pte_unmap_unlock(pte - 1, ptl); + VM_BUG_ON(ret); } -static void unfreeze_page(struct anon_vma *anon_vma, struct page *page) +static void unfreeze_page(struct page *page) { - struct anon_vma_chain *avc; - pgoff_t pgoff = page_to_pgoff(page); - - anon_vma_interval_tree_foreach(avc, &anon_vma->rb_root, - pgoff, pgoff + HPAGE_PMD_NR - 1) { - unsigned long address = __vma_address(page, avc->vma); + int i; - mmu_notifier_invalidate_range_start(avc->vma->vm_mm, - address, address + HPAGE_PMD_SIZE); - unfreeze_page_vma(avc->vma, page, address); - mmu_notifier_invalidate_range_end(avc->vma->vm_mm, - address, address + HPAGE_PMD_SIZE); - } + for (i = 0; i < HPAGE_PMD_NR; i++) + remove_migration_ptes(page + i, page + i, true); } static void __split_huge_page_tail(struct page *head, int tail, @@ -3322,7 +3184,7 @@ static void __split_huge_page(struct page *page, struct list_head *list) ClearPageCompound(head); spin_unlock_irq(&zone->lru_lock); - unfreeze_page(page_anon_vma(head), head); + unfreeze_page(head); for (i = 0; i < HPAGE_PMD_NR; i++) { struct page *subpage = head + i; @@ -3418,7 +3280,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list) } mlocked = PageMlocked(page); - freeze_page(anon_vma, head); + freeze_page(head); VM_BUG_ON_PAGE(compound_mapcount(head), head); /* Make sure the page is not on per-CPU pagevec as it takes pin */ @@ -3447,7 +3309,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list) BUG(); } else { spin_unlock_irqrestore(&pgdata->split_queue_lock, flags); - unfreeze_page(anon_vma, head); + unfreeze_page(head); ret = -EBUSY; } diff --git a/mm/rmap.c b/mm/rmap.c index 945933a01010..c399a0d41b31 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -1431,8 +1431,14 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma, if ((flags & TTU_MUNLOCK) && !(vma->vm_flags & VM_LOCKED)) goto out; - if (flags & TTU_SPLIT_HUGE_PMD) - split_huge_pmd_address(vma, address); + if (flags & TTU_SPLIT_HUGE_PMD) { + split_huge_pmd_address(vma, address, + flags & TTU_MIGRATION, page); + /* check if we have anything to do after split */ + if (page_mapcount(page) == 0) + goto out; + } + pte = page_check_address(page, mm, address, &ptl, 0); if (!pte) goto out; -- cgit v1.2.3 From da8b44d5a9f8bf26da637b7336508ca534d6b319 Mon Sep 17 00:00:00 2001 From: John Stultz Date: Thu, 17 Mar 2016 14:20:51 -0700 Subject: timer: convert timer_slack_ns 
from unsigned long to u64 This patchset introduces a /proc/<pid>/timerslack_ns interface which would allow controlling processes to be able to set the timerslack value on other processes in order to save power by avoiding wakeups (something Android currently does via out-of-tree patches). The first patch tries to fix the internal timer_slack_ns usage, which was defined as a long, which limits the slack range to ~4 seconds on 32bit systems. It converts it to a u64, which provides the same basically unlimited slack (500 years) on both 32bit and 64bit machines. The second patch introduces the /proc/<pid>/timerslack_ns interface, which allows the full 64bit slack range for a task to be read or set on both 32bit and 64bit machines. With these two patches, on a 32bit machine, after setting the slack on bash to 10 seconds:
$ time sleep 1
real	0m10.747s
user	0m0.001s
sys	0m0.005s
The first patch is a little ugly, since I had to chase the slack delta arguments through a number of functions converting them to u64s. Let me know if it makes sense to break that up more or not. Other than that things are fairly straightforward. This patch (of 2): The timer_slack_ns value in the task struct is currently an unsigned long. This means that for 32bit applications, the maximum slack is just over 4 seconds. However, on 64bit machines, it's much, much larger (~500 years). This disparity could make application development a little confusing, so this patch converts the timer_slack_ns value (as well as the default_slack) to a u64. This means both 32bit and 64bit systems have the same effective internal slack range. Now the existing ABI via PR_GET_TIMERSLACK and PR_SET_TIMERSLACK specifies the interface as an unsigned long, so we preserve that limitation on 32bit systems, where SET_TIMERSLACK can only set the slack to an unsigned long value, and GET_TIMERSLACK will return ULONG_MAX if the slack is actually larger than what can be stored by an unsigned long. This patch also modifies the hrtimer functions which specified the slack delta as an unsigned long. Signed-off-by: John Stultz Cc: Arjan van de Ven Cc: Thomas Gleixner Cc: Oren Laadan Cc: Ruchi Kandoi Cc: Rom Lemarchand Cc: Kees Cook Cc: Android Kernel Team Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/eventpoll.c | 2 +- fs/select.c | 8 ++++---- include/linux/freezer.h | 2 +- include/linux/hrtimer.h | 12 +++++++----- include/linux/poll.h | 2 +- include/linux/sched.h | 4 ++-- kernel/sys.c | 5 ++++- kernel/time/hrtimer.c | 8 ++++---- kernel/time/timer.c | 4 ++-- 9 files changed, 26 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/fs/eventpoll.c b/fs/eventpoll.c index cde60741cad2..8a74a2a52e0f 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -1616,7 +1616,7 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events, { int res = 0, eavail, timed_out = 0; unsigned long flags; - long slack = 0; + u64 slack = 0; wait_queue_t wait; ktime_t expires, *to = NULL; diff --git a/fs/select.c b/fs/select.c index 79d0d4953cad..869293988c2a 100644 --- a/fs/select.c +++ b/fs/select.c @@ -70,9 +70,9 @@ static long __estimate_accuracy(struct timespec *tv) return slack; } -long select_estimate_accuracy(struct timespec *tv) +u64 select_estimate_accuracy(struct timespec *tv) { - unsigned long ret; + u64 ret; struct timespec now; /* @@ -402,7 +402,7 @@ int do_select(int n, fd_set_bits *fds, struct timespec *end_time) struct poll_wqueues table; poll_table *wait; int retval, i, timed_out = 0; - unsigned long slack = 0; + u64 slack = 0; unsigned int busy_flag = net_busy_loop_on() ?
POLL_BUSY_LOOP : 0; unsigned long busy_end = 0; @@ -784,7 +784,7 @@ static int do_poll(struct poll_list *list, struct poll_wqueues *wait, poll_table* pt = &wait->pt; ktime_t expire, *to = NULL; int timed_out = 0, count = 0; - unsigned long slack = 0; + u64 slack = 0; unsigned int busy_flag = net_busy_loop_on() ? POLL_BUSY_LOOP : 0; unsigned long busy_end = 0; diff --git a/include/linux/freezer.h b/include/linux/freezer.h index 6b7fd9cf5ea2..dd03e837ebb7 100644 --- a/include/linux/freezer.h +++ b/include/linux/freezer.h @@ -231,7 +231,7 @@ static inline long freezable_schedule_timeout_killable_unsafe(long timeout) * call this with locks held. */ static inline int freezable_schedule_hrtimeout_range(ktime_t *expires, - unsigned long delta, const enum hrtimer_mode mode) + u64 delta, const enum hrtimer_mode mode) { int __retval; freezer_do_not_count(); diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 2ead22dd74a0..c98c6539e2c2 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -220,7 +220,7 @@ static inline void hrtimer_set_expires_range(struct hrtimer *timer, ktime_t time timer->node.expires = ktime_add_safe(time, delta); } -static inline void hrtimer_set_expires_range_ns(struct hrtimer *timer, ktime_t time, unsigned long delta) +static inline void hrtimer_set_expires_range_ns(struct hrtimer *timer, ktime_t time, u64 delta) { timer->_softexpires = time; timer->node.expires = ktime_add_safe(time, ns_to_ktime(delta)); @@ -378,7 +378,7 @@ static inline void destroy_hrtimer_on_stack(struct hrtimer *timer) { } /* Basic timer operations: */ extern void hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, - unsigned long range_ns, const enum hrtimer_mode mode); + u64 range_ns, const enum hrtimer_mode mode); /** * hrtimer_start - (re)start an hrtimer on the current CPU @@ -399,7 +399,7 @@ extern int hrtimer_try_to_cancel(struct hrtimer *timer); static inline void hrtimer_start_expires(struct hrtimer *timer, enum hrtimer_mode mode) { - unsigned long delta; + u64 delta; ktime_t soft, hard; soft = hrtimer_get_softexpires(timer); hard = hrtimer_get_expires(timer); @@ -477,10 +477,12 @@ extern long hrtimer_nanosleep_restart(struct restart_block *restart_block); extern void hrtimer_init_sleeper(struct hrtimer_sleeper *sl, struct task_struct *tsk); -extern int schedule_hrtimeout_range(ktime_t *expires, unsigned long delta, +extern int schedule_hrtimeout_range(ktime_t *expires, u64 delta, const enum hrtimer_mode mode); extern int schedule_hrtimeout_range_clock(ktime_t *expires, - unsigned long delta, const enum hrtimer_mode mode, int clock); + u64 delta, + const enum hrtimer_mode mode, + int clock); extern int schedule_hrtimeout(ktime_t *expires, const enum hrtimer_mode mode); /* Soft interrupt function to run the hrtimer queues: */ diff --git a/include/linux/poll.h b/include/linux/poll.h index c08386fb3e08..9fb4f40d9a26 100644 --- a/include/linux/poll.h +++ b/include/linux/poll.h @@ -96,7 +96,7 @@ extern void poll_initwait(struct poll_wqueues *pwq); extern void poll_freewait(struct poll_wqueues *pwq); extern int poll_schedule_timeout(struct poll_wqueues *pwq, int state, ktime_t *expires, unsigned long slack); -extern long select_estimate_accuracy(struct timespec *tv); +extern u64 select_estimate_accuracy(struct timespec *tv); static inline int poll_schedule(struct poll_wqueues *pwq, int state) diff --git a/include/linux/sched.h b/include/linux/sched.h index eb7f2f84009b..3284d07edec7 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1792,8 +1792,8 
@@ struct task_struct { * time slack values; these are used to round up poll() and * select() etc timeout values. These are in nanoseconds. */ - unsigned long timer_slack_ns; - unsigned long default_timer_slack_ns; + u64 timer_slack_ns; + u64 default_timer_slack_ns; #ifdef CONFIG_KASAN unsigned int kasan_depth; diff --git a/kernel/sys.c b/kernel/sys.c index 78947de6f969..cf8ba545c7d3 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -2169,7 +2169,10 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, error = perf_event_task_enable(); break; case PR_GET_TIMERSLACK: - error = current->timer_slack_ns; + if (current->timer_slack_ns > ULONG_MAX) + error = ULONG_MAX; + else + error = current->timer_slack_ns; break; case PR_SET_TIMERSLACK: if (arg2 <= 0) diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index fa909f9fd559..58a321c34cfb 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -979,7 +979,7 @@ static inline ktime_t hrtimer_update_lowres(struct hrtimer *timer, ktime_t tim, * relative (HRTIMER_MODE_REL) */ void hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, - unsigned long delta_ns, const enum hrtimer_mode mode) + u64 delta_ns, const enum hrtimer_mode mode) { struct hrtimer_clock_base *base, *new_base; unsigned long flags; @@ -1548,7 +1548,7 @@ long hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp, struct restart_block *restart; struct hrtimer_sleeper t; int ret = 0; - unsigned long slack; + u64 slack; slack = current->timer_slack_ns; if (dl_task(current) || rt_task(current)) @@ -1724,7 +1724,7 @@ void __init hrtimers_init(void) * @clock: timer clock, CLOCK_MONOTONIC or CLOCK_REALTIME */ int __sched -schedule_hrtimeout_range_clock(ktime_t *expires, unsigned long delta, +schedule_hrtimeout_range_clock(ktime_t *expires, u64 delta, const enum hrtimer_mode mode, int clock) { struct hrtimer_sleeper t; @@ -1792,7 +1792,7 @@ schedule_hrtimeout_range_clock(ktime_t *expires, unsigned long delta, * * Returns 0 when the timer has expired otherwise -EINTR */ -int __sched schedule_hrtimeout_range(ktime_t *expires, unsigned long delta, +int __sched schedule_hrtimeout_range(ktime_t *expires, u64 delta, const enum hrtimer_mode mode) { return schedule_hrtimeout_range_clock(expires, delta, mode, diff --git a/kernel/time/timer.c b/kernel/time/timer.c index bbc5d1114583..d1798fa0c743 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c @@ -1698,10 +1698,10 @@ EXPORT_SYMBOL(msleep_interruptible); static void __sched do_usleep_range(unsigned long min, unsigned long max) { ktime_t kmin; - unsigned long delta; + u64 delta; kmin = ktime_set(0, min * NSEC_PER_USEC); - delta = (max - min) * NSEC_PER_USEC; + delta = (u64)(max - min) * NSEC_PER_USEC; schedule_hrtimeout_range(&kmin, delta, HRTIMER_MODE_REL); } -- cgit v1.2.3 From 93e205a728e6cb8d7d11f6836e289798a1de25e2 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Thu, 17 Mar 2016 14:21:15 -0700 Subject: fix Christoph's email addresses There are various email addresses for me throughout the kernel. Use the one that will always be valid. 
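A note on the timer-slack widening in the hunks further up: on 32-bit kernels an unsigned long holds only 32 bits, so any slack of 2^32 ns (about 4.3 s) or more would silently wrap; storing the value as u64 removes that limit, while PR_GET_TIMERSLACK clamps to ULONG_MAX so the userspace-visible type is unchanged. The sketch below is a hypothetical userspace demonstration of the prctl() interface whose kernel side is touched above; it is illustrative only and not part of the patch.

#include <stdio.h>
#include <sys/prctl.h>

int main(void)
{
	/* Ask the kernel to coalesce our timer expiries within 50 us. */
	if (prctl(PR_SET_TIMERSLACK, 50000UL, 0, 0, 0) != 0)
		perror("PR_SET_TIMERSLACK");

	/* The kernel now stores the slack as u64; the prctl() return
	 * value is still an unsigned long, clamped to ULONG_MAX when
	 * the stored value no longer fits (see the kernel/sys.c hunk). */
	long slack = prctl(PR_GET_TIMERSLACK, 0, 0, 0, 0);
	printf("timer slack: %ld ns\n", slack);
	return 0;
}

Passing a slack of zero (or any non-positive arg2) resets the task to default_timer_slack_ns, which is why the task_struct hunk above widens both fields together.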
Signed-off-by: Christoph Lameter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/cgroup-v1/cgroups.txt | 2 +- Documentation/cgroup-v1/cpusets.txt | 2 +- MAINTAINERS | 2 +- arch/ia64/include/asm/rwsem.h | 2 +- include/linux/quicklist.h | 2 +- mm/mmu_notifier.c | 2 +- mm/quicklist.c | 2 +- 7 files changed, 7 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/Documentation/cgroup-v1/cgroups.txt b/Documentation/cgroup-v1/cgroups.txt index c6256ae9885b..947e6fe31ef9 100644 --- a/Documentation/cgroup-v1/cgroups.txt +++ b/Documentation/cgroup-v1/cgroups.txt @@ -8,7 +8,7 @@ Original copyright statements from cpusets.txt: Portions Copyright (C) 2004 BULL SA. Portions Copyright (c) 2004-2006 Silicon Graphics, Inc. Modified by Paul Jackson -Modified by Christoph Lameter +Modified by Christoph Lameter CONTENTS: ========= diff --git a/Documentation/cgroup-v1/cpusets.txt b/Documentation/cgroup-v1/cpusets.txt index fdf7dff3f607..e5cdcd445615 100644 --- a/Documentation/cgroup-v1/cpusets.txt +++ b/Documentation/cgroup-v1/cpusets.txt @@ -6,7 +6,7 @@ Written by Simon.Derr@bull.net Portions Copyright (c) 2004-2006 Silicon Graphics, Inc. Modified by Paul Jackson -Modified by Christoph Lameter +Modified by Christoph Lameter Modified by Paul Menage Modified by Hidetoshi Seto diff --git a/MAINTAINERS b/MAINTAINERS index 99bd725affc6..67d34bb7335c 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -8473,7 +8473,7 @@ F: include/crypto/pcrypt.h PER-CPU MEMORY ALLOCATOR M: Tejun Heo -M: Christoph Lameter +M: Christoph Lameter T: git git://git.kernel.org/pub/scm/linux/kernel/git/tj/percpu.git S: Maintained F: include/linux/percpu*.h diff --git a/arch/ia64/include/asm/rwsem.h b/arch/ia64/include/asm/rwsem.h index 3027e7516d85..ce112472bdd6 100644 --- a/arch/ia64/include/asm/rwsem.h +++ b/arch/ia64/include/asm/rwsem.h @@ -3,7 +3,7 @@ * * Copyright (C) 2003 Ken Chen * Copyright (C) 2003 Asit Mallick - * Copyright (C) 2005 Christoph Lameter + * Copyright (C) 2005 Christoph Lameter * * Based on asm-i386/rwsem.h and other architecture implementation. * diff --git a/include/linux/quicklist.h b/include/linux/quicklist.h index bd466439c588..3bdfa70bc642 100644 --- a/include/linux/quicklist.h +++ b/include/linux/quicklist.h @@ -5,7 +5,7 @@ * as needed after allocation when they are freed. Per cpu lists of pages * are kept that only contain node local pages. * - * (C) 2007, SGI. Christoph Lameter + * (C) 2007, SGI. Christoph Lameter */ #include #include diff --git a/mm/mmu_notifier.c b/mm/mmu_notifier.c index 5fbdd367bbed..f4259e496f83 100644 --- a/mm/mmu_notifier.c +++ b/mm/mmu_notifier.c @@ -3,7 +3,7 @@ * * Copyright (C) 2008 Qumranet, Inc. * Copyright (C) 2008 SGI - * Christoph Lameter + * Christoph Lameter * * This work is licensed under the terms of the GNU GPL, version 2. See * the COPYING file in the top-level directory. diff --git a/mm/quicklist.c b/mm/quicklist.c index 942212970529..daf6ff6e199a 100644 --- a/mm/quicklist.c +++ b/mm/quicklist.c @@ -8,7 +8,7 @@ * improved on it. * * Copyright (C) 2007 SGI, - * Christoph Lameter + * Christoph Lameter * Generalized, added support for multiple lists and * constructors / destructors. */ -- cgit v1.2.3 From 26a247fd9f8d2df1965b7f22c9be2fb3a48603f3 Mon Sep 17 00:00:00 2001 From: Chen Gang Date: Thu, 17 Mar 2016 14:21:35 -0700 Subject: include/linux/list_bl.h: use bool instead of int for boolean functions hlist_bl_unhashed() and hlist_bl_empty() are all boolean functions, so return bool instead of int. 
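For context on the functions being converted: hlist_bl lists store a spinlock bit in bit 0 of the head's first pointer, which is why hlist_bl_empty() in the diff below must mask with ~LIST_BL_LOCKMASK before testing for emptiness. The following standalone C sketch (hypothetical, not kernel code) illustrates that tagged-pointer trick:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct node { struct node *next; };
struct bl_head { uintptr_t first; };   /* bit 0 doubles as a lock bit */

#define LOCK_MASK ((uintptr_t)1)

static bool bl_empty(const struct bl_head *h)
{
	/* Mask off the lock bit before testing, mirroring the
	 * hlist_bl_empty() shown in the hunk below. */
	return !(h->first & ~LOCK_MASK);
}

int main(void)
{
	struct bl_head h = { .first = 0 };
	h.first |= LOCK_MASK;                     /* "locked", no entries */
	printf("empty while locked: %d\n", bl_empty(&h));   /* prints 1 */

	struct node n;
	h.first = (uintptr_t)&n | LOCK_MASK;      /* locked, one entry */
	printf("empty with entry: %d\n", bl_empty(&h));     /* prints 0 */
	return 0;
}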
Signed-off-by: Chen Gang Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/list_bl.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/list_bl.h b/include/linux/list_bl.h index ee7229a6c06a..cb483305e1f5 100644 --- a/include/linux/list_bl.h +++ b/include/linux/list_bl.h @@ -48,7 +48,7 @@ static inline void INIT_HLIST_BL_NODE(struct hlist_bl_node *h) #define hlist_bl_entry(ptr, type, member) container_of(ptr,type,member) -static inline int hlist_bl_unhashed(const struct hlist_bl_node *h) +static inline bool hlist_bl_unhashed(const struct hlist_bl_node *h) { return !h->pprev; } @@ -68,7 +68,7 @@ static inline void hlist_bl_set_first(struct hlist_bl_head *h, h->first = (struct hlist_bl_node *)((unsigned long)n | LIST_BL_LOCKMASK); } -static inline int hlist_bl_empty(const struct hlist_bl_head *h) +static inline bool hlist_bl_empty(const struct hlist_bl_head *h) { return !((unsigned long)READ_ONCE(h->first) & ~LIST_BL_LOCKMASK); } -- cgit v1.2.3 From f67c07f07fca95a7f330b8bb928eabaf9fcce75d Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Thu, 17 Mar 2016 14:21:42 -0700 Subject: radix-tree: add an explicit include of bitops.h The radix-tree header uses the __ffs() function, which is defined in bitops.h. The current kernel headers implicitly include bitops.h, but the userspace test harness does not. Signed-off-by: Matthew Wilcox Cc: Johannes Weiner Cc: Matthew Wilcox Cc: "Kirill A. Shutemov" Cc: Ross Zwisler Cc: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/radix-tree.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h index f54be7082207..39598b9cf1d9 100644 --- a/include/linux/radix-tree.h +++ b/include/linux/radix-tree.h @@ -21,6 +21,7 @@ #ifndef _LINUX_RADIX_TREE_H #define _LINUX_RADIX_TREE_H +#include #include #include #include -- cgit v1.2.3 From e61452365372570253b2b1de84bab0cdb2e62c64 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Thu, 17 Mar 2016 14:21:54 -0700 Subject: radix_tree: add support for multi-order entries With huge pages, it is convenient to have the radix tree be able to return an entry that covers multiple indices. Previous attempts to deal with the problem have involved inserting N duplicate entries, which is a waste of memory and leads to problems trying to handle aliased tags, or probing the tree multiple times to find alternative entries which might cover the requested index. This approach inserts one canonical entry into the tree for a given range of indices, and may also insert other entries in order to ensure that lookups find the canonical entry. This solution only tolerates inserting powers of two that are greater than the fanout of the tree. If we wish to expand the radix tree's abilities to support large-ish pages that is less than the fanout at the penultimate level of the tree, then we would need to add one more step in lookup to ensure that any sibling nodes in the final level of the tree are dereferenced and we return the canonical entry that they reference. Signed-off-by: Matthew Wilcox Cc: Johannes Weiner Cc: Matthew Wilcox Cc: "Kirill A. 
Shutemov" Cc: Ross Zwisler Cc: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/radix-tree.h | 11 ++++- lib/radix-tree.c | 109 ++++++++++++++++++++++++++++++++++----------- mm/filemap.c | 2 +- 3 files changed, 93 insertions(+), 29 deletions(-) (limited to 'include/linux') diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h index 39598b9cf1d9..b211f145c811 100644 --- a/include/linux/radix-tree.h +++ b/include/linux/radix-tree.h @@ -271,8 +271,15 @@ static inline void radix_tree_replace_slot(void **pslot, void *item) } int __radix_tree_create(struct radix_tree_root *root, unsigned long index, - struct radix_tree_node **nodep, void ***slotp); -int radix_tree_insert(struct radix_tree_root *, unsigned long, void *); + unsigned order, struct radix_tree_node **nodep, + void ***slotp); +int __radix_tree_insert(struct radix_tree_root *, unsigned long index, + unsigned order, void *); +static inline int radix_tree_insert(struct radix_tree_root *root, + unsigned long index, void *entry) +{ + return __radix_tree_insert(root, index, 0, entry); +} void *__radix_tree_lookup(struct radix_tree_root *root, unsigned long index, struct radix_tree_node **nodep, void ***slotp); void *radix_tree_lookup(struct radix_tree_root *, unsigned long); diff --git a/lib/radix-tree.c b/lib/radix-tree.c index 9bb440f317c9..d907dca302d5 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -333,7 +333,8 @@ static inline unsigned long radix_tree_maxindex(unsigned int height) /* * Extend a radix tree so it can store key @index. */ -static int radix_tree_extend(struct radix_tree_root *root, unsigned long index) +static int radix_tree_extend(struct radix_tree_root *root, + unsigned long index, unsigned order) { struct radix_tree_node *node; struct radix_tree_node *slot; @@ -345,7 +346,7 @@ static int radix_tree_extend(struct radix_tree_root *root, unsigned long index) while (index > radix_tree_maxindex(height)) height++; - if (root->rnode == NULL) { + if ((root->rnode == NULL) && (order == 0)) { root->height = height; goto out; } @@ -386,6 +387,7 @@ out: * __radix_tree_create - create a slot in a radix tree * @root: radix tree root * @index: index key + * @order: index occupies 2^order aligned slots * @nodep: returns node * @slotp: returns slot * @@ -399,26 +401,29 @@ out: * Returns -ENOMEM, or 0 for success. */ int __radix_tree_create(struct radix_tree_root *root, unsigned long index, - struct radix_tree_node **nodep, void ***slotp) + unsigned order, struct radix_tree_node **nodep, + void ***slotp) { struct radix_tree_node *node = NULL, *slot; unsigned int height, shift, offset; int error; + BUG_ON((0 < order) && (order < RADIX_TREE_MAP_SHIFT)); + /* Make sure the tree is high enough. */ if (index > radix_tree_maxindex(root->height)) { - error = radix_tree_extend(root, index); + error = radix_tree_extend(root, index, order); if (error) return error; } - slot = indirect_to_ptr(root->rnode); + slot = root->rnode; height = root->height; shift = height * RADIX_TREE_MAP_SHIFT; offset = 0; /* uninitialised var warning */ - while (shift > 0) { + while (shift > order) { if (slot == NULL) { /* Have to add a child node. 
*/ if (!(slot = radix_tree_node_alloc(root))) @@ -433,15 +438,31 @@ int __radix_tree_create(struct radix_tree_root *root, unsigned long index, } else rcu_assign_pointer(root->rnode, ptr_to_indirect(slot)); - } + } else if (!radix_tree_is_indirect_ptr(slot)) + break; /* Go a level down */ + height--; shift -= RADIX_TREE_MAP_SHIFT; offset = (index >> shift) & RADIX_TREE_MAP_MASK; - node = slot; + node = indirect_to_ptr(slot); slot = node->slots[offset]; - slot = indirect_to_ptr(slot); - height--; + } + + /* Insert pointers to the canonical entry */ + if ((shift - order) > 0) { + int i, n = 1 << (shift - order); + offset = offset & ~(n - 1); + slot = ptr_to_indirect(&node->slots[offset]); + for (i = 0; i < n; i++) { + if (node->slots[offset + i]) + return -EEXIST; + } + + for (i = 1; i < n; i++) { + rcu_assign_pointer(node->slots[offset + i], slot); + node->count++; + } } if (nodep) @@ -452,15 +473,16 @@ int __radix_tree_create(struct radix_tree_root *root, unsigned long index, } /** - * radix_tree_insert - insert into a radix tree + * __radix_tree_insert - insert into a radix tree * @root: radix tree root * @index: index key + * @order: key covers the 2^order indices around index * @item: item to insert * * Insert an item into the radix tree at position @index. */ -int radix_tree_insert(struct radix_tree_root *root, - unsigned long index, void *item) +int __radix_tree_insert(struct radix_tree_root *root, unsigned long index, + unsigned order, void *item) { struct radix_tree_node *node; void **slot; @@ -468,7 +490,7 @@ int radix_tree_insert(struct radix_tree_root *root, BUG_ON(radix_tree_is_indirect_ptr(item)); - error = __radix_tree_create(root, index, &node, &slot); + error = __radix_tree_create(root, index, order, &node, &slot); if (error) return error; if (*slot != NULL) @@ -486,7 +508,7 @@ int radix_tree_insert(struct radix_tree_root *root, return 0; } -EXPORT_SYMBOL(radix_tree_insert); +EXPORT_SYMBOL(__radix_tree_insert); /** * __radix_tree_lookup - lookup an item in a radix tree @@ -537,6 +559,8 @@ void *__radix_tree_lookup(struct radix_tree_root *root, unsigned long index, node = rcu_dereference_raw(*slot); if (node == NULL) return NULL; + if (!radix_tree_is_indirect_ptr(node)) + break; node = indirect_to_ptr(node); shift -= RADIX_TREE_MAP_SHIFT; @@ -624,6 +648,8 @@ void *radix_tree_tag_set(struct radix_tree_root *root, tag_set(slot, tag, offset); slot = slot->slots[offset]; BUG_ON(slot == NULL); + if (!radix_tree_is_indirect_ptr(slot)) + break; slot = indirect_to_ptr(slot); shift -= RADIX_TREE_MAP_SHIFT; height--; @@ -669,6 +695,8 @@ void *radix_tree_tag_clear(struct radix_tree_root *root, while (shift) { if (slot == NULL) goto out; + if (!radix_tree_is_indirect_ptr(slot)) + break; slot = indirect_to_ptr(slot); shift -= RADIX_TREE_MAP_SHIFT; @@ -753,6 +781,8 @@ int radix_tree_tag_get(struct radix_tree_root *root, if (height == 1) return 1; node = rcu_dereference_raw(node->slots[offset]); + if (!radix_tree_is_indirect_ptr(node)) + return 1; shift -= RADIX_TREE_MAP_SHIFT; height--; } @@ -813,6 +843,7 @@ restart: node = rnode; while (1) { + struct radix_tree_node *slot; if ((flags & RADIX_TREE_ITER_TAGGED) ? 
!test_bit(offset, node->tags[tag]) : !node->slots[offset]) { @@ -843,10 +874,12 @@ restart: if (!shift) break; - node = rcu_dereference_raw(node->slots[offset]); - if (node == NULL) + slot = rcu_dereference_raw(node->slots[offset]); + if (slot == NULL) goto restart; - node = indirect_to_ptr(node); + if (!radix_tree_is_indirect_ptr(slot)) + break; + node = indirect_to_ptr(slot); shift -= RADIX_TREE_MAP_SHIFT; offset = (index >> shift) & RADIX_TREE_MAP_MASK; } @@ -944,16 +977,20 @@ unsigned long radix_tree_range_tag_if_tagged(struct radix_tree_root *root, if (!tag_get(slot, iftag, offset)) goto next; if (shift) { - /* Go down one level */ - shift -= RADIX_TREE_MAP_SHIFT; node = slot; slot = slot->slots[offset]; - slot = indirect_to_ptr(slot); - continue; + if (radix_tree_is_indirect_ptr(slot)) { + slot = indirect_to_ptr(slot); + shift -= RADIX_TREE_MAP_SHIFT; + continue; + } else { + slot = node; + node = node->parent; + } } /* tag the leaf */ - tagged++; + tagged += 1 << shift; tag_set(slot, settag, offset); /* walk back up the path tagging interior nodes */ @@ -1201,11 +1238,20 @@ static unsigned long __locate(struct radix_tree_node *slot, void *item, goto out; } - shift -= RADIX_TREE_MAP_SHIFT; slot = rcu_dereference_raw(slot->slots[i]); if (slot == NULL) goto out; + if (!radix_tree_is_indirect_ptr(slot)) { + if (slot == item) { + *found_index = index + i; + index = 0; + } else { + index += shift; + } + goto out; + } slot = indirect_to_ptr(slot); + shift -= RADIX_TREE_MAP_SHIFT; } /* Bottom level: check items */ @@ -1285,7 +1331,8 @@ static inline void radix_tree_shrink(struct radix_tree_root *root) /* * The candidate node has more than one child, or its child - * is not at the leftmost slot, we cannot shrink. + * is not at the leftmost slot, or it is a multiorder entry, + * we cannot shrink. */ if (to_free->count != 1) break; @@ -1301,6 +1348,9 @@ static inline void radix_tree_shrink(struct radix_tree_root *root) * one (root->rnode) as far as dependent read barriers go. */ if (root->height > 1) { + if (!radix_tree_is_indirect_ptr(slot)) + break; + slot = indirect_to_ptr(slot); slot->parent = NULL; slot = ptr_to_indirect(slot); @@ -1399,7 +1449,7 @@ void *radix_tree_delete_item(struct radix_tree_root *root, unsigned long index, void *item) { struct radix_tree_node *node; - unsigned int offset; + unsigned int offset, i; void **slot; void *entry; int tag; @@ -1428,6 +1478,13 @@ void *radix_tree_delete_item(struct radix_tree_root *root, radix_tree_tag_clear(root, index, tag); } + /* Delete any sibling slots pointing to this slot */ + for (i = 1; offset + i < RADIX_TREE_MAP_SIZE; i++) { + if (node->slots[offset + i] != ptr_to_indirect(slot)) + break; + node->slots[offset + i] = NULL; + node->count--; + } node->slots[offset] = NULL; node->count--; diff --git a/mm/filemap.c b/mm/filemap.c index 61b441b191ad..084ad0fe73c7 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -586,7 +586,7 @@ static int page_cache_tree_insert(struct address_space *mapping, void **slot; int error; - error = __radix_tree_create(&mapping->page_tree, page->index, + error = __radix_tree_create(&mapping->page_tree, page->index, 0, &node, &slot); if (error) return error; -- cgit v1.2.3 From 7165092fe5ca70bae722ac6cd78421cfd0eec18d Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Thu, 17 Mar 2016 14:22:06 -0700 Subject: radix-tree,shmem: introduce radix_tree_iter_next() shmem likes to occasionally drop the lock, schedule, then reacquire the lock and continue with the iteration from the last place it left off.
This is currently done with a pretty ugly goto. Introduce radix_tree_iter_next() and use it throughout shmem.c. [koct9i@gmail.com: fix bug in radix_tree_iter_next() for tagged iteration] Signed-off-by: Matthew Wilcox Cc: Hugh Dickins Signed-off-by: Konstantin Khlebnikov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/radix-tree.h | 16 ++++++++++++++++ mm/shmem.c | 12 +++--------- 2 files changed, 19 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h index b211f145c811..51a97ac8bfbf 100644 --- a/include/linux/radix-tree.h +++ b/include/linux/radix-tree.h @@ -402,6 +402,22 @@ void **radix_tree_iter_retry(struct radix_tree_iter *iter) return NULL; } +/** + * radix_tree_iter_next - resume iterating when the chunk may be invalid + * @iter: iterator state + * + * If the iterator needs to release then reacquire a lock, the chunk may + * have been invalidated by an insertion or deletion. Call this function + * to continue the iteration from the next index. + */ +static inline __must_check +void **radix_tree_iter_next(struct radix_tree_iter *iter) +{ + iter->next_index = iter->index + 1; + iter->tags = 0; + return NULL; +} + /** * radix_tree_chunk_size - get current chunk size * diff --git a/mm/shmem.c b/mm/shmem.c index 91c0dadf48d3..9428c51ab2d6 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -376,7 +376,6 @@ unsigned long shmem_partial_swap_usage(struct address_space *mapping, rcu_read_lock(); -restart: radix_tree_for_each_slot(slot, &mapping->page_tree, &iter, start) { if (iter.index >= end) break; @@ -393,8 +392,7 @@ restart: if (need_resched()) { cond_resched_rcu(); - start = iter.index + 1; - goto restart; + slot = radix_tree_iter_next(&iter); } } @@ -1944,7 +1942,6 @@ static void shmem_tag_pins(struct address_space *mapping) start = 0; rcu_read_lock(); -restart: radix_tree_for_each_slot(slot, &mapping->page_tree, &iter, start) { page = radix_tree_deref_slot(slot); if (!page || radix_tree_exception(page)) { @@ -1961,8 +1958,7 @@ restart: if (need_resched()) { cond_resched_rcu(); - start = iter.index + 1; - goto restart; + slot = radix_tree_iter_next(&iter); } } rcu_read_unlock(); @@ -1999,7 +1995,6 @@ static int shmem_wait_for_pins(struct address_space *mapping) start = 0; rcu_read_lock(); -restart: radix_tree_for_each_tagged(slot, &mapping->page_tree, &iter, start, SHMEM_TAG_PINNED) { @@ -2033,8 +2028,7 @@ restart: continue_resched: if (need_resched()) { cond_resched_rcu(); - start = iter.index + 1; - goto restart; + slot = radix_tree_iter_next(&iter); } } rcu_read_unlock(); -- cgit v1.2.3 From 56b060814e2d87d6646a85a2f4609c73587399ca Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 17 Mar 2016 14:22:14 -0700 Subject: lib/string: introduce match_string() helper Occasionally we have to search for an occurrence of a string in an array of strings. Make a simple helper for that purpose. Signed-off-by: Andy Shevchenko Cc: "David S. Miller" Cc: Bartlomiej Zolnierkiewicz Cc: David Airlie Cc: David Woodhouse Cc: Dmitry Eremin-Solenikov Cc: Greg Kroah-Hartman Cc: Heikki Krogerus Cc: Linus Walleij Cc: Mika Westerberg Cc: Rafael J. 
Wysocki Cc: Sebastian Reichel Cc: Tejun Heo Cc: Rasmus Villemoes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/string.h | 2 ++ lib/string.c | 26 ++++++++++++++++++++++++++ 2 files changed, 28 insertions(+) (limited to 'include/linux') diff --git a/include/linux/string.h b/include/linux/string.h index 9eebc66d957a..0f235e80d355 100644 --- a/include/linux/string.h +++ b/include/linux/string.h @@ -130,6 +130,8 @@ extern void argv_free(char **argv); extern bool sysfs_streq(const char *s1, const char *s2); extern int strtobool(const char *s, bool *res); +int match_string(const char * const *array, size_t n, const char *string); + #ifdef CONFIG_BINARY_PRINTF int vbin_printf(u32 *bin_buf, size_t size, const char *fmt, va_list args); int bstr_printf(char *buf, size_t size, const char *fmt, const u32 *bin_buf); diff --git a/lib/string.c b/lib/string.c index 0323c0d5629a..e9c9db161d2c 100644 --- a/lib/string.c +++ b/lib/string.c @@ -630,6 +630,32 @@ bool sysfs_streq(const char *s1, const char *s2) } EXPORT_SYMBOL(sysfs_streq); +/** + * match_string - matches given string in an array + * @array: array of strings + * @n: number of strings in the array or -1 for NULL terminated arrays + * @string: string to match with + * + * Return: + * index of a @string in the @array if matches, or %-EINVAL otherwise. + */ +int match_string(const char * const *array, size_t n, const char *string) +{ + int index; + const char *item; + + for (index = 0; index < n; index++) { + item = array[index]; + if (!item) + break; + if (!strcmp(item, string)) + return index; + } + + return -EINVAL; +} +EXPORT_SYMBOL(match_string); + /** * strtobool - convert common user inputs into boolean values * @s: input string -- cgit v1.2.3 From e3bde9568d992c5f985e6e30731a5f9f9bef7b13 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Thu, 17 Mar 2016 14:22:47 -0700 Subject: include/linux/unaligned: force inlining of byteswap operations Sometimes gcc mysteriously doesn't inline very small functions we expect to be inlined. See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=66122 With this .config: http://busybox.net/~vda/kernel_config_OPTIMIZE_INLINING_and_Os, the following functions get deinlined many times. Examples of disassembly: (24 copies, 108 calls): 66 8b 07 mov (%rdi),%ax 55 push %rbp 48 89 e5 mov %rsp,%rbp 86 e0 xchg %ah,%al 5d pop %rbp c3 retq (25 copies, 181 calls): 8b 07 mov (%rdi),%eax 55 push %rbp 48 89 e5 mov %rsp,%rbp 0f c8 bswap %eax 5d pop %rbp c3 retq (23 copies, 94 calls): 48 8b 07 mov (%rdi),%rax 55 push %rbp 48 89 e5 mov %rsp,%rbp 48 0f c8 bswap %rax 5d pop %rbp c3 retq (2 copies, 11 calls): 89 f8 mov %edi,%eax 55 push %rbp c1 ef 08 shr $0x8,%edi c1 e0 08 shl $0x8,%eax 09 c7 or %eax,%edi 48 89 e5 mov %rsp,%rbp 66 89 3e mov %di,(%rsi) (8 copies, 43 calls): 55 push %rbp 0f cf bswap %edi 89 3e mov %edi,(%rsi) 48 89 e5 mov %rsp,%rbp 5d pop %rbp c3 retq (26 copies, 157 calls): 55 push %rbp 48 0f cf bswap %rdi 48 89 3e mov %rdi,(%rsi) 48 89 e5 mov %rsp,%rbp 5d pop %rbp c3 retq This patch fixes this via s/inline/__always_inline/. It only affects arches with efficient unaligned access insns, such as x86. 
(arches which lack such ops do not include linux/unaligned/access_ok.h) Code size decrease after the patch is ~8.5k: text data bss dec hex filename 92197848 20826112 36417536 149441496 8e84bd8 vmlinux 92189231 20826144 36417536 149432911 8e82a4f vmlinux6_unaligned_be_after Signed-off-by: Denys Vlasenko Acked-by: Ingo Molnar Cc: Thomas Graf Cc: Peter Zijlstra Cc: David Rientjes Cc: Arnd Bergmann Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/unaligned/access_ok.h | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/unaligned/access_ok.h b/include/linux/unaligned/access_ok.h index 99c1b4d20b0f..33383ca23837 100644 --- a/include/linux/unaligned/access_ok.h +++ b/include/linux/unaligned/access_ok.h @@ -4,62 +4,62 @@ #include #include -static inline u16 get_unaligned_le16(const void *p) +static __always_inline u16 get_unaligned_le16(const void *p) { return le16_to_cpup((__le16 *)p); } -static inline u32 get_unaligned_le32(const void *p) +static __always_inline u32 get_unaligned_le32(const void *p) { return le32_to_cpup((__le32 *)p); } -static inline u64 get_unaligned_le64(const void *p) +static __always_inline u64 get_unaligned_le64(const void *p) { return le64_to_cpup((__le64 *)p); } -static inline u16 get_unaligned_be16(const void *p) +static __always_inline u16 get_unaligned_be16(const void *p) { return be16_to_cpup((__be16 *)p); } -static inline u32 get_unaligned_be32(const void *p) +static __always_inline u32 get_unaligned_be32(const void *p) { return be32_to_cpup((__be32 *)p); } -static inline u64 get_unaligned_be64(const void *p) +static __always_inline u64 get_unaligned_be64(const void *p) { return be64_to_cpup((__be64 *)p); } -static inline void put_unaligned_le16(u16 val, void *p) +static __always_inline void put_unaligned_le16(u16 val, void *p) { *((__le16 *)p) = cpu_to_le16(val); } -static inline void put_unaligned_le32(u32 val, void *p) +static __always_inline void put_unaligned_le32(u32 val, void *p) { *((__le32 *)p) = cpu_to_le32(val); } -static inline void put_unaligned_le64(u64 val, void *p) +static __always_inline void put_unaligned_le64(u64 val, void *p) { *((__le64 *)p) = cpu_to_le64(val); } -static inline void put_unaligned_be16(u16 val, void *p) +static __always_inline void put_unaligned_be16(u16 val, void *p) { *((__be16 *)p) = cpu_to_be16(val); } -static inline void put_unaligned_be32(u32 val, void *p) +static __always_inline void put_unaligned_be32(u32 val, void *p) { *((__be32 *)p) = cpu_to_be32(val); } -static inline void put_unaligned_be64(u64 val, void *p) +static __always_inline void put_unaligned_be64(u64 val, void *p) { *((__be64 *)p) = cpu_to_be64(val); } -- cgit v1.2.3 From ef951599074ba4fad2d0efa0a977129b41e6d203 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 17 Mar 2016 14:22:50 -0700 Subject: lib: move strtobool() to kstrtobool() Create the kstrtobool_from_user() helper and move strtobool() logic into the new kstrtobool() (matching all the other kstrto* functions). Provides an inline wrapper for existing strtobool() callers.
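The semantics are easy to state: only the first character of the input is examined, 'y', 'Y' and '1' parse as true, 'n', 'N' and '0' parse as false, and anything else yields -EINVAL. Below is a hypothetical userspace restatement of the kstrtobool() logic shown in the lib/kstrtox.c hunk that follows, for illustration only:

#include <errno.h>
#include <stdbool.h>
#include <stdio.h>

/* Mirrors the first-character dispatch of kstrtobool(); the demo_
 * prefix marks this as an illustrative copy, not a real kernel API. */
static int demo_kstrtobool(const char *s, bool *res)
{
	if (!s)
		return -EINVAL;
	switch (s[0]) {
	case 'y': case 'Y': case '1':
		*res = true;
		return 0;
	case 'n': case 'N': case '0':
		*res = false;
		return 0;
	default:
		return -EINVAL;
	}
}

int main(void)
{
	bool v = false;
	printf("%d %d\n", demo_kstrtobool("yes", &v), (int)v); /* 0 1 */
	printf("%d\n", demo_kstrtobool("maybe", &v));          /* -22 */
	return 0;
}

Note that the version introduced here accepts only those six leading characters; the "on"/"off"-style boot parameters converted later in this series additionally rely on kstrtobool() learning to recognize "on" and "off", which was added by a separate patch in the same series ("lib: add 'on'/'off' support to kstrtobool").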
Signed-off-by: Kees Cook Cc: Joe Perches Cc: Andy Shevchenko Cc: Rasmus Villemoes Cc: Daniel Borkmann Cc: Amitkumar Karwar Cc: Nishant Sarmukadam Cc: Kalle Valo Cc: Steve French Cc: Michael Ellerman Cc: Heiko Carstens Cc: Martin Schwidefsky Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kernel.h | 2 ++ include/linux/string.h | 6 +++++- lib/kstrtox.c | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++ lib/string.c | 29 ----------------------------- 4 files changed, 57 insertions(+), 30 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index f31638c6e873..f4fa2b29c38c 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -357,6 +357,7 @@ int __must_check kstrtou16(const char *s, unsigned int base, u16 *res); int __must_check kstrtos16(const char *s, unsigned int base, s16 *res); int __must_check kstrtou8(const char *s, unsigned int base, u8 *res); int __must_check kstrtos8(const char *s, unsigned int base, s8 *res); +int __must_check kstrtobool(const char *s, bool *res); int __must_check kstrtoull_from_user(const char __user *s, size_t count, unsigned int base, unsigned long long *res); int __must_check kstrtoll_from_user(const char __user *s, size_t count, unsigned int base, long long *res); @@ -368,6 +369,7 @@ int __must_check kstrtou16_from_user(const char __user *s, size_t count, unsigne int __must_check kstrtos16_from_user(const char __user *s, size_t count, unsigned int base, s16 *res); int __must_check kstrtou8_from_user(const char __user *s, size_t count, unsigned int base, u8 *res); int __must_check kstrtos8_from_user(const char __user *s, size_t count, unsigned int base, s8 *res); +int __must_check kstrtobool_from_user(const char __user *s, size_t count, bool *res); static inline int __must_check kstrtou64_from_user(const char __user *s, size_t count, unsigned int base, u64 *res) { diff --git a/include/linux/string.h b/include/linux/string.h index 0f235e80d355..d3993a79a325 100644 --- a/include/linux/string.h +++ b/include/linux/string.h @@ -128,7 +128,11 @@ extern char **argv_split(gfp_t gfp, const char *str, int *argcp); extern void argv_free(char **argv); extern bool sysfs_streq(const char *s1, const char *s2); -extern int strtobool(const char *s, bool *res); +extern int kstrtobool(const char *s, bool *res); +static inline int strtobool(const char *s, bool *res) +{ + return kstrtobool(s, res); +} int match_string(const char * const *array, size_t n, const char *string); diff --git a/lib/kstrtox.c b/lib/kstrtox.c index 94be244e8441..e8ba4a013e82 100644 --- a/lib/kstrtox.c +++ b/lib/kstrtox.c @@ -321,6 +321,56 @@ int kstrtos8(const char *s, unsigned int base, s8 *res) } EXPORT_SYMBOL(kstrtos8); +/** + * kstrtobool - convert common user inputs into boolean values + * @s: input string + * @res: result + * + * This routine returns 0 iff the first character is one of 'Yy1Nn0'. + * Otherwise it will return -EINVAL. Value pointed to by res is + * updated upon finding a match. + */ +int kstrtobool(const char *s, bool *res) +{ + if (!s) + return -EINVAL; + + switch (s[0]) { + case 'y': + case 'Y': + case '1': + *res = true; + return 0; + case 'n': + case 'N': + case '0': + *res = false; + return 0; + default: + break; + } + + return -EINVAL; +} +EXPORT_SYMBOL(kstrtobool); + +/* + * Since "base" would be a nonsense argument, this open-codes the + * _from_user helper instead of using the helper macro below. 
+ */ +int kstrtobool_from_user(const char __user *s, size_t count, bool *res) +{ + /* Longest string needed to differentiate, newline, terminator */ + char buf[4]; + + count = min(count, sizeof(buf) - 1); + if (copy_from_user(buf, s, count)) + return -EFAULT; + buf[count] = '\0'; + return kstrtobool(buf, res); +} +EXPORT_SYMBOL(kstrtobool_from_user); + #define kstrto_from_user(f, g, type) \ int f(const char __user *s, size_t count, unsigned int base, type *res) \ { \ diff --git a/lib/string.c b/lib/string.c index e9c9db161d2c..ed83562a53ae 100644 --- a/lib/string.c +++ b/lib/string.c @@ -656,35 +656,6 @@ int match_string(const char * const *array, size_t n, const char *string) } EXPORT_SYMBOL(match_string); -/** - * strtobool - convert common user inputs into boolean values - * @s: input string - * @res: result - * - * This routine returns 0 iff the first character is one of 'Yy1Nn0'. - * Otherwise it will return -EINVAL. Value pointed to by res is - * updated upon finding a match. - */ -int strtobool(const char *s, bool *res) -{ - switch (s[0]) { - case 'y': - case 'Y': - case '1': - *res = true; - break; - case 'n': - case 'N': - case '0': - *res = false; - break; - default: - return -EINVAL; - } - return 0; -} -EXPORT_SYMBOL(strtobool); - #ifndef __HAVE_ARCH_MEMSET /** * memset - Fill a region of memory with the given value -- cgit v1.2.3 From 4cc7ecb7f2a60e8deb783b8fbf7c1ae467acb920 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 17 Mar 2016 14:23:00 -0700 Subject: param: convert some "on"/"off" users to strtobool This changes several users of manual "on"/"off" parsing to use strtobool. Some side-effects: - these uses will now parse y/n/1/0 meaningfully too - the early_param uses will now bubble up parse errors Signed-off-by: Kees Cook Acked-by: Heiko Carstens Acked-by: Michael Ellerman Cc: Amitkumar Karwar Cc: Andy Shevchenko Cc: Daniel Borkmann Cc: Joe Perches Cc: Kalle Valo Cc: Martin Schwidefsky Cc: Nishant Sarmukadam Cc: Rasmus Villemoes Cc: Steve French Cc: Stephen Rothwell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/powerpc/kernel/rtasd.c | 9 ++------- arch/powerpc/platforms/pseries/hotplug-cpu.c | 10 ++-------- arch/s390/kernel/time.c | 8 ++------ arch/s390/kernel/topology.c | 7 ++----- arch/x86/kernel/aperture_64.c | 12 ++---------- include/linux/tick.h | 2 +- kernel/time/hrtimer.c | 10 ++-------- kernel/time/tick-sched.c | 10 ++-------- 8 files changed, 15 insertions(+), 53 deletions(-) (limited to 'include/linux') diff --git a/arch/powerpc/kernel/rtasd.c b/arch/powerpc/kernel/rtasd.c index 5a2c049c1c61..aa610ce8742f 100644 --- a/arch/powerpc/kernel/rtasd.c +++ b/arch/powerpc/kernel/rtasd.c @@ -49,7 +49,7 @@ static unsigned int rtas_error_log_buffer_max; static unsigned int event_scan; static unsigned int rtas_event_scan_rate; -static int full_rtas_msgs = 0; +static bool full_rtas_msgs; /* Stop logging to nvram after first fatal error */ static int logging_enabled; /* Until we initialize everything, @@ -592,11 +592,6 @@ __setup("surveillance=", surveillance_setup); static int __init rtasmsgs_setup(char *str) { - if (strcmp(str, "on") == 0) - full_rtas_msgs = 1; - else if (strcmp(str, "off") == 0) - full_rtas_msgs = 0; - - return 1; + return (kstrtobool(str, &full_rtas_msgs) == 0); } __setup("rtasmsgs=", rtasmsgs_setup); diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c index 32274f72fe3f..282837a1d74b 100644 --- a/arch/powerpc/platforms/pseries/hotplug-cpu.c +++ 
b/arch/powerpc/platforms/pseries/hotplug-cpu.c @@ -47,20 +47,14 @@ static DEFINE_PER_CPU(enum cpu_state_vals, current_state) = CPU_STATE_OFFLINE; static enum cpu_state_vals default_offline_state = CPU_STATE_OFFLINE; -static int cede_offline_enabled __read_mostly = 1; +static bool cede_offline_enabled __read_mostly = true; /* * Enable/disable cede_offline when available. */ static int __init setup_cede_offline(char *str) { - if (!strcmp(str, "off")) - cede_offline_enabled = 0; - else if (!strcmp(str, "on")) - cede_offline_enabled = 1; - else - return 0; - return 1; + return (kstrtobool(str, &cede_offline_enabled) == 0); } __setup("cede_offline=", setup_cede_offline); diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index c4e5f183f225..9409d32f285e 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -1432,7 +1432,7 @@ device_initcall(etr_init_sysfs); /* * Server Time Protocol (STP) code. */ -static int stp_online; +static bool stp_online; static struct stp_sstpi stp_info; static void *stp_page; @@ -1443,11 +1443,7 @@ static struct timer_list stp_timer; static int __init early_parse_stp(char *p) { - if (strncmp(p, "off", 3) == 0) - stp_online = 0; - else if (strncmp(p, "on", 2) == 0) - stp_online = 1; - return 0; + return kstrtobool(p, &stp_online); } early_param("stp", early_parse_stp); diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c index 40b8102fdadb..64298a867589 100644 --- a/arch/s390/kernel/topology.c +++ b/arch/s390/kernel/topology.c @@ -37,7 +37,7 @@ static void set_topology_timer(void); static void topology_work_fn(struct work_struct *work); static struct sysinfo_15_1_x *tl_info; -static int topology_enabled = 1; +static bool topology_enabled = true; static DECLARE_WORK(topology_work, topology_work_fn); /* @@ -444,10 +444,7 @@ static const struct cpumask *cpu_book_mask(int cpu) static int __init early_parse_topology(char *p) { - if (strncmp(p, "off", 3)) - return 0; - topology_enabled = 0; - return 0; + return kstrtobool(p, &topology_enabled); } early_param("topology", early_parse_topology); diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c index 6e85f713641d..0a2bb1f62e72 100644 --- a/arch/x86/kernel/aperture_64.c +++ b/arch/x86/kernel/aperture_64.c @@ -227,19 +227,11 @@ static u32 __init search_agp_bridge(u32 *order, int *valid_agp) return 0; } -static int gart_fix_e820 __initdata = 1; +static bool gart_fix_e820 __initdata = true; static int __init parse_gart_mem(char *p) { - if (!p) - return -EINVAL; - - if (!strncmp(p, "off", 3)) - gart_fix_e820 = 0; - else if (!strncmp(p, "on", 2)) - gart_fix_e820 = 1; - - return 0; + return kstrtobool(p, &gart_fix_e820); } early_param("gart_fix_e820", parse_gart_mem); diff --git a/include/linux/tick.h b/include/linux/tick.h index 21f73649a4dc..62be0786d6d0 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -111,7 +111,7 @@ enum tick_dep_bits { #define TICK_DEP_MASK_CLOCK_UNSTABLE (1 << TICK_DEP_BIT_CLOCK_UNSTABLE) #ifdef CONFIG_NO_HZ_COMMON -extern int tick_nohz_enabled; +extern bool tick_nohz_enabled; extern int tick_nohz_tick_stopped(void); extern void tick_nohz_idle_enter(void); extern void tick_nohz_idle_exit(void); diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index 58a321c34cfb..fa0b983290cf 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -515,7 +515,7 @@ static inline ktime_t hrtimer_update_base(struct hrtimer_cpu_base *base) /* * High resolution timer enabled ? 
*/ -static int hrtimer_hres_enabled __read_mostly = 1; +static bool hrtimer_hres_enabled __read_mostly = true; unsigned int hrtimer_resolution __read_mostly = LOW_RES_NSEC; EXPORT_SYMBOL_GPL(hrtimer_resolution); @@ -524,13 +524,7 @@ EXPORT_SYMBOL_GPL(hrtimer_resolution); */ static int __init setup_hrtimer_hres(char *str) { - if (!strcmp(str, "off")) - hrtimer_hres_enabled = 0; - else if (!strcmp(str, "on")) - hrtimer_hres_enabled = 1; - else - return 0; - return 1; + return (kstrtobool(str, &hrtimer_hres_enabled) == 0); } __setup("highres=", setup_hrtimer_hres); diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 969e6704c3c9..195fe7d2caad 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -486,20 +486,14 @@ void __init tick_nohz_init(void) /* * NO HZ enabled ? */ -int tick_nohz_enabled __read_mostly = 1; +bool tick_nohz_enabled __read_mostly = true; unsigned long tick_nohz_active __read_mostly; /* * Enable / Disable tickless mode */ static int __init setup_tick_nohz(char *str) { - if (!strcmp(str, "off")) - tick_nohz_enabled = 0; - else if (!strcmp(str, "on")) - tick_nohz_enabled = 1; - else - return 0; - return 1; + return (kstrtobool(str, &tick_nohz_enabled) == 0); } __setup("nohz=", setup_tick_nohz); -- cgit v1.2.3 From 0b81d0779072696371822e5ed9e7c6292e547024 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 15 May 2015 16:26:10 -0700 Subject: fs crypto: move per-file encryption from f2fs tree to fs/crypto This patch adds the renamed functions moved from the f2fs crypto files. 1. definitions for per-file encryption used by ext4 and f2fs. 2. crypto.c for encrypt/decrypt functions a. IO preparation: - fscrypt_get_ctx / fscrypt_release_ctx b. before IOs: - fscrypt_encrypt_page - fscrypt_decrypt_page - fscrypt_zeroout_range c. after IOs: - fscrypt_decrypt_bio_pages - fscrypt_pullback_bio_page - fscrypt_restore_control_page 3. policy.c supporting context management. a. For ioctls: - fscrypt_process_policy - fscrypt_get_policy b. For context permission - fscrypt_has_permitted_context - fscrypt_inherit_context 4. keyinfo.c to handle permissions - fscrypt_get_encryption_info - fscrypt_free_encryption_info 5. fname.c to support filename encryption a. general wrapper functions - fscrypt_fname_disk_to_usr - fscrypt_fname_usr_to_disk - fscrypt_setup_filename - fscrypt_free_filename b. specific filename handling functions - fscrypt_fname_alloc_buffer - fscrypt_fname_free_buffer 6. 
Makefile and Kconfig Cc: Al Viro Signed-off-by: Michael Halcrow Signed-off-by: Ildar Muslukhov Signed-off-by: Uday Savagaonkar Signed-off-by: Theodore Ts'o Signed-off-by: Arnd Bergmann Signed-off-by: Jaegeuk Kim --- fs/Kconfig | 2 + fs/Makefile | 1 + fs/crypto/Kconfig | 18 ++ fs/crypto/Makefile | 3 + fs/crypto/crypto.c | 556 +++++++++++++++++++++++++++++++++++++++++++++++ fs/crypto/fname.c | 427 ++++++++++++++++++++++++++++++++++++ fs/crypto/keyinfo.c | 278 ++++++++++++++++++++++++ fs/crypto/policy.c | 229 +++++++++++++++++++ fs/f2fs/Kconfig | 10 +- fs/f2fs/Makefile | 2 - fs/f2fs/crypto.c | 473 ---------------------------------------- fs/f2fs/crypto_fname.c | 446 ------------------------------------- fs/f2fs/crypto_key.c | 267 ----------------------- fs/f2fs/crypto_policy.c | 210 ------------------ fs/f2fs/data.c | 31 ++- fs/f2fs/dir.c | 44 ++-- fs/f2fs/f2fs.h | 172 +++------------ fs/f2fs/f2fs_crypto.h | 151 ------------- fs/f2fs/file.c | 38 ++-- fs/f2fs/inline.c | 4 +- fs/f2fs/inode.c | 5 +- fs/f2fs/namei.c | 56 +++-- fs/f2fs/super.c | 55 +++-- include/linux/dcache.h | 2 + include/linux/fs.h | 8 + include/linux/fscrypto.h | 433 ++++++++++++++++++++++++++++++++++++ include/uapi/linux/fs.h | 18 ++ 27 files changed, 2125 insertions(+), 1814 deletions(-) create mode 100644 fs/crypto/Kconfig create mode 100644 fs/crypto/Makefile create mode 100644 fs/crypto/crypto.c create mode 100644 fs/crypto/fname.c create mode 100644 fs/crypto/keyinfo.c create mode 100644 fs/crypto/policy.c delete mode 100644 fs/f2fs/crypto.c delete mode 100644 fs/f2fs/crypto_fname.c delete mode 100644 fs/f2fs/crypto_key.c delete mode 100644 fs/f2fs/crypto_policy.c delete mode 100644 fs/f2fs/f2fs_crypto.h create mode 100644 include/linux/fscrypto.h (limited to 'include/linux') diff --git a/fs/Kconfig b/fs/Kconfig index 9adee0d7536e..9d757673bf40 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -84,6 +84,8 @@ config MANDATORY_FILE_LOCKING To the best of my knowledge this is dead code that no one cares about. +source "fs/crypto/Kconfig" + source "fs/notify/Kconfig" source "fs/quota/Kconfig" diff --git a/fs/Makefile b/fs/Makefile index 79f522575cba..252c96898a43 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -30,6 +30,7 @@ obj-$(CONFIG_EVENTFD) += eventfd.o obj-$(CONFIG_USERFAULTFD) += userfaultfd.o obj-$(CONFIG_AIO) += aio.o obj-$(CONFIG_FS_DAX) += dax.o +obj-$(CONFIG_FS_ENCRYPTION) += crypto/ obj-$(CONFIG_FILE_LOCKING) += locks.o obj-$(CONFIG_COMPAT) += compat.o compat_ioctl.o obj-$(CONFIG_BINFMT_AOUT) += binfmt_aout.o diff --git a/fs/crypto/Kconfig b/fs/crypto/Kconfig new file mode 100644 index 000000000000..92348faf9865 --- /dev/null +++ b/fs/crypto/Kconfig @@ -0,0 +1,18 @@ +config FS_ENCRYPTION + tristate "FS Encryption (Per-file encryption)" + depends on BLOCK + select CRYPTO + select CRYPTO_AES + select CRYPTO_CBC + select CRYPTO_ECB + select CRYPTO_XTS + select CRYPTO_CTS + select CRYPTO_CTR + select CRYPTO_SHA256 + select KEYS + select ENCRYPTED_KEYS + help + Enable encryption of files and directories. This + feature is similar to ecryptfs, but it is more memory + efficient since it avoids caching the encrypted and + decrypted pages in the page cache. 
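One implementation detail worth pulling out of the crypto.c code that follows: do_page_crypto() builds the 16-byte XTS tweak by copying an 8-byte identifier into the low bytes and zero-filling the remainder, in native byte order. The small userspace mirror below is hypothetical (the names are ours, not the patch's) and only illustrates that buffer layout:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define FS_XTS_TWEAK_SIZE 16

/* Fill the low 8 bytes of the tweak from a 64-bit identifier and
 * zero-pad the rest, mirroring the construction in do_page_crypto()
 * below (native byte order, as the kernel's memcpy implies). */
static void build_xts_tweak(uint8_t tweak[FS_XTS_TWEAK_SIZE], uint64_t id)
{
	memcpy(tweak, &id, sizeof(id));
	memset(tweak + sizeof(id), 0, FS_XTS_TWEAK_SIZE - sizeof(id));
}

int main(void)
{
	uint8_t tweak[FS_XTS_TWEAK_SIZE];
	build_xts_tweak(tweak, 0x1234);
	for (int i = 0; i < FS_XTS_TWEAK_SIZE; i++)
		printf("%02x", tweak[i]);
	printf("\n");
	return 0;
}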
diff --git a/fs/crypto/Makefile b/fs/crypto/Makefile new file mode 100644 index 000000000000..f17684c48739 --- /dev/null +++ b/fs/crypto/Makefile @@ -0,0 +1,3 @@ +obj-$(CONFIG_FS_ENCRYPTION) += fscrypto.o + +fscrypto-y := crypto.o fname.o policy.o keyinfo.o diff --git a/fs/crypto/crypto.c b/fs/crypto/crypto.c new file mode 100644 index 000000000000..d45c33157e2b --- /dev/null +++ b/fs/crypto/crypto.c @@ -0,0 +1,556 @@ +/* + * This contains encryption functions for per-file encryption. + * + * Copyright (C) 2015, Google, Inc. + * Copyright (C) 2015, Motorola Mobility + * + * Written by Michael Halcrow, 2014. + * + * Filename encryption additions + * Uday Savagaonkar, 2014 + * Encryption policy handling additions + * Ildar Muslukhov, 2014 + * Add fscrypt_pullback_bio_page() + * Jaegeuk Kim, 2015. + * + * This has not yet undergone a rigorous security audit. + * + * The usage of AES-XTS should conform to recommendations in NIST + * Special Publication 800-38E and IEEE P1619/D16. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static unsigned int num_prealloc_crypto_pages = 32; +static unsigned int num_prealloc_crypto_ctxs = 128; + +module_param(num_prealloc_crypto_pages, uint, 0444); +MODULE_PARM_DESC(num_prealloc_crypto_pages, + "Number of crypto pages to preallocate"); +module_param(num_prealloc_crypto_ctxs, uint, 0444); +MODULE_PARM_DESC(num_prealloc_crypto_ctxs, + "Number of crypto contexts to preallocate"); + +static mempool_t *fscrypt_bounce_page_pool = NULL; + +static LIST_HEAD(fscrypt_free_ctxs); +static DEFINE_SPINLOCK(fscrypt_ctx_lock); + +static struct workqueue_struct *fscrypt_read_workqueue; +static DEFINE_MUTEX(fscrypt_init_mutex); + +static struct kmem_cache *fscrypt_ctx_cachep; +struct kmem_cache *fscrypt_info_cachep; + +/** + * fscrypt_release_ctx() - Releases an encryption context + * @ctx: The encryption context to release. + * + * If the encryption context was allocated from the pre-allocated pool, returns + * it to that pool. Else, frees it. + * + * If there's a bounce page in the context, this frees that. + */ +void fscrypt_release_ctx(struct fscrypt_ctx *ctx) +{ + unsigned long flags; + + if (ctx->flags & FS_WRITE_PATH_FL && ctx->w.bounce_page) { + mempool_free(ctx->w.bounce_page, fscrypt_bounce_page_pool); + ctx->w.bounce_page = NULL; + } + ctx->w.control_page = NULL; + if (ctx->flags & FS_CTX_REQUIRES_FREE_ENCRYPT_FL) { + kmem_cache_free(fscrypt_ctx_cachep, ctx); + } else { + spin_lock_irqsave(&fscrypt_ctx_lock, flags); + list_add(&ctx->free_list, &fscrypt_free_ctxs); + spin_unlock_irqrestore(&fscrypt_ctx_lock, flags); + } +} +EXPORT_SYMBOL(fscrypt_release_ctx); + +/** + * fscrypt_get_ctx() - Gets an encryption context + * @inode: The inode for which we are doing the crypto + * + * Allocates and initializes an encryption context. + * + * Return: An allocated and initialized encryption context on success; error + * value or NULL otherwise. + */ +struct fscrypt_ctx *fscrypt_get_ctx(struct inode *inode) +{ + struct fscrypt_ctx *ctx = NULL; + struct fscrypt_info *ci = inode->i_crypt_info; + unsigned long flags; + + if (ci == NULL) + return ERR_PTR(-ENOKEY); + + /* + * We first try getting the ctx from a free list because in + * the common case the ctx will have an allocated and + * initialized crypto tfm, so it's probably a worthwhile + * optimization. 
For the bounce page, we first try getting it + * from the kernel allocator because that's just about as fast + * as getting it from a list and because a cache of free pages + * should generally be a "last resort" option for a filesystem + * to be able to do its job. + */ + spin_lock_irqsave(&fscrypt_ctx_lock, flags); + ctx = list_first_entry_or_null(&fscrypt_free_ctxs, + struct fscrypt_ctx, free_list); + if (ctx) + list_del(&ctx->free_list); + spin_unlock_irqrestore(&fscrypt_ctx_lock, flags); + if (!ctx) { + ctx = kmem_cache_zalloc(fscrypt_ctx_cachep, GFP_NOFS); + if (!ctx) + return ERR_PTR(-ENOMEM); + ctx->flags |= FS_CTX_REQUIRES_FREE_ENCRYPT_FL; + } else { + ctx->flags &= ~FS_CTX_REQUIRES_FREE_ENCRYPT_FL; + } + ctx->flags &= ~FS_WRITE_PATH_FL; + return ctx; +} +EXPORT_SYMBOL(fscrypt_get_ctx); + +/** + * fscrypt_complete() - The completion callback for page encryption + * @req: The asynchronous encryption request context + * @res: The result of the encryption operation + */ +static void fscrypt_complete(struct crypto_async_request *req, int res) +{ + struct fscrypt_completion_result *ecr = req->data; + + if (res == -EINPROGRESS) + return; + ecr->res = res; + complete(&ecr->completion); +} + +typedef enum { + FS_DECRYPT = 0, + FS_ENCRYPT, +} fscrypt_direction_t; + +static int do_page_crypto(struct inode *inode, + fscrypt_direction_t rw, pgoff_t index, + struct page *src_page, struct page *dest_page) +{ + u8 xts_tweak[FS_XTS_TWEAK_SIZE]; + struct ablkcipher_request *req = NULL; + DECLARE_FS_COMPLETION_RESULT(ecr); + struct scatterlist dst, src; + struct fscrypt_info *ci = inode->i_crypt_info; + struct crypto_ablkcipher *tfm = ci->ci_ctfm; + int res = 0; + + req = ablkcipher_request_alloc(tfm, GFP_NOFS); + if (!req) { + printk_ratelimited(KERN_ERR + "%s: crypto_request_alloc() failed\n", + __func__); + return -ENOMEM; + } + + ablkcipher_request_set_callback( + req, CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP, + fscrypt_complete, &ecr); + + BUILD_BUG_ON(FS_XTS_TWEAK_SIZE < sizeof(index)); + memcpy(xts_tweak, &inode->i_ino, sizeof(index)); + memset(&xts_tweak[sizeof(index)], 0, + FS_XTS_TWEAK_SIZE - sizeof(index)); + + sg_init_table(&dst, 1); + sg_set_page(&dst, dest_page, PAGE_CACHE_SIZE, 0); + sg_init_table(&src, 1); + sg_set_page(&src, src_page, PAGE_CACHE_SIZE, 0); + ablkcipher_request_set_crypt(req, &src, &dst, PAGE_CACHE_SIZE, + xts_tweak); + if (rw == FS_DECRYPT) + res = crypto_ablkcipher_decrypt(req); + else + res = crypto_ablkcipher_encrypt(req); + if (res == -EINPROGRESS || res == -EBUSY) { + BUG_ON(req->base.data != &ecr); + wait_for_completion(&ecr.completion); + res = ecr.res; + } + ablkcipher_request_free(req); + if (res) { + printk_ratelimited(KERN_ERR + "%s: crypto_ablkcipher_encrypt() returned %d\n", + __func__, res); + return res; + } + return 0; +} + +static struct page *alloc_bounce_page(struct fscrypt_ctx *ctx) +{ + ctx->w.bounce_page = mempool_alloc(fscrypt_bounce_page_pool, + GFP_NOWAIT); + if (ctx->w.bounce_page == NULL) + return ERR_PTR(-ENOMEM); + ctx->flags |= FS_WRITE_PATH_FL; + return ctx->w.bounce_page; +} + +/** + * fscrypt_encrypt_page() - Encrypts a page + * @inode: The inode for which the encryption should take place + * @plaintext_page: The page to encrypt. Must be locked. + * + * Allocates a ciphertext page and encrypts plaintext_page into it using the ctx + * encryption context. + * + * Called on the page write path.
The caller must call + * fscrypt_restore_control_page() on the returned ciphertext page to + * release the bounce buffer and the encryption context. + * + * Return: An allocated page with the encrypted content on success. Else, an + * error value or NULL. + */ +struct page *fscrypt_encrypt_page(struct inode *inode, + struct page *plaintext_page) +{ + struct fscrypt_ctx *ctx; + struct page *ciphertext_page = NULL; + int err; + + BUG_ON(!PageLocked(plaintext_page)); + + ctx = fscrypt_get_ctx(inode); + if (IS_ERR(ctx)) + return (struct page *)ctx; + + /* The encryption operation will require a bounce page. */ + ciphertext_page = alloc_bounce_page(ctx); + if (IS_ERR(ciphertext_page)) + goto errout; + + ctx->w.control_page = plaintext_page; + err = do_page_crypto(inode, FS_ENCRYPT, plaintext_page->index, + plaintext_page, ciphertext_page); + if (err) { + ciphertext_page = ERR_PTR(err); + goto errout; + } + SetPagePrivate(ciphertext_page); + set_page_private(ciphertext_page, (unsigned long)ctx); + lock_page(ciphertext_page); + return ciphertext_page; + +errout: + fscrypt_release_ctx(ctx); + return ciphertext_page; +} +EXPORT_SYMBOL(fscrypt_encrypt_page); + +/** + * fscrypt_decrypt_page() - Decrypts a page in-place + * @page: The page to decrypt. Must be locked. + * + * Decrypts page in-place using the ctx encryption context. + * + * Called from the read completion callback. + * + * Return: Zero on success, non-zero otherwise. + */ +int fscrypt_decrypt_page(struct page *page) +{ + BUG_ON(!PageLocked(page)); + + return do_page_crypto(page->mapping->host, + FS_DECRYPT, page->index, page, page); +} +EXPORT_SYMBOL(fscrypt_decrypt_page); + +int fscrypt_zeroout_range(struct inode *inode, pgoff_t lblk, + sector_t pblk, unsigned int len) +{ + struct fscrypt_ctx *ctx; + struct page *ciphertext_page = NULL; + struct bio *bio; + int ret, err = 0; + + BUG_ON(inode->i_sb->s_blocksize != PAGE_CACHE_SIZE); + + ctx = fscrypt_get_ctx(inode); + if (IS_ERR(ctx)) + return PTR_ERR(ctx); + + ciphertext_page = alloc_bounce_page(ctx); + if (IS_ERR(ciphertext_page)) { + err = PTR_ERR(ciphertext_page); + goto errout; + } + + while (len--) { + err = do_page_crypto(inode, FS_ENCRYPT, lblk, + ZERO_PAGE(0), ciphertext_page); + if (err) + goto errout; + + bio = bio_alloc(GFP_KERNEL, 1); + if (!bio) { + err = -ENOMEM; + goto errout; + } + bio->bi_bdev = inode->i_sb->s_bdev; + bio->bi_iter.bi_sector = + pblk << (inode->i_sb->s_blocksize_bits - 9); + ret = bio_add_page(bio, ciphertext_page, + inode->i_sb->s_blocksize, 0); + if (ret != inode->i_sb->s_blocksize) { + /* should never happen! */ + WARN_ON(1); + bio_put(bio); + err = -EIO; + goto errout; + } + err = submit_bio_wait(WRITE, bio); + if ((err == 0) && bio->bi_error) + err = -EIO; + bio_put(bio); + if (err) + goto errout; + lblk++; + pblk++; + } + err = 0; +errout: + fscrypt_release_ctx(ctx); + return err; +} +EXPORT_SYMBOL(fscrypt_zeroout_range); + +/* + * Validate dentries for encrypted directories to make sure we aren't + * potentially caching stale data after a key has been added or + * removed.
+ */ +static int fscrypt_d_revalidate(struct dentry *dentry, unsigned int flags) +{ + struct inode *dir = d_inode(dentry->d_parent); + struct fscrypt_info *ci = dir->i_crypt_info; + int dir_has_key, cached_with_key; + + if (!dir->i_sb->s_cop->is_encrypted(dir)) + return 0; + + if (ci && ci->ci_keyring_key && + (ci->ci_keyring_key->flags & ((1 << KEY_FLAG_INVALIDATED) | + (1 << KEY_FLAG_REVOKED) | + (1 << KEY_FLAG_DEAD)))) + ci = NULL; + + /* this should eventually be an flag in d_flags */ + spin_lock(&dentry->d_lock); + cached_with_key = dentry->d_flags & DCACHE_ENCRYPTED_WITH_KEY; + spin_unlock(&dentry->d_lock); + dir_has_key = (ci != NULL); + + /* + * If the dentry was cached without the key, and it is a + * negative dentry, it might be a valid name. We can't check + * if the key has since been made available due to locking + * reasons, so we fail the validation so ext4_lookup() can do + * this check. + * + * We also fail the validation if the dentry was created with + * the key present, but we no longer have the key, or vice versa. + */ + if ((!cached_with_key && d_is_negative(dentry)) || + (!cached_with_key && dir_has_key) || + (cached_with_key && !dir_has_key)) + return 0; + return 1; +} + +const struct dentry_operations fscrypt_d_ops = { + .d_revalidate = fscrypt_d_revalidate, +}; +EXPORT_SYMBOL(fscrypt_d_ops); + +/* + * Call fscrypt_decrypt_page on every single page, reusing the encryption + * context. + */ +static void completion_pages(struct work_struct *work) +{ + struct fscrypt_ctx *ctx = + container_of(work, struct fscrypt_ctx, r.work); + struct bio *bio = ctx->r.bio; + struct bio_vec *bv; + int i; + + bio_for_each_segment_all(bv, bio, i) { + struct page *page = bv->bv_page; + int ret = fscrypt_decrypt_page(page); + + if (ret) { + WARN_ON_ONCE(1); + SetPageError(page); + } else { + SetPageUptodate(page); + } + unlock_page(page); + } + fscrypt_release_ctx(ctx); + bio_put(bio); +} + +void fscrypt_decrypt_bio_pages(struct fscrypt_ctx *ctx, struct bio *bio) +{ + INIT_WORK(&ctx->r.work, completion_pages); + ctx->r.bio = bio; + queue_work(fscrypt_read_workqueue, &ctx->r.work); +} +EXPORT_SYMBOL(fscrypt_decrypt_bio_pages); + +void fscrypt_pullback_bio_page(struct page **page, bool restore) +{ + struct fscrypt_ctx *ctx; + struct page *bounce_page; + + /* The bounce data pages are unmapped. */ + if ((*page)->mapping) + return; + + /* The bounce data page is unmapped. */ + bounce_page = *page; + ctx = (struct fscrypt_ctx *)page_private(bounce_page); + + /* restore control page */ + *page = ctx->w.control_page; + + if (restore) + fscrypt_restore_control_page(bounce_page); +} +EXPORT_SYMBOL(fscrypt_pullback_bio_page); + +void fscrypt_restore_control_page(struct page *page) +{ + struct fscrypt_ctx *ctx; + + ctx = (struct fscrypt_ctx *)page_private(page); + set_page_private(page, (unsigned long)NULL); + ClearPagePrivate(page); + unlock_page(page); + fscrypt_release_ctx(ctx); +} +EXPORT_SYMBOL(fscrypt_restore_control_page); + +static void fscrypt_destroy(void) +{ + struct fscrypt_ctx *pos, *n; + + list_for_each_entry_safe(pos, n, &fscrypt_free_ctxs, free_list) + kmem_cache_free(fscrypt_ctx_cachep, pos); + INIT_LIST_HEAD(&fscrypt_free_ctxs); + mempool_destroy(fscrypt_bounce_page_pool); + fscrypt_bounce_page_pool = NULL; +} + +/** + * fscrypt_initialize() - allocate major buffers for fs encryption. + * + * We only call this when we start accessing encrypted files, since it + * results in memory getting allocated that wouldn't otherwise be used. 
+ * + * Return: Zero on success, non-zero otherwise. + */ +int fscrypt_initialize(void) +{ + int i, res = -ENOMEM; + + if (fscrypt_bounce_page_pool) + return 0; + + mutex_lock(&fscrypt_init_mutex); + if (fscrypt_bounce_page_pool) + goto already_initialized; + + for (i = 0; i < num_prealloc_crypto_ctxs; i++) { + struct fscrypt_ctx *ctx; + + ctx = kmem_cache_zalloc(fscrypt_ctx_cachep, GFP_NOFS); + if (!ctx) + goto fail; + list_add(&ctx->free_list, &fscrypt_free_ctxs); + } + + fscrypt_bounce_page_pool = + mempool_create_page_pool(num_prealloc_crypto_pages, 0); + if (!fscrypt_bounce_page_pool) + goto fail; + +already_initialized: + mutex_unlock(&fscrypt_init_mutex); + return 0; +fail: + fscrypt_destroy(); + mutex_unlock(&fscrypt_init_mutex); + return res; +} +EXPORT_SYMBOL(fscrypt_initialize); + +/** + * fscrypt_init() - Set up for fs encryption. + */ +static int __init fscrypt_init(void) +{ + fscrypt_read_workqueue = alloc_workqueue("fscrypt_read_queue", + WQ_HIGHPRI, 0); + if (!fscrypt_read_workqueue) + goto fail; + + fscrypt_ctx_cachep = KMEM_CACHE(fscrypt_ctx, SLAB_RECLAIM_ACCOUNT); + if (!fscrypt_ctx_cachep) + goto fail_free_queue; + + fscrypt_info_cachep = KMEM_CACHE(fscrypt_info, SLAB_RECLAIM_ACCOUNT); + if (!fscrypt_info_cachep) + goto fail_free_ctx; + + return 0; + +fail_free_ctx: + kmem_cache_destroy(fscrypt_ctx_cachep); +fail_free_queue: + destroy_workqueue(fscrypt_read_workqueue); +fail: + return -ENOMEM; +} +module_init(fscrypt_init) + +/** + * fscrypt_exit() - Shutdown the fs encryption system + */ +static void __exit fscrypt_exit(void) +{ + fscrypt_destroy(); + + if (fscrypt_read_workqueue) + destroy_workqueue(fscrypt_read_workqueue); + kmem_cache_destroy(fscrypt_ctx_cachep); + kmem_cache_destroy(fscrypt_info_cachep); +} +module_exit(fscrypt_exit); + +MODULE_LICENSE("GPL"); diff --git a/fs/crypto/fname.c b/fs/crypto/fname.c new file mode 100644 index 000000000000..5e4ddeeba267 --- /dev/null +++ b/fs/crypto/fname.c @@ -0,0 +1,427 @@ +/* + * This contains functions for filename crypto management + * + * Copyright (C) 2015, Google, Inc. + * Copyright (C) 2015, Motorola Mobility + * + * Written by Uday Savagaonkar, 2014. + * Modified by Jaegeuk Kim, 2015. + * + * This has not yet undergone a rigorous security audit. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +static u32 size_round_up(size_t size, size_t blksize) +{ + return ((size + blksize - 1) / blksize) * blksize; +} + +/** + * dir_crypt_complete() - completion callback for filename crypto requests + */ +static void dir_crypt_complete(struct crypto_async_request *req, int res) +{ + struct fscrypt_completion_result *ecr = req->data; + + if (res == -EINPROGRESS) + return; + ecr->res = res; + complete(&ecr->completion); +} +} + +/** + * fname_encrypt() - encrypt a filename + * + * This function encrypts the input filename and returns the length of the + * ciphertext. Errors are returned as negative numbers. We trust the caller to + * allocate sufficient memory for the oname string.
+ */ +static int fname_encrypt(struct inode *inode, + const struct qstr *iname, struct fscrypt_str *oname) +{ + u32 ciphertext_len; + struct ablkcipher_request *req = NULL; + DECLARE_FS_COMPLETION_RESULT(ecr); + struct fscrypt_info *ci = inode->i_crypt_info; + struct crypto_ablkcipher *tfm = ci->ci_ctfm; + int res = 0; + char iv[FS_CRYPTO_BLOCK_SIZE]; + struct scatterlist src_sg, dst_sg; + int padding = 4 << (ci->ci_flags & FS_POLICY_FLAGS_PAD_MASK); + char *workbuf, buf[32], *alloc_buf = NULL; + unsigned lim; + + lim = inode->i_sb->s_cop->max_namelen(inode); + if (iname->len <= 0 || iname->len > lim) + return -EIO; + + ciphertext_len = (iname->len < FS_CRYPTO_BLOCK_SIZE) ? + FS_CRYPTO_BLOCK_SIZE : iname->len; + ciphertext_len = size_round_up(ciphertext_len, padding); + ciphertext_len = (ciphertext_len > lim) ? lim : ciphertext_len; + + if (ciphertext_len <= sizeof(buf)) { + workbuf = buf; + } else { + alloc_buf = kmalloc(ciphertext_len, GFP_NOFS); + if (!alloc_buf) + return -ENOMEM; + workbuf = alloc_buf; + } + + /* Allocate request */ + req = ablkcipher_request_alloc(tfm, GFP_NOFS); + if (!req) { + printk_ratelimited(KERN_ERR + "%s: crypto_request_alloc() failed\n", __func__); + kfree(alloc_buf); + return -ENOMEM; + } + ablkcipher_request_set_callback(req, + CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP, + dir_crypt_complete, &ecr); + + /* Copy the input */ + memcpy(workbuf, iname->name, iname->len); + if (iname->len < ciphertext_len) + memset(workbuf + iname->len, 0, ciphertext_len - iname->len); + + /* Initialize IV */ + memset(iv, 0, FS_CRYPTO_BLOCK_SIZE); + + /* Create encryption request */ + sg_init_one(&src_sg, workbuf, ciphertext_len); + sg_init_one(&dst_sg, oname->name, ciphertext_len); + ablkcipher_request_set_crypt(req, &src_sg, &dst_sg, ciphertext_len, iv); + res = crypto_ablkcipher_encrypt(req); + if (res == -EINPROGRESS || res == -EBUSY) { + wait_for_completion(&ecr.completion); + res = ecr.res; + } + kfree(alloc_buf); + ablkcipher_request_free(req); + if (res < 0) + printk_ratelimited(KERN_ERR + "%s: Error (error code %d)\n", __func__, res); + + oname->len = ciphertext_len; + return res; +} +
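To make the length computation above concrete, here is a tiny userspace check of the rounding logic; the values are assumed for illustration (a 5-byte name, 16-byte cipher blocks, and the 32-byte pad policy):

	#include <stdio.h>

	/* Same rounding helper as size_round_up() above. */
	static unsigned round_up_to(unsigned size, unsigned blksize)
	{
		return ((size + blksize - 1) / blksize) * blksize;
	}

	int main(void)
	{
		unsigned len = 5;		/* e.g. the name "hello" */
		unsigned block = 16;		/* FS_CRYPTO_BLOCK_SIZE */
		unsigned padding = 4 << 3;	/* pad flag 3 -> 32 bytes */

		if (len < block)
			len = block;	/* widen to one cipher block */
		printf("%u\n", round_up_to(len, padding));	/* prints 32 */
		return 0;
	}

+/* + * fname_decrypt() - decrypt a filename + * This function decrypts the input filename and returns + * the length of the plaintext. + * Errors are returned as negative numbers. + * We trust the caller to allocate sufficient memory for the oname string.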
+ */ +static int fname_decrypt(struct inode *inode, + const struct fscrypt_str *iname, + struct fscrypt_str *oname) +{ + struct ablkcipher_request *req = NULL; + DECLARE_FS_COMPLETION_RESULT(ecr); + struct scatterlist src_sg, dst_sg; + struct fscrypt_info *ci = inode->i_crypt_info; + struct crypto_ablkcipher *tfm = ci->ci_ctfm; + int res = 0; + char iv[FS_CRYPTO_BLOCK_SIZE]; + unsigned lim; + + lim = inode->i_sb->s_cop->max_namelen(inode); + if (iname->len <= 0 || iname->len > lim) + return -EIO; + + /* Allocate request */ + req = ablkcipher_request_alloc(tfm, GFP_NOFS); + if (!req) { + printk_ratelimited(KERN_ERR + "%s: crypto_request_alloc() failed\n", __func__); + return -ENOMEM; + } + ablkcipher_request_set_callback(req, + CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP, + dir_crypt_complete, &ecr); + + /* Initialize IV */ + memset(iv, 0, FS_CRYPTO_BLOCK_SIZE); + + /* Create decryption request */ + sg_init_one(&src_sg, iname->name, iname->len); + sg_init_one(&dst_sg, oname->name, oname->len); + ablkcipher_request_set_crypt(req, &src_sg, &dst_sg, iname->len, iv); + res = crypto_ablkcipher_decrypt(req); + if (res == -EINPROGRESS || res == -EBUSY) { + wait_for_completion(&ecr.completion); + res = ecr.res; + } + ablkcipher_request_free(req); + if (res < 0) { + printk_ratelimited(KERN_ERR + "%s: Error (error code %d)\n", __func__, res); + return res; + } + + oname->len = strnlen(oname->name, iname->len); + return oname->len; +} + +static const char *lookup_table = + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+,"; + +/** + * digest_encode() - encode a digest using a base64-like scheme + * + * Encodes the input digest using characters from the set [A-Za-z0-9+,]. + * The encoded string is roughly 4/3 times the size of the input string. + */ +static int digest_encode(const char *src, int len, char *dst) +{ + int i = 0, bits = 0, ac = 0; + char *cp = dst; + + while (i < len) { + ac += (((unsigned char) src[i]) << bits); + bits += 8; + do { + *cp++ = lookup_table[ac & 0x3f]; + ac >>= 6; + bits -= 6; + } while (bits >= 6); + i++; + } + if (bits) + *cp++ = lookup_table[ac & 0x3f]; + return cp - dst; +} + +static int digest_decode(const char *src, int len, char *dst) +{ + int i = 0, bits = 0, ac = 0; + const char *p; + char *cp = dst; + + while (i < len) { + p = strchr(lookup_table, src[i]); + if (p == NULL || src[i] == 0) + return -2; + ac += (p - lookup_table) << bits; + bits += 6; + if (bits >= 8) { + *cp++ = ac & 0xff; + ac >>= 8; + bits -= 8; + } + i++; + } + if (ac) + return -1; + return cp - dst; +} + +u32 fscrypt_fname_encrypted_size(struct inode *inode, u32 ilen) +{ + int padding = 32; + struct fscrypt_info *ci = inode->i_crypt_info; + + if (ci) + padding = 4 << (ci->ci_flags & FS_POLICY_FLAGS_PAD_MASK); + if (ilen < FS_CRYPTO_BLOCK_SIZE) + ilen = FS_CRYPTO_BLOCK_SIZE; + return size_round_up(ilen, padding); +} +EXPORT_SYMBOL(fscrypt_fname_encrypted_size); +
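A quick round-trip through the encoder, runnable in userspace; the byte values are arbitrary, and the program assumes digest_encode(), digest_decode() and lookup_table are copied verbatim from the functions above:

	#include <stdio.h>
	/* Assumes digest_encode(), digest_decode() and lookup_table from above. */

	int main(void)
	{
		const char raw[] = { 0, 0x10, (char)0x83 };	/* arbitrary digest bytes */
		char enc[8], dec[8];
		int n = digest_encode(raw, 3, enc);	/* 3 bytes (24 bits) -> 4 chars */
		int m = digest_decode(enc, n, dec);	/* decodes back to 3 bytes */

		printf("%.*s -> %d bytes\n", n, enc, m);	/* prints "AAxg -> 3" */
		return 0;
	}

+/** + * fscrypt_fname_alloc_buffer() - allocate a filename crypto buffer + * + * Allocates an output buffer that is sufficient for the crypto operation + * specified by the context and the direction.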
+ */ +int fscrypt_fname_alloc_buffer(struct inode *inode, + u32 ilen, struct fscrypt_str *crypto_str) +{ + unsigned int olen = fscrypt_fname_encrypted_size(inode, ilen); + + crypto_str->len = olen; + if (olen < FS_FNAME_CRYPTO_DIGEST_SIZE * 2) + olen = FS_FNAME_CRYPTO_DIGEST_SIZE * 2; + /* + * Allocated buffer can hold one more character to null-terminate the + * string + */ + crypto_str->name = kmalloc(olen + 1, GFP_NOFS); + if (!(crypto_str->name)) + return -ENOMEM; + return 0; +} +EXPORT_SYMBOL(fscrypt_fname_alloc_buffer); + +/** + * fscrypt_fname_free_buffer() - free a filename crypto buffer + * + * Frees the buffer allocated for crypto operation. + */ +void fscrypt_fname_free_buffer(struct fscrypt_str *crypto_str) +{ + if (!crypto_str) + return; + kfree(crypto_str->name); + crypto_str->name = NULL; +} +EXPORT_SYMBOL(fscrypt_fname_free_buffer); + +/** + * fscrypt_fname_disk_to_usr() - converts a filename from disk space to user + * space + */ +int fscrypt_fname_disk_to_usr(struct inode *inode, + u32 hash, u32 minor_hash, + const struct fscrypt_str *iname, + struct fscrypt_str *oname) +{ + const struct qstr qname = FSTR_TO_QSTR(iname); + char buf[24]; + int ret; + + if (fscrypt_is_dot_dotdot(&qname)) { + oname->name[0] = '.'; + oname->name[iname->len - 1] = '.'; + oname->len = iname->len; + return oname->len; + } + + if (iname->len < FS_CRYPTO_BLOCK_SIZE) + return -EUCLEAN; + + if (inode->i_crypt_info) + return fname_decrypt(inode, iname, oname); + + if (iname->len <= FS_FNAME_CRYPTO_DIGEST_SIZE) { + ret = digest_encode(iname->name, iname->len, oname->name); + oname->len = ret; + return ret; + } + if (hash) { + memcpy(buf, &hash, 4); + memcpy(buf + 4, &minor_hash, 4); + } else { + memset(buf, 0, 8); + } + memcpy(buf + 8, iname->name + iname->len - 16, 16); + oname->name[0] = '_'; + ret = digest_encode(buf, 24, oname->name + 1); + oname->len = ret + 1; + return ret + 1; +} +EXPORT_SYMBOL(fscrypt_fname_disk_to_usr); +
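The no-key branch above fixes the presented-name sizes that fscrypt_setup_filename() later checks for: the long-name digest packs hash (4 bytes), minor_hash (4 bytes) and the last 16 ciphertext bytes into 24 bytes, which the 6-bits-per-character encoder turns into 32 characters behind the leading '_'. A one-line sanity check of that arithmetic (userspace, illustrative):

	#include <stdio.h>

	int main(void)
	{
		int digest = 4 + 4 + 16;		/* bytes packed into buf[24] above */
		int chars = (digest * 8 + 5) / 6;	/* 6 bits per encoded character */

		printf("%d\n", 1 + chars);		/* '_' + 32 chars = 33 */
		return 0;
	}

+/** + * fscrypt_fname_usr_to_disk() - converts a filename from user space to disk + * space + */ +int fscrypt_fname_usr_to_disk(struct inode *inode, + const struct qstr *iname, + struct fscrypt_str *oname) +{ + if (fscrypt_is_dot_dotdot(iname)) { + oname->name[0] = '.'; + oname->name[iname->len - 1] = '.'; + oname->len = iname->len; + return oname->len; + } + if (inode->i_crypt_info) + return fname_encrypt(inode, iname, oname); + /* + * Without a proper key, a user is not allowed to modify the filenames + * in a directory.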
Consequently, a user space name cannot be mapped to + * a disk-space name + */ + return -EACCES; +} +EXPORT_SYMBOL(fscrypt_fname_usr_to_disk); + +int fscrypt_setup_filename(struct inode *dir, const struct qstr *iname, + int lookup, struct fscrypt_name *fname) +{ + int ret = 0, bigname = 0; + + memset(fname, 0, sizeof(struct fscrypt_name)); + fname->usr_fname = iname; + + if (!dir->i_sb->s_cop->is_encrypted(dir) || + fscrypt_is_dot_dotdot(iname)) { + fname->disk_name.name = (unsigned char *)iname->name; + fname->disk_name.len = iname->len; + return 0; + } + ret = get_crypt_info(dir); + if (ret && ret != -EOPNOTSUPP) + return ret; + + if (dir->i_crypt_info) { + ret = fscrypt_fname_alloc_buffer(dir, iname->len, + &fname->crypto_buf); + if (ret < 0) + return ret; + ret = fname_encrypt(dir, iname, &fname->crypto_buf); + if (ret < 0) + goto errout; + fname->disk_name.name = fname->crypto_buf.name; + fname->disk_name.len = fname->crypto_buf.len; + return 0; + } + if (!lookup) + return -EACCES; + + /* + * We don't have the key and we are doing a lookup; decode the + * user-supplied name + */ + if (iname->name[0] == '_') + bigname = 1; + if ((bigname && (iname->len != 33)) || (!bigname && (iname->len > 43))) + return -ENOENT; + + fname->crypto_buf.name = kmalloc(32, GFP_KERNEL); + if (fname->crypto_buf.name == NULL) + return -ENOMEM; + + ret = digest_decode(iname->name + bigname, iname->len - bigname, + fname->crypto_buf.name); + if (ret < 0) { + ret = -ENOENT; + goto errout; + } + fname->crypto_buf.len = ret; + if (bigname) { + memcpy(&fname->hash, fname->crypto_buf.name, 4); + memcpy(&fname->minor_hash, fname->crypto_buf.name + 4, 4); + } else { + fname->disk_name.name = fname->crypto_buf.name; + fname->disk_name.len = fname->crypto_buf.len; + } + return 0; + +errout: + fscrypt_fname_free_buffer(&fname->crypto_buf); + return ret; +} +EXPORT_SYMBOL(fscrypt_setup_filename); + +void fscrypt_free_filename(struct fscrypt_name *fname) +{ + kfree(fname->crypto_buf.name); + fname->crypto_buf.name = NULL; + fname->usr_fname = NULL; + fname->disk_name.name = NULL; +} +EXPORT_SYMBOL(fscrypt_free_filename); diff --git a/fs/crypto/keyinfo.c b/fs/crypto/keyinfo.c new file mode 100644 index 000000000000..cb618425b73c --- /dev/null +++ b/fs/crypto/keyinfo.c @@ -0,0 +1,278 @@ +/* + * key management facility for FS encryption support. + * + * Copyright (C) 2015, Google, Inc. + * + * This contains encryption key functions. + * + * Written by Michael Halcrow, Ildar Muslukhov, and Uday Savagaonkar, 2015. + */ + +#include +#include +#include +#include +#include +#include +#include + +static void derive_crypt_complete(struct crypto_async_request *req, int rc) +{ + struct fscrypt_completion_result *ecr = req->data; + + if (rc == -EINPROGRESS) + return; + + ecr->res = rc; + complete(&ecr->completion); +} +
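The derivation implemented below is plain AES-128-ECB, with the per-inode nonce as the AES key and the master key as the data. A userspace reference of the same transform, using OpenSSL's legacy AES API purely for illustration:

	#include <openssl/aes.h>

	/* derived[64] = AES-128-ECB(key = nonce[16], data = master[64]) */
	static void derive_key_aes_ref(const unsigned char nonce[16],
				       const unsigned char master[64],
				       unsigned char derived[64])
	{
		AES_KEY enc;
		int i;

		AES_set_encrypt_key(nonce, 128, &enc);
		for (i = 0; i < 64; i += 16)	/* ECB: independent 16-byte blocks */
			AES_encrypt(master + i, derived + i, &enc);
	}

+/** + * derive_key_aes() - Derive a key using AES-128-ECB + * @deriving_key: Encryption key used for derivation. + * @source_key: Source key to which to apply derivation. + * @derived_key: Derived key. + * + * Return: Zero on success; non-zero otherwise.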
+ */ +static int derive_key_aes(u8 deriving_key[FS_AES_128_ECB_KEY_SIZE], + u8 source_key[FS_AES_256_XTS_KEY_SIZE], + u8 derived_key[FS_AES_256_XTS_KEY_SIZE]) +{ + int res = 0; + struct ablkcipher_request *req = NULL; + DECLARE_FS_COMPLETION_RESULT(ecr); + struct scatterlist src_sg, dst_sg; + struct crypto_ablkcipher *tfm = crypto_alloc_ablkcipher("ecb(aes)", 0, + 0); + + if (IS_ERR(tfm)) { + res = PTR_ERR(tfm); + tfm = NULL; + goto out; + } + crypto_ablkcipher_set_flags(tfm, CRYPTO_TFM_REQ_WEAK_KEY); + req = ablkcipher_request_alloc(tfm, GFP_NOFS); + if (!req) { + res = -ENOMEM; + goto out; + } + ablkcipher_request_set_callback(req, + CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP, + derive_crypt_complete, &ecr); + res = crypto_ablkcipher_setkey(tfm, deriving_key, + FS_AES_128_ECB_KEY_SIZE); + if (res < 0) + goto out; + + sg_init_one(&src_sg, source_key, FS_AES_256_XTS_KEY_SIZE); + sg_init_one(&dst_sg, derived_key, FS_AES_256_XTS_KEY_SIZE); + ablkcipher_request_set_crypt(req, &src_sg, &dst_sg, + FS_AES_256_XTS_KEY_SIZE, NULL); + res = crypto_ablkcipher_encrypt(req); + if (res == -EINPROGRESS || res == -EBUSY) { + wait_for_completion(&ecr.completion); + res = ecr.res; + } +out: + if (req) + ablkcipher_request_free(req); + if (tfm) + crypto_free_ablkcipher(tfm); + return res; +} + +static void put_crypt_info(struct fscrypt_info *ci) +{ + if (!ci) + return; + + if (ci->ci_keyring_key) + key_put(ci->ci_keyring_key); + crypto_free_ablkcipher(ci->ci_ctfm); + kmem_cache_free(fscrypt_info_cachep, ci); +} + +int get_crypt_info(struct inode *inode) +{ + struct fscrypt_info *crypt_info; + u8 full_key_descriptor[FS_KEY_DESC_PREFIX_SIZE + + (FS_KEY_DESCRIPTOR_SIZE * 2) + 1]; + struct key *keyring_key = NULL; + struct fscrypt_key *master_key; + struct fscrypt_context ctx; + const struct user_key_payload *ukp; + struct crypto_ablkcipher *ctfm; + const char *cipher_str; + u8 raw_key[FS_MAX_KEY_SIZE]; + u8 mode; + int res; + + res = fscrypt_initialize(); + if (res) + return res; + + if (!inode->i_sb->s_cop->get_context) + return -EOPNOTSUPP; +retry: + crypt_info = ACCESS_ONCE(inode->i_crypt_info); + if (crypt_info) { + if (!crypt_info->ci_keyring_key || + key_validate(crypt_info->ci_keyring_key) == 0) + return 0; + fscrypt_put_encryption_info(inode, crypt_info); + goto retry; + } + + res = inode->i_sb->s_cop->get_context(inode, &ctx, sizeof(ctx)); + if (res < 0) { + if (!fscrypt_dummy_context_enabled(inode)) + return res; + ctx.contents_encryption_mode = FS_ENCRYPTION_MODE_AES_256_XTS; + ctx.filenames_encryption_mode = FS_ENCRYPTION_MODE_AES_256_CTS; + ctx.flags = 0; + } else if (res != sizeof(ctx)) { + return -EINVAL; + } + res = 0; + + crypt_info = kmem_cache_alloc(fscrypt_info_cachep, GFP_NOFS); + if (!crypt_info) + return -ENOMEM; + + crypt_info->ci_flags = ctx.flags; + crypt_info->ci_data_mode = ctx.contents_encryption_mode; + crypt_info->ci_filename_mode = ctx.filenames_encryption_mode; + crypt_info->ci_ctfm = NULL; + crypt_info->ci_keyring_key = NULL; + memcpy(crypt_info->ci_master_key, ctx.master_key_descriptor, + sizeof(crypt_info->ci_master_key)); + if (S_ISREG(inode->i_mode)) + mode = crypt_info->ci_data_mode; + else if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) + mode = crypt_info->ci_filename_mode; + else + BUG(); + + switch (mode) { + case FS_ENCRYPTION_MODE_AES_256_XTS: + cipher_str = "xts(aes)"; + break; + case FS_ENCRYPTION_MODE_AES_256_CTS: + cipher_str = "cts(cbc(aes))"; + break; + default: + printk_once(KERN_WARNING + "%s: unsupported key mode %d (ino %u)\n", + __func__, 
mode, (unsigned) inode->i_ino); + res = -ENOKEY; + goto out; + } + if (fscrypt_dummy_context_enabled(inode)) { + memset(raw_key, 0x42, FS_AES_256_XTS_KEY_SIZE); + goto got_key; + } + memcpy(full_key_descriptor, FS_KEY_DESC_PREFIX, + FS_KEY_DESC_PREFIX_SIZE); + sprintf(full_key_descriptor + FS_KEY_DESC_PREFIX_SIZE, + "%*phN", FS_KEY_DESCRIPTOR_SIZE, + ctx.master_key_descriptor); + full_key_descriptor[FS_KEY_DESC_PREFIX_SIZE + + (2 * FS_KEY_DESCRIPTOR_SIZE)] = '\0'; + keyring_key = request_key(&key_type_logon, full_key_descriptor, NULL); + if (IS_ERR(keyring_key)) { + res = PTR_ERR(keyring_key); + keyring_key = NULL; + goto out; + } + crypt_info->ci_keyring_key = keyring_key; + if (keyring_key->type != &key_type_logon) { + printk_once(KERN_WARNING + "%s: key type must be logon\n", __func__); + res = -ENOKEY; + goto out; + } + down_read(&keyring_key->sem); + ukp = user_key_payload(keyring_key); + if (ukp->datalen != sizeof(struct fscrypt_key)) { + res = -EINVAL; + up_read(&keyring_key->sem); + goto out; + } + master_key = (struct fscrypt_key *)ukp->data; + BUILD_BUG_ON(FS_AES_128_ECB_KEY_SIZE != FS_KEY_DERIVATION_NONCE_SIZE); + + if (master_key->size != FS_AES_256_XTS_KEY_SIZE) { + printk_once(KERN_WARNING + "%s: key size incorrect: %d\n", + __func__, master_key->size); + res = -ENOKEY; + up_read(&keyring_key->sem); + goto out; + } + res = derive_key_aes(ctx.nonce, master_key->raw, raw_key); + up_read(&keyring_key->sem); + if (res) + goto out; +got_key: + ctfm = crypto_alloc_ablkcipher(cipher_str, 0, 0); + if (!ctfm || IS_ERR(ctfm)) { + res = ctfm ? PTR_ERR(ctfm) : -ENOMEM; + printk(KERN_DEBUG + "%s: error %d (inode %u) allocating crypto tfm\n", + __func__, res, (unsigned) inode->i_ino); + goto out; + } + crypt_info->ci_ctfm = ctfm; + crypto_ablkcipher_clear_flags(ctfm, ~0); + crypto_tfm_set_flags(crypto_ablkcipher_tfm(ctfm), + CRYPTO_TFM_REQ_WEAK_KEY); + res = crypto_ablkcipher_setkey(ctfm, raw_key, fscrypt_key_size(mode)); + if (res) + goto out; + + memzero_explicit(raw_key, sizeof(raw_key)); + if (cmpxchg(&inode->i_crypt_info, NULL, crypt_info) != NULL) { + put_crypt_info(crypt_info); + goto retry; + } + return 0; + +out: + if (res == -ENOKEY) + res = 0; + put_crypt_info(crypt_info); + memzero_explicit(raw_key, sizeof(raw_key)); + return res; +} + +void fscrypt_put_encryption_info(struct inode *inode, struct fscrypt_info *ci) +{ + struct fscrypt_info *prev; + + if (ci == NULL) + ci = ACCESS_ONCE(inode->i_crypt_info); + if (ci == NULL) + return; + + prev = cmpxchg(&inode->i_crypt_info, ci, NULL); + if (prev != ci) + return; + + put_crypt_info(ci); +} +EXPORT_SYMBOL(fscrypt_put_encryption_info); + +int fscrypt_get_encryption_info(struct inode *inode) +{ + struct fscrypt_info *ci = inode->i_crypt_info; + + if (!ci || + (ci->ci_keyring_key && + (ci->ci_keyring_key->flags & ((1 << KEY_FLAG_INVALIDATED) | + (1 << KEY_FLAG_REVOKED) | + (1 << KEY_FLAG_DEAD))))) + return get_crypt_info(inode); + return 0; +} +EXPORT_SYMBOL(fscrypt_get_encryption_info);
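For the request_key() call above to find anything, userspace must first have loaded a logon key whose description is the key prefix followed by the hex master key descriptor. A keyutils sketch; the descriptor value is made up, and the prefix and payload layout are assumed to mirror FS_KEY_DESC_PREFIX and struct fscrypt_key from this patch:

	#include <keyutils.h>
	#include <stdio.h>

	/* Assumed to mirror struct fscrypt_key: mode, raw[FS_MAX_KEY_SIZE], size. */
	struct example_fscrypt_key {
		unsigned int mode;
		unsigned char raw[64];
		unsigned int size;
	};

	int main(void)
	{
		struct example_fscrypt_key payload = { .mode = 0, .size = 64 };
		key_serial_t id;

		/* Fill payload.raw with the real 64-byte master key first. */
		id = add_key("logon", "fscrypt:0123456789abcdef",
			     &payload, sizeof(payload), KEY_SPEC_SESSION_KEYRING);
		if (id < 0)
			perror("add_key");
		return 0;
	}

diff --git a/fs/crypto/policy.c b/fs/crypto/policy.c new file mode 100644 index 000000000000..0f9961eede1e --- /dev/null +++ b/fs/crypto/policy.c @@ -0,0 +1,229 @@ +/* + * Encryption policy functions for per-file encryption support. + * + * Copyright (C) 2015, Google, Inc. + * Copyright (C) 2015, Motorola Mobility. + * + * Written by Michael Halcrow, 2015. + * Modified by Jaegeuk Kim, 2015.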
+ */ + +#include +#include +#include + +static int inode_has_encryption_context(struct inode *inode) +{ + if (!inode->i_sb->s_cop->get_context) + return 0; + return (inode->i_sb->s_cop->get_context(inode, NULL, 0L) > 0); +} + +/* + * check whether the policy is consistent with the encryption context + * for the inode + */ +static int is_encryption_context_consistent_with_policy(struct inode *inode, + const struct fscrypt_policy *policy) +{ + struct fscrypt_context ctx; + int res; + + if (!inode->i_sb->s_cop->get_context) + return 0; + + res = inode->i_sb->s_cop->get_context(inode, &ctx, sizeof(ctx)); + if (res != sizeof(ctx)) + return 0; + + return (memcmp(ctx.master_key_descriptor, policy->master_key_descriptor, + FS_KEY_DESCRIPTOR_SIZE) == 0 && + (ctx.flags == policy->flags) && + (ctx.contents_encryption_mode == + policy->contents_encryption_mode) && + (ctx.filenames_encryption_mode == + policy->filenames_encryption_mode)); +} + +static int create_encryption_context_from_policy(struct inode *inode, + const struct fscrypt_policy *policy) +{ + struct fscrypt_context ctx; + int res; + + if (!inode->i_sb->s_cop->set_context) + return -EOPNOTSUPP; + + if (inode->i_sb->s_cop->prepare_context) { + res = inode->i_sb->s_cop->prepare_context(inode); + if (res) + return res; + } + + ctx.format = FS_ENCRYPTION_CONTEXT_FORMAT_V1; + memcpy(ctx.master_key_descriptor, policy->master_key_descriptor, + FS_KEY_DESCRIPTOR_SIZE); + + if (!fscrypt_valid_contents_enc_mode( + policy->contents_encryption_mode)) { + printk(KERN_WARNING + "%s: Invalid contents encryption mode %d\n", __func__, + policy->contents_encryption_mode); + return -EINVAL; + } + + if (!fscrypt_valid_filenames_enc_mode( + policy->filenames_encryption_mode)) { + printk(KERN_WARNING + "%s: Invalid filenames encryption mode %d\n", __func__, + policy->filenames_encryption_mode); + return -EINVAL; + } + + if (policy->flags & ~FS_POLICY_FLAGS_VALID) + return -EINVAL; + + ctx.contents_encryption_mode = policy->contents_encryption_mode; + ctx.filenames_encryption_mode = policy->filenames_encryption_mode; + ctx.flags = policy->flags; + BUILD_BUG_ON(sizeof(ctx.nonce) != FS_KEY_DERIVATION_NONCE_SIZE); + get_random_bytes(ctx.nonce, FS_KEY_DERIVATION_NONCE_SIZE); + + return inode->i_sb->s_cop->set_context(inode, &ctx, sizeof(ctx), NULL); +} + +int fscrypt_process_policy(struct inode *inode, + const struct fscrypt_policy *policy) +{ + if (policy->version != 0) + return -EINVAL; + + if (!inode_has_encryption_context(inode)) { + if (!inode->i_sb->s_cop->empty_dir) + return -EOPNOTSUPP; + if (!inode->i_sb->s_cop->empty_dir(inode)) + return -ENOTEMPTY; + return create_encryption_context_from_policy(inode, policy); + } + + if (is_encryption_context_consistent_with_policy(inode, policy)) + return 0; + + printk(KERN_WARNING "%s: Policy inconsistent with encryption context\n", + __func__); + return -EINVAL; +} +EXPORT_SYMBOL(fscrypt_process_policy); +
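Filesystems are expected to call fscrypt_process_policy() from their set-policy ioctl; a minimal sketch of that glue (the ioctl wiring is hypothetical and omits details such as write access and permission checks):

	/* Hypothetical ioctl glue: copy the policy in from userspace and apply it. */
	static int example_ioc_set_encryption_policy(struct file *filp,
						     const void __user *arg)
	{
		struct fscrypt_policy policy;

		if (copy_from_user(&policy, arg, sizeof(policy)))
			return -EFAULT;

		return fscrypt_process_policy(file_inode(filp), &policy);
	}

+int fscrypt_get_policy(struct inode *inode, struct fscrypt_policy *policy) +{ + struct fscrypt_context ctx; + int res; + + if (!inode->i_sb->s_cop->get_context || + !inode->i_sb->s_cop->is_encrypted(inode)) + return -ENODATA; + + res = inode->i_sb->s_cop->get_context(inode, &ctx, sizeof(ctx)); + if (res != sizeof(ctx)) + return -ENODATA; + if (ctx.format != FS_ENCRYPTION_CONTEXT_FORMAT_V1) + return -EINVAL; + + policy->version = 0; + policy->contents_encryption_mode = ctx.contents_encryption_mode; + policy->filenames_encryption_mode = ctx.filenames_encryption_mode; + policy->flags = ctx.flags; +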
memcpy(&policy->master_key_descriptor, ctx.master_key_descriptor, + FS_KEY_DESCRIPTOR_SIZE); + return 0; +} +EXPORT_SYMBOL(fscrypt_get_policy); + +int fscrypt_has_permitted_context(struct inode *parent, struct inode *child) +{ + struct fscrypt_info *parent_ci, *child_ci; + int res; + + if ((parent == NULL) || (child == NULL)) { + printk(KERN_ERR "parent %p child %p\n", parent, child); + BUG_ON(1); + } + + /* no restrictions if the parent directory is not encrypted */ + if (!parent->i_sb->s_cop->is_encrypted(parent)) + return 1; + /* if the child directory is not encrypted, this is always a problem */ + if (!parent->i_sb->s_cop->is_encrypted(child)) + return 0; + res = fscrypt_get_encryption_info(parent); + if (res) + return 0; + res = fscrypt_get_encryption_info(child); + if (res) + return 0; + parent_ci = parent->i_crypt_info; + child_ci = child->i_crypt_info; + if (!parent_ci && !child_ci) + return 1; + if (!parent_ci || !child_ci) + return 0; + + return (memcmp(parent_ci->ci_master_key, + child_ci->ci_master_key, + FS_KEY_DESCRIPTOR_SIZE) == 0 && + (parent_ci->ci_data_mode == child_ci->ci_data_mode) && + (parent_ci->ci_filename_mode == child_ci->ci_filename_mode) && + (parent_ci->ci_flags == child_ci->ci_flags)); +} +EXPORT_SYMBOL(fscrypt_has_permitted_context); + +/** + * fscrypt_inherit_context() - Sets a child context from its parent + * @parent: Parent inode from which the context is inherited. + * @child: Child inode that inherits the context from @parent. + * @fs_data: private data given by FS. + * @preload: preload child i_crypt_info + * + * Return: Zero on success, non-zero otherwise + */ +int fscrypt_inherit_context(struct inode *parent, struct inode *child, + void *fs_data, bool preload) +{ + struct fscrypt_context ctx; + struct fscrypt_info *ci; + int res; + + if (!parent->i_sb->s_cop->set_context) + return -EOPNOTSUPP; + + res = fscrypt_get_encryption_info(parent); + if (res < 0) + return res; + + ci = parent->i_crypt_info; + if (ci == NULL) + return -ENOKEY; + + ctx.format = FS_ENCRYPTION_CONTEXT_FORMAT_V1; + if (fscrypt_dummy_context_enabled(parent)) { + ctx.contents_encryption_mode = FS_ENCRYPTION_MODE_AES_256_XTS; + ctx.filenames_encryption_mode = FS_ENCRYPTION_MODE_AES_256_CTS; + ctx.flags = 0; + memset(ctx.master_key_descriptor, 0x42, FS_KEY_DESCRIPTOR_SIZE); + res = 0; + } else { + ctx.contents_encryption_mode = ci->ci_data_mode; + ctx.filenames_encryption_mode = ci->ci_filename_mode; + ctx.flags = ci->ci_flags; + memcpy(ctx.master_key_descriptor, ci->ci_master_key, + FS_KEY_DESCRIPTOR_SIZE); + } + get_random_bytes(ctx.nonce, FS_KEY_DERIVATION_NONCE_SIZE); + res = parent->i_sb->s_cop->set_context(child, &ctx, + sizeof(ctx), fs_data); + if (res) + return res; + return preload ? fscrypt_get_encryption_info(child) : 0; +} +EXPORT_SYMBOL(fscrypt_inherit_context);
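New inodes created inside an encrypted directory pick up the parent's context through this helper; a sketch of a create/mkdir-style caller (hypothetical, with fs_data left NULL for simplicity):

	/* Hypothetical create/mkdir path: propagate the parent's context. */
	static int example_init_child_crypto(struct inode *dir, struct inode *inode)
	{
		if (!dir->i_sb->s_cop->is_encrypted(dir))
			return 0;

		/* fs_data is filesystem-private (e.g. a held xattr page); none here. */
		return fscrypt_inherit_context(dir, inode, NULL, true);
	}

diff --git a/fs/f2fs/Kconfig b/fs/f2fs/Kconfig index b0a9dc929f88..402792bae503 100644 --- a/fs/f2fs/Kconfig +++ b/fs/f2fs/Kconfig @@ -76,15 +76,7 @@ config F2FS_FS_ENCRYPTION bool "F2FS Encryption" depends on F2FS_FS depends on F2FS_FS_XATTR - select CRYPTO_AES - select CRYPTO_CBC - select CRYPTO_ECB - select CRYPTO_XTS - select CRYPTO_CTS - select CRYPTO_CTR - select CRYPTO_SHA256 - select KEYS - select ENCRYPTED_KEYS + select FS_ENCRYPTION help Enable encryption of f2fs files and directories.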
This feature is similar to ecryptfs, but it is more memory diff --git a/fs/f2fs/Makefile b/fs/f2fs/Makefile index 08e101ed914c..ca949ea7c02f 100644 --- a/fs/f2fs/Makefile +++ b/fs/f2fs/Makefile @@ -7,5 +7,3 @@ f2fs-$(CONFIG_F2FS_STAT_FS) += debug.o f2fs-$(CONFIG_F2FS_FS_XATTR) += xattr.o f2fs-$(CONFIG_F2FS_FS_POSIX_ACL) += acl.o f2fs-$(CONFIG_F2FS_IO_TRACE) += trace.o -f2fs-$(CONFIG_F2FS_FS_ENCRYPTION) += crypto_policy.o crypto.o \ - crypto_key.o crypto_fname.o diff --git a/fs/f2fs/crypto.c b/fs/f2fs/crypto.c deleted file mode 100644 index 3ef37868d0c7..000000000000 --- a/fs/f2fs/crypto.c +++ /dev/null @@ -1,473 +0,0 @@ -/* - * linux/fs/f2fs/crypto.c - * - * Copied from linux/fs/ext4/crypto.c - * - * Copyright (C) 2015, Google, Inc. - * Copyright (C) 2015, Motorola Mobility - * - * This contains encryption functions for f2fs - * - * Written by Michael Halcrow, 2014. - * - * Filename encryption additions - * Uday Savagaonkar, 2014 - * Encryption policy handling additions - * Ildar Muslukhov, 2014 - * Remove ext4_encrypted_zeroout(), - * add f2fs_restore_and_release_control_page() - * Jaegeuk Kim, 2015. - * - * This has not yet undergone a rigorous security audit. - * - * The usage of AES-XTS should conform to recommendations in NIST - * Special Publication 800-38E and IEEE P1619/D16. - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "f2fs.h" -#include "xattr.h" - -/* Encryption added and removed here! (L: */ - -static unsigned int num_prealloc_crypto_pages = 32; -static unsigned int num_prealloc_crypto_ctxs = 128; - -module_param(num_prealloc_crypto_pages, uint, 0444); -MODULE_PARM_DESC(num_prealloc_crypto_pages, - "Number of crypto pages to preallocate"); -module_param(num_prealloc_crypto_ctxs, uint, 0444); -MODULE_PARM_DESC(num_prealloc_crypto_ctxs, - "Number of crypto contexts to preallocate"); - -static mempool_t *f2fs_bounce_page_pool; - -static LIST_HEAD(f2fs_free_crypto_ctxs); -static DEFINE_SPINLOCK(f2fs_crypto_ctx_lock); - -static struct workqueue_struct *f2fs_read_workqueue; -static DEFINE_MUTEX(crypto_init); - -static struct kmem_cache *f2fs_crypto_ctx_cachep; -struct kmem_cache *f2fs_crypt_info_cachep; - -/** - * f2fs_release_crypto_ctx() - Releases an encryption context - * @ctx: The encryption context to release. - * - * If the encryption context was allocated from the pre-allocated pool, returns - * it to that pool. Else, frees it. - * - * If there's a bounce page in the context, this frees that. - */ -void f2fs_release_crypto_ctx(struct f2fs_crypto_ctx *ctx) -{ - unsigned long flags; - - if (ctx->flags & F2FS_WRITE_PATH_FL && ctx->w.bounce_page) { - mempool_free(ctx->w.bounce_page, f2fs_bounce_page_pool); - ctx->w.bounce_page = NULL; - } - ctx->w.control_page = NULL; - if (ctx->flags & F2FS_CTX_REQUIRES_FREE_ENCRYPT_FL) { - kmem_cache_free(f2fs_crypto_ctx_cachep, ctx); - } else { - spin_lock_irqsave(&f2fs_crypto_ctx_lock, flags); - list_add(&ctx->free_list, &f2fs_free_crypto_ctxs); - spin_unlock_irqrestore(&f2fs_crypto_ctx_lock, flags); - } -} - -/** - * f2fs_get_crypto_ctx() - Gets an encryption context - * @inode: The inode for which we are doing the crypto - * - * Allocates and initializes an encryption context. - * - * Return: An allocated and initialized encryption context on success; error - * value or NULL otherwise. 
- */ -struct f2fs_crypto_ctx *f2fs_get_crypto_ctx(struct inode *inode) -{ - struct f2fs_crypto_ctx *ctx = NULL; - unsigned long flags; - struct f2fs_crypt_info *ci = F2FS_I(inode)->i_crypt_info; - - if (ci == NULL) - return ERR_PTR(-ENOKEY); - - /* - * We first try getting the ctx from a free list because in - * the common case the ctx will have an allocated and - * initialized crypto tfm, so it's probably a worthwhile - * optimization. For the bounce page, we first try getting it - * from the kernel allocator because that's just about as fast - * as getting it from a list and because a cache of free pages - * should generally be a "last resort" option for a filesystem - * to be able to do its job. - */ - spin_lock_irqsave(&f2fs_crypto_ctx_lock, flags); - ctx = list_first_entry_or_null(&f2fs_free_crypto_ctxs, - struct f2fs_crypto_ctx, free_list); - if (ctx) - list_del(&ctx->free_list); - spin_unlock_irqrestore(&f2fs_crypto_ctx_lock, flags); - if (!ctx) { - ctx = kmem_cache_zalloc(f2fs_crypto_ctx_cachep, GFP_NOFS); - if (!ctx) - return ERR_PTR(-ENOMEM); - ctx->flags |= F2FS_CTX_REQUIRES_FREE_ENCRYPT_FL; - } else { - ctx->flags &= ~F2FS_CTX_REQUIRES_FREE_ENCRYPT_FL; - } - ctx->flags &= ~F2FS_WRITE_PATH_FL; - return ctx; -} - -/* - * Call f2fs_decrypt on every single page, reusing the encryption - * context. - */ -static void completion_pages(struct work_struct *work) -{ - struct f2fs_crypto_ctx *ctx = - container_of(work, struct f2fs_crypto_ctx, r.work); - struct bio *bio = ctx->r.bio; - struct bio_vec *bv; - int i; - - bio_for_each_segment_all(bv, bio, i) { - struct page *page = bv->bv_page; - int ret = f2fs_decrypt(page); - - if (ret) { - WARN_ON_ONCE(1); - SetPageError(page); - } else - SetPageUptodate(page); - unlock_page(page); - } - f2fs_release_crypto_ctx(ctx); - bio_put(bio); -} - -void f2fs_end_io_crypto_work(struct f2fs_crypto_ctx *ctx, struct bio *bio) -{ - INIT_WORK(&ctx->r.work, completion_pages); - ctx->r.bio = bio; - queue_work(f2fs_read_workqueue, &ctx->r.work); -} - -static void f2fs_crypto_destroy(void) -{ - struct f2fs_crypto_ctx *pos, *n; - - list_for_each_entry_safe(pos, n, &f2fs_free_crypto_ctxs, free_list) - kmem_cache_free(f2fs_crypto_ctx_cachep, pos); - INIT_LIST_HEAD(&f2fs_free_crypto_ctxs); - if (f2fs_bounce_page_pool) - mempool_destroy(f2fs_bounce_page_pool); - f2fs_bounce_page_pool = NULL; -} - -/** - * f2fs_crypto_initialize() - Set up for f2fs encryption. - * - * We only call this when we start accessing encrypted files, since it - * results in memory getting allocated that wouldn't otherwise be used. - * - * Return: Zero on success, non-zero otherwise. 
- */ -int f2fs_crypto_initialize(void) -{ - int i, res = -ENOMEM; - - if (f2fs_bounce_page_pool) - return 0; - - mutex_lock(&crypto_init); - if (f2fs_bounce_page_pool) - goto already_initialized; - - for (i = 0; i < num_prealloc_crypto_ctxs; i++) { - struct f2fs_crypto_ctx *ctx; - - ctx = kmem_cache_zalloc(f2fs_crypto_ctx_cachep, GFP_KERNEL); - if (!ctx) - goto fail; - list_add(&ctx->free_list, &f2fs_free_crypto_ctxs); - } - - /* must be allocated at the last step to avoid race condition above */ - f2fs_bounce_page_pool = - mempool_create_page_pool(num_prealloc_crypto_pages, 0); - if (!f2fs_bounce_page_pool) - goto fail; - -already_initialized: - mutex_unlock(&crypto_init); - return 0; -fail: - f2fs_crypto_destroy(); - mutex_unlock(&crypto_init); - return res; -} - -/** - * f2fs_exit_crypto() - Shutdown the f2fs encryption system - */ -void f2fs_exit_crypto(void) -{ - f2fs_crypto_destroy(); - - if (f2fs_read_workqueue) - destroy_workqueue(f2fs_read_workqueue); - if (f2fs_crypto_ctx_cachep) - kmem_cache_destroy(f2fs_crypto_ctx_cachep); - if (f2fs_crypt_info_cachep) - kmem_cache_destroy(f2fs_crypt_info_cachep); -} - -int __init f2fs_init_crypto(void) -{ - int res = -ENOMEM; - - f2fs_read_workqueue = alloc_workqueue("f2fs_crypto", WQ_HIGHPRI, 0); - if (!f2fs_read_workqueue) - goto fail; - - f2fs_crypto_ctx_cachep = KMEM_CACHE(f2fs_crypto_ctx, - SLAB_RECLAIM_ACCOUNT); - if (!f2fs_crypto_ctx_cachep) - goto fail; - - f2fs_crypt_info_cachep = KMEM_CACHE(f2fs_crypt_info, - SLAB_RECLAIM_ACCOUNT); - if (!f2fs_crypt_info_cachep) - goto fail; - - return 0; -fail: - f2fs_exit_crypto(); - return res; -} - -void f2fs_restore_and_release_control_page(struct page **page) -{ - struct f2fs_crypto_ctx *ctx; - struct page *bounce_page; - - /* The bounce data pages are unmapped. */ - if ((*page)->mapping) - return; - - /* The bounce data page is unmapped. 
*/ - bounce_page = *page; - ctx = (struct f2fs_crypto_ctx *)page_private(bounce_page); - - /* restore control page */ - *page = ctx->w.control_page; - - f2fs_restore_control_page(bounce_page); -} - -void f2fs_restore_control_page(struct page *data_page) -{ - struct f2fs_crypto_ctx *ctx = - (struct f2fs_crypto_ctx *)page_private(data_page); - - set_page_private(data_page, (unsigned long)NULL); - ClearPagePrivate(data_page); - unlock_page(data_page); - f2fs_release_crypto_ctx(ctx); -} - -/** - * f2fs_crypt_complete() - The completion callback for page encryption - * @req: The asynchronous encryption request context - * @res: The result of the encryption operation - */ -static void f2fs_crypt_complete(struct crypto_async_request *req, int res) -{ - struct f2fs_completion_result *ecr = req->data; - - if (res == -EINPROGRESS) - return; - ecr->res = res; - complete(&ecr->completion); -} - -typedef enum { - F2FS_DECRYPT = 0, - F2FS_ENCRYPT, -} f2fs_direction_t; - -static int f2fs_page_crypto(struct inode *inode, - f2fs_direction_t rw, - pgoff_t index, - struct page *src_page, - struct page *dest_page) -{ - u8 xts_tweak[F2FS_XTS_TWEAK_SIZE]; - struct ablkcipher_request *req = NULL; - DECLARE_F2FS_COMPLETION_RESULT(ecr); - struct scatterlist dst, src; - struct f2fs_crypt_info *ci = F2FS_I(inode)->i_crypt_info; - struct crypto_ablkcipher *tfm = ci->ci_ctfm; - int res = 0; - - req = ablkcipher_request_alloc(tfm, GFP_NOFS); - if (!req) { - printk_ratelimited(KERN_ERR - "%s: crypto_request_alloc() failed\n", - __func__); - return -ENOMEM; - } - ablkcipher_request_set_callback( - req, CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP, - f2fs_crypt_complete, &ecr); - - BUILD_BUG_ON(F2FS_XTS_TWEAK_SIZE < sizeof(index)); - memcpy(xts_tweak, &index, sizeof(index)); - memset(&xts_tweak[sizeof(index)], 0, - F2FS_XTS_TWEAK_SIZE - sizeof(index)); - - sg_init_table(&dst, 1); - sg_set_page(&dst, dest_page, PAGE_CACHE_SIZE, 0); - sg_init_table(&src, 1); - sg_set_page(&src, src_page, PAGE_CACHE_SIZE, 0); - ablkcipher_request_set_crypt(req, &src, &dst, PAGE_CACHE_SIZE, - xts_tweak); - if (rw == F2FS_DECRYPT) - res = crypto_ablkcipher_decrypt(req); - else - res = crypto_ablkcipher_encrypt(req); - if (res == -EINPROGRESS || res == -EBUSY) { - wait_for_completion(&ecr.completion); - res = ecr.res; - } - ablkcipher_request_free(req); - if (res) { - printk_ratelimited(KERN_ERR - "%s: crypto_ablkcipher_encrypt() returned %d\n", - __func__, res); - return res; - } - return 0; -} - -static struct page *alloc_bounce_page(struct f2fs_crypto_ctx *ctx) -{ - ctx->w.bounce_page = mempool_alloc(f2fs_bounce_page_pool, GFP_NOWAIT); - if (ctx->w.bounce_page == NULL) - return ERR_PTR(-ENOMEM); - ctx->flags |= F2FS_WRITE_PATH_FL; - return ctx->w.bounce_page; -} - -/** - * f2fs_encrypt() - Encrypts a page - * @inode: The inode for which the encryption should take place - * @plaintext_page: The page to encrypt. Must be locked. - * - * Allocates a ciphertext page and encrypts plaintext_page into it using the ctx - * encryption context. - * - * Called on the page write path. The caller must call - * f2fs_restore_control_page() on the returned ciphertext page to - * release the bounce buffer and the encryption context. - * - * Return: An allocated page with the encrypted content on success. Else, an - * error value or NULL. 
- */ -struct page *f2fs_encrypt(struct inode *inode, - struct page *plaintext_page) -{ - struct f2fs_crypto_ctx *ctx; - struct page *ciphertext_page = NULL; - int err; - - BUG_ON(!PageLocked(plaintext_page)); - - ctx = f2fs_get_crypto_ctx(inode); - if (IS_ERR(ctx)) - return (struct page *)ctx; - - /* The encryption operation will require a bounce page. */ - ciphertext_page = alloc_bounce_page(ctx); - if (IS_ERR(ciphertext_page)) - goto err_out; - - ctx->w.control_page = plaintext_page; - err = f2fs_page_crypto(inode, F2FS_ENCRYPT, plaintext_page->index, - plaintext_page, ciphertext_page); - if (err) { - ciphertext_page = ERR_PTR(err); - goto err_out; - } - - SetPagePrivate(ciphertext_page); - set_page_private(ciphertext_page, (unsigned long)ctx); - lock_page(ciphertext_page); - return ciphertext_page; - -err_out: - f2fs_release_crypto_ctx(ctx); - return ciphertext_page; -} - -/** - * f2fs_decrypt() - Decrypts a page in-place - * @ctx: The encryption context. - * @page: The page to decrypt. Must be locked. - * - * Decrypts page in-place using the ctx encryption context. - * - * Called from the read completion callback. - * - * Return: Zero on success, non-zero otherwise. - */ -int f2fs_decrypt(struct page *page) -{ - BUG_ON(!PageLocked(page)); - - return f2fs_page_crypto(page->mapping->host, - F2FS_DECRYPT, page->index, page, page); -} - -bool f2fs_valid_contents_enc_mode(uint32_t mode) -{ - return (mode == F2FS_ENCRYPTION_MODE_AES_256_XTS); -} - -/** - * f2fs_validate_encryption_key_size() - Validate the encryption key size - * @mode: The key mode. - * @size: The key size to validate. - * - * Return: The validated key size for @mode. Zero if invalid. - */ -uint32_t f2fs_validate_encryption_key_size(uint32_t mode, uint32_t size) -{ - if (size == f2fs_encryption_key_size(mode)) - return size; - return 0; -} diff --git a/fs/f2fs/crypto_fname.c b/fs/f2fs/crypto_fname.c deleted file mode 100644 index 6dfdc978fe45..000000000000 --- a/fs/f2fs/crypto_fname.c +++ /dev/null @@ -1,446 +0,0 @@ -/* - * linux/fs/f2fs/crypto_fname.c - * - * Copied from linux/fs/ext4/crypto.c - * - * Copyright (C) 2015, Google, Inc. - * Copyright (C) 2015, Motorola Mobility - * - * This contains functions for filename crypto management in f2fs - * - * Written by Uday Savagaonkar, 2014. - * - * Adjust f2fs dentry structure - * Jaegeuk Kim, 2015. - * - * This has not yet undergone a rigorous security audit. - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "f2fs.h" -#include "f2fs_crypto.h" -#include "xattr.h" - -/** - * f2fs_dir_crypt_complete() - - */ -static void f2fs_dir_crypt_complete(struct crypto_async_request *req, int res) -{ - struct f2fs_completion_result *ecr = req->data; - - if (res == -EINPROGRESS) - return; - ecr->res = res; - complete(&ecr->completion); -} - -bool f2fs_valid_filenames_enc_mode(uint32_t mode) -{ - return (mode == F2FS_ENCRYPTION_MODE_AES_256_CTS); -} - -static unsigned max_name_len(struct inode *inode) -{ - return S_ISLNK(inode->i_mode) ? inode->i_sb->s_blocksize : - F2FS_NAME_LEN; -} - -/** - * f2fs_fname_encrypt() - - * - * This function encrypts the input filename, and returns the length of the - * ciphertext. Errors are returned as negative numbers. We trust the caller to - * allocate sufficient memory to oname string. 
- */ -static int f2fs_fname_encrypt(struct inode *inode, - const struct qstr *iname, struct f2fs_str *oname) -{ - u32 ciphertext_len; - struct ablkcipher_request *req = NULL; - DECLARE_F2FS_COMPLETION_RESULT(ecr); - struct f2fs_crypt_info *ci = F2FS_I(inode)->i_crypt_info; - struct crypto_ablkcipher *tfm = ci->ci_ctfm; - int res = 0; - char iv[F2FS_CRYPTO_BLOCK_SIZE]; - struct scatterlist src_sg, dst_sg; - int padding = 4 << (ci->ci_flags & F2FS_POLICY_FLAGS_PAD_MASK); - char *workbuf, buf[32], *alloc_buf = NULL; - unsigned lim = max_name_len(inode); - - if (iname->len <= 0 || iname->len > lim) - return -EIO; - - ciphertext_len = (iname->len < F2FS_CRYPTO_BLOCK_SIZE) ? - F2FS_CRYPTO_BLOCK_SIZE : iname->len; - ciphertext_len = f2fs_fname_crypto_round_up(ciphertext_len, padding); - ciphertext_len = (ciphertext_len > lim) ? lim : ciphertext_len; - - if (ciphertext_len <= sizeof(buf)) { - workbuf = buf; - } else { - alloc_buf = kmalloc(ciphertext_len, GFP_NOFS); - if (!alloc_buf) - return -ENOMEM; - workbuf = alloc_buf; - } - - /* Allocate request */ - req = ablkcipher_request_alloc(tfm, GFP_NOFS); - if (!req) { - printk_ratelimited(KERN_ERR - "%s: crypto_request_alloc() failed\n", __func__); - kfree(alloc_buf); - return -ENOMEM; - } - ablkcipher_request_set_callback(req, - CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP, - f2fs_dir_crypt_complete, &ecr); - - /* Copy the input */ - memcpy(workbuf, iname->name, iname->len); - if (iname->len < ciphertext_len) - memset(workbuf + iname->len, 0, ciphertext_len - iname->len); - - /* Initialize IV */ - memset(iv, 0, F2FS_CRYPTO_BLOCK_SIZE); - - /* Create encryption request */ - sg_init_one(&src_sg, workbuf, ciphertext_len); - sg_init_one(&dst_sg, oname->name, ciphertext_len); - ablkcipher_request_set_crypt(req, &src_sg, &dst_sg, ciphertext_len, iv); - res = crypto_ablkcipher_encrypt(req); - if (res == -EINPROGRESS || res == -EBUSY) { - wait_for_completion(&ecr.completion); - res = ecr.res; - } - kfree(alloc_buf); - ablkcipher_request_free(req); - if (res < 0) { - printk_ratelimited(KERN_ERR - "%s: Error (error code %d)\n", __func__, res); - } - oname->len = ciphertext_len; - return res; -} - -/* - * f2fs_fname_decrypt() - * This function decrypts the input filename, and returns - * the length of the plaintext. - * Errors are returned as negative numbers. - * We trust the caller to allocate sufficient memory to oname string. 
- */ -static int f2fs_fname_decrypt(struct inode *inode, - const struct f2fs_str *iname, struct f2fs_str *oname) -{ - struct ablkcipher_request *req = NULL; - DECLARE_F2FS_COMPLETION_RESULT(ecr); - struct scatterlist src_sg, dst_sg; - struct f2fs_crypt_info *ci = F2FS_I(inode)->i_crypt_info; - struct crypto_ablkcipher *tfm = ci->ci_ctfm; - int res = 0; - char iv[F2FS_CRYPTO_BLOCK_SIZE]; - unsigned lim = max_name_len(inode); - - if (iname->len <= 0 || iname->len > lim) - return -EIO; - - /* Allocate request */ - req = ablkcipher_request_alloc(tfm, GFP_NOFS); - if (!req) { - printk_ratelimited(KERN_ERR - "%s: crypto_request_alloc() failed\n", __func__); - return -ENOMEM; - } - ablkcipher_request_set_callback(req, - CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP, - f2fs_dir_crypt_complete, &ecr); - - /* Initialize IV */ - memset(iv, 0, F2FS_CRYPTO_BLOCK_SIZE); - - /* Create decryption request */ - sg_init_one(&src_sg, iname->name, iname->len); - sg_init_one(&dst_sg, oname->name, oname->len); - ablkcipher_request_set_crypt(req, &src_sg, &dst_sg, iname->len, iv); - res = crypto_ablkcipher_decrypt(req); - if (res == -EINPROGRESS || res == -EBUSY) { - wait_for_completion(&ecr.completion); - res = ecr.res; - } - ablkcipher_request_free(req); - if (res < 0) { - printk_ratelimited(KERN_ERR - "%s: Error in f2fs_fname_decrypt (error code %d)\n", - __func__, res); - return res; - } - - oname->len = strnlen(oname->name, iname->len); - return oname->len; -} - -static const char *lookup_table = - "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+,"; - -/** - * f2fs_fname_encode_digest() - - * - * Encodes the input digest using characters from the set [a-zA-Z0-9_+]. - * The encoded string is roughly 4/3 times the size of the input string. - */ -static int digest_encode(const char *src, int len, char *dst) -{ - int i = 0, bits = 0, ac = 0; - char *cp = dst; - - while (i < len) { - ac += (((unsigned char) src[i]) << bits); - bits += 8; - do { - *cp++ = lookup_table[ac & 0x3f]; - ac >>= 6; - bits -= 6; - } while (bits >= 6); - i++; - } - if (bits) - *cp++ = lookup_table[ac & 0x3f]; - return cp - dst; -} - -static int digest_decode(const char *src, int len, char *dst) -{ - int i = 0, bits = 0, ac = 0; - const char *p; - char *cp = dst; - - while (i < len) { - p = strchr(lookup_table, src[i]); - if (p == NULL || src[i] == 0) - return -2; - ac += (p - lookup_table) << bits; - bits += 6; - if (bits >= 8) { - *cp++ = ac & 0xff; - ac >>= 8; - bits -= 8; - } - i++; - } - if (ac) - return -1; - return cp - dst; -} - -/** - * f2fs_fname_crypto_round_up() - - * - * Return: The next multiple of block size - */ -u32 f2fs_fname_crypto_round_up(u32 size, u32 blksize) -{ - return ((size + blksize - 1) / blksize) * blksize; -} - -unsigned f2fs_fname_encrypted_size(struct inode *inode, u32 ilen) -{ - struct f2fs_crypt_info *ci = F2FS_I(inode)->i_crypt_info; - int padding = 32; - - if (ci) - padding = 4 << (ci->ci_flags & F2FS_POLICY_FLAGS_PAD_MASK); - if (ilen < F2FS_CRYPTO_BLOCK_SIZE) - ilen = F2FS_CRYPTO_BLOCK_SIZE; - return f2fs_fname_crypto_round_up(ilen, padding); -} - -/** - * f2fs_fname_crypto_alloc_obuff() - - * - * Allocates an output buffer that is sufficient for the crypto operation - * specified by the context and the direction. 
- */ -int f2fs_fname_crypto_alloc_buffer(struct inode *inode, - u32 ilen, struct f2fs_str *crypto_str) -{ - unsigned int olen = f2fs_fname_encrypted_size(inode, ilen); - - crypto_str->len = olen; - if (olen < F2FS_FNAME_CRYPTO_DIGEST_SIZE * 2) - olen = F2FS_FNAME_CRYPTO_DIGEST_SIZE * 2; - /* Allocated buffer can hold one more character to null-terminate the - * string */ - crypto_str->name = kmalloc(olen + 1, GFP_NOFS); - if (!(crypto_str->name)) - return -ENOMEM; - return 0; -} - -/** - * f2fs_fname_crypto_free_buffer() - - * - * Frees the buffer allocated for crypto operation. - */ -void f2fs_fname_crypto_free_buffer(struct f2fs_str *crypto_str) -{ - if (!crypto_str) - return; - kfree(crypto_str->name); - crypto_str->name = NULL; -} - -/** - * f2fs_fname_disk_to_usr() - converts a filename from disk space to user space - */ -int f2fs_fname_disk_to_usr(struct inode *inode, - f2fs_hash_t *hash, - const struct f2fs_str *iname, - struct f2fs_str *oname) -{ - const struct qstr qname = FSTR_TO_QSTR(iname); - char buf[24]; - int ret; - - if (is_dot_dotdot(&qname)) { - oname->name[0] = '.'; - oname->name[iname->len - 1] = '.'; - oname->len = iname->len; - return oname->len; - } - if (iname->len < F2FS_CRYPTO_BLOCK_SIZE) { - printk("encrypted inode too small"); - return -EUCLEAN; - } - if (F2FS_I(inode)->i_crypt_info) - return f2fs_fname_decrypt(inode, iname, oname); - - if (iname->len <= F2FS_FNAME_CRYPTO_DIGEST_SIZE) { - ret = digest_encode(iname->name, iname->len, oname->name); - oname->len = ret; - return ret; - } - if (hash) { - memcpy(buf, hash, 4); - memset(buf + 4, 0, 4); - } else - memset(buf, 0, 8); - memcpy(buf + 8, iname->name + iname->len - 16, 16); - oname->name[0] = '_'; - ret = digest_encode(buf, 24, oname->name + 1); - oname->len = ret + 1; - return ret + 1; -} - -/** - * f2fs_fname_usr_to_disk() - converts a filename from user space to disk space - */ -int f2fs_fname_usr_to_disk(struct inode *inode, - const struct qstr *iname, - struct f2fs_str *oname) -{ - int res; - struct f2fs_crypt_info *ci = F2FS_I(inode)->i_crypt_info; - - if (is_dot_dotdot(iname)) { - oname->name[0] = '.'; - oname->name[iname->len - 1] = '.'; - oname->len = iname->len; - return oname->len; - } - - if (ci) { - res = f2fs_fname_encrypt(inode, iname, oname); - return res; - } - /* Without a proper key, a user is not allowed to modify the filenames - * in a directory. 
Consequently, a user space name cannot be mapped to - * a disk-space name */ - return -EACCES; -} - -int f2fs_fname_setup_filename(struct inode *dir, const struct qstr *iname, - int lookup, struct f2fs_filename *fname) -{ - struct f2fs_crypt_info *ci; - int ret = 0, bigname = 0; - - memset(fname, 0, sizeof(struct f2fs_filename)); - fname->usr_fname = iname; - - if (!f2fs_encrypted_inode(dir) || is_dot_dotdot(iname)) { - fname->disk_name.name = (unsigned char *)iname->name; - fname->disk_name.len = iname->len; - return 0; - } - ret = f2fs_get_encryption_info(dir); - if (ret) - return ret; - ci = F2FS_I(dir)->i_crypt_info; - if (ci) { - ret = f2fs_fname_crypto_alloc_buffer(dir, iname->len, - &fname->crypto_buf); - if (ret < 0) - return ret; - ret = f2fs_fname_encrypt(dir, iname, &fname->crypto_buf); - if (ret < 0) - goto errout; - fname->disk_name.name = fname->crypto_buf.name; - fname->disk_name.len = fname->crypto_buf.len; - return 0; - } - if (!lookup) - return -EACCES; - - /* We don't have the key and we are doing a lookup; decode the - * user-supplied name - */ - if (iname->name[0] == '_') - bigname = 1; - if ((bigname && (iname->len != 33)) || - (!bigname && (iname->len > 43))) - return -ENOENT; - - fname->crypto_buf.name = kmalloc(32, GFP_KERNEL); - if (fname->crypto_buf.name == NULL) - return -ENOMEM; - ret = digest_decode(iname->name + bigname, iname->len - bigname, - fname->crypto_buf.name); - if (ret < 0) { - ret = -ENOENT; - goto errout; - } - fname->crypto_buf.len = ret; - if (bigname) { - memcpy(&fname->hash, fname->crypto_buf.name, 4); - } else { - fname->disk_name.name = fname->crypto_buf.name; - fname->disk_name.len = fname->crypto_buf.len; - } - return 0; -errout: - f2fs_fname_crypto_free_buffer(&fname->crypto_buf); - return ret; -} - -void f2fs_fname_free_filename(struct f2fs_filename *fname) -{ - kfree(fname->crypto_buf.name); - fname->crypto_buf.name = NULL; - fname->usr_fname = NULL; - fname->disk_name.name = NULL; -} diff --git a/fs/f2fs/crypto_key.c b/fs/f2fs/crypto_key.c deleted file mode 100644 index 9094fca692fb..000000000000 --- a/fs/f2fs/crypto_key.c +++ /dev/null @@ -1,267 +0,0 @@ -/* - * linux/fs/f2fs/crypto_key.c - * - * Copied from linux/fs/f2fs/crypto_key.c - * - * Copyright (C) 2015, Google, Inc. - * - * This contains encryption key functions for f2fs - * - * Written by Michael Halcrow, Ildar Muslukhov, and Uday Savagaonkar, 2015. - */ -#include -#include -#include -#include -#include -#include -#include - -#include "f2fs.h" -#include "xattr.h" - -static void derive_crypt_complete(struct crypto_async_request *req, int rc) -{ - struct f2fs_completion_result *ecr = req->data; - - if (rc == -EINPROGRESS) - return; - - ecr->res = rc; - complete(&ecr->completion); -} - -/** - * f2fs_derive_key_aes() - Derive a key using AES-128-ECB - * @deriving_key: Encryption key used for derivation. - * @source_key: Source key to which to apply derivation. - * @derived_key: Derived key. - * - * Return: Zero on success; non-zero otherwise. 
- */ -static int f2fs_derive_key_aes(char deriving_key[F2FS_AES_128_ECB_KEY_SIZE], - char source_key[F2FS_AES_256_XTS_KEY_SIZE], - char derived_key[F2FS_AES_256_XTS_KEY_SIZE]) -{ - int res = 0; - struct ablkcipher_request *req = NULL; - DECLARE_F2FS_COMPLETION_RESULT(ecr); - struct scatterlist src_sg, dst_sg; - struct crypto_ablkcipher *tfm = crypto_alloc_ablkcipher("ecb(aes)", 0, - 0); - - if (IS_ERR(tfm)) { - res = PTR_ERR(tfm); - tfm = NULL; - goto out; - } - crypto_ablkcipher_set_flags(tfm, CRYPTO_TFM_REQ_WEAK_KEY); - req = ablkcipher_request_alloc(tfm, GFP_NOFS); - if (!req) { - res = -ENOMEM; - goto out; - } - ablkcipher_request_set_callback(req, - CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP, - derive_crypt_complete, &ecr); - res = crypto_ablkcipher_setkey(tfm, deriving_key, - F2FS_AES_128_ECB_KEY_SIZE); - if (res < 0) - goto out; - - sg_init_one(&src_sg, source_key, F2FS_AES_256_XTS_KEY_SIZE); - sg_init_one(&dst_sg, derived_key, F2FS_AES_256_XTS_KEY_SIZE); - ablkcipher_request_set_crypt(req, &src_sg, &dst_sg, - F2FS_AES_256_XTS_KEY_SIZE, NULL); - res = crypto_ablkcipher_encrypt(req); - if (res == -EINPROGRESS || res == -EBUSY) { - wait_for_completion(&ecr.completion); - res = ecr.res; - } -out: - if (req) - ablkcipher_request_free(req); - if (tfm) - crypto_free_ablkcipher(tfm); - return res; -} - -static void f2fs_free_crypt_info(struct f2fs_crypt_info *ci) -{ - if (!ci) - return; - - key_put(ci->ci_keyring_key); - crypto_free_ablkcipher(ci->ci_ctfm); - kmem_cache_free(f2fs_crypt_info_cachep, ci); -} - -void f2fs_free_encryption_info(struct inode *inode, struct f2fs_crypt_info *ci) -{ - struct f2fs_inode_info *fi = F2FS_I(inode); - struct f2fs_crypt_info *prev; - - if (ci == NULL) - ci = ACCESS_ONCE(fi->i_crypt_info); - if (ci == NULL) - return; - prev = cmpxchg(&fi->i_crypt_info, ci, NULL); - if (prev != ci) - return; - - f2fs_free_crypt_info(ci); -} - -int _f2fs_get_encryption_info(struct inode *inode) -{ - struct f2fs_inode_info *fi = F2FS_I(inode); - struct f2fs_crypt_info *crypt_info; - char full_key_descriptor[F2FS_KEY_DESC_PREFIX_SIZE + - (F2FS_KEY_DESCRIPTOR_SIZE * 2) + 1]; - struct key *keyring_key = NULL; - struct f2fs_encryption_key *master_key; - struct f2fs_encryption_context ctx; - const struct user_key_payload *ukp; - struct crypto_ablkcipher *ctfm; - const char *cipher_str; - char raw_key[F2FS_MAX_KEY_SIZE]; - char mode; - int res; - - res = f2fs_crypto_initialize(); - if (res) - return res; -retry: - crypt_info = ACCESS_ONCE(fi->i_crypt_info); - if (crypt_info) { - if (!crypt_info->ci_keyring_key || - key_validate(crypt_info->ci_keyring_key) == 0) - return 0; - f2fs_free_encryption_info(inode, crypt_info); - goto retry; - } - - res = f2fs_getxattr(inode, F2FS_XATTR_INDEX_ENCRYPTION, - F2FS_XATTR_NAME_ENCRYPTION_CONTEXT, - &ctx, sizeof(ctx), NULL); - if (res < 0) - return res; - else if (res != sizeof(ctx)) - return -EINVAL; - res = 0; - - crypt_info = kmem_cache_alloc(f2fs_crypt_info_cachep, GFP_NOFS); - if (!crypt_info) - return -ENOMEM; - - crypt_info->ci_flags = ctx.flags; - crypt_info->ci_data_mode = ctx.contents_encryption_mode; - crypt_info->ci_filename_mode = ctx.filenames_encryption_mode; - crypt_info->ci_ctfm = NULL; - crypt_info->ci_keyring_key = NULL; - memcpy(crypt_info->ci_master_key, ctx.master_key_descriptor, - sizeof(crypt_info->ci_master_key)); - if (S_ISREG(inode->i_mode)) - mode = crypt_info->ci_data_mode; - else if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) - mode = crypt_info->ci_filename_mode; - else - BUG(); - - switch (mode) 
{ - case F2FS_ENCRYPTION_MODE_AES_256_XTS: - cipher_str = "xts(aes)"; - break; - case F2FS_ENCRYPTION_MODE_AES_256_CTS: - cipher_str = "cts(cbc(aes))"; - break; - default: - printk_once(KERN_WARNING - "f2fs: unsupported key mode %d (ino %u)\n", - mode, (unsigned) inode->i_ino); - res = -ENOKEY; - goto out; - } - - memcpy(full_key_descriptor, F2FS_KEY_DESC_PREFIX, - F2FS_KEY_DESC_PREFIX_SIZE); - sprintf(full_key_descriptor + F2FS_KEY_DESC_PREFIX_SIZE, - "%*phN", F2FS_KEY_DESCRIPTOR_SIZE, - ctx.master_key_descriptor); - full_key_descriptor[F2FS_KEY_DESC_PREFIX_SIZE + - (2 * F2FS_KEY_DESCRIPTOR_SIZE)] = '\0'; - keyring_key = request_key(&key_type_logon, full_key_descriptor, NULL); - if (IS_ERR(keyring_key)) { - res = PTR_ERR(keyring_key); - keyring_key = NULL; - goto out; - } - crypt_info->ci_keyring_key = keyring_key; - if (keyring_key->type != &key_type_logon) { - printk_once(KERN_WARNING "f2fs: key type must be logon\n"); - res = -ENOKEY; - goto out; - } - down_read(&keyring_key->sem); - ukp = user_key_payload(keyring_key); - if (ukp->datalen != sizeof(struct f2fs_encryption_key)) { - res = -EINVAL; - up_read(&keyring_key->sem); - goto out; - } - master_key = (struct f2fs_encryption_key *)ukp->data; - BUILD_BUG_ON(F2FS_AES_128_ECB_KEY_SIZE != - F2FS_KEY_DERIVATION_NONCE_SIZE); - if (master_key->size != F2FS_AES_256_XTS_KEY_SIZE) { - printk_once(KERN_WARNING - "f2fs: key size incorrect: %d\n", - master_key->size); - res = -ENOKEY; - up_read(&keyring_key->sem); - goto out; - } - res = f2fs_derive_key_aes(ctx.nonce, master_key->raw, - raw_key); - up_read(&keyring_key->sem); - if (res) - goto out; - - ctfm = crypto_alloc_ablkcipher(cipher_str, 0, 0); - if (!ctfm || IS_ERR(ctfm)) { - res = ctfm ? PTR_ERR(ctfm) : -ENOMEM; - printk(KERN_DEBUG - "%s: error %d (inode %u) allocating crypto tfm\n", - __func__, res, (unsigned) inode->i_ino); - goto out; - } - crypt_info->ci_ctfm = ctfm; - crypto_ablkcipher_clear_flags(ctfm, ~0); - crypto_tfm_set_flags(crypto_ablkcipher_tfm(ctfm), - CRYPTO_TFM_REQ_WEAK_KEY); - res = crypto_ablkcipher_setkey(ctfm, raw_key, - f2fs_encryption_key_size(mode)); - if (res) - goto out; - - memzero_explicit(raw_key, sizeof(raw_key)); - if (cmpxchg(&fi->i_crypt_info, NULL, crypt_info) != NULL) { - f2fs_free_crypt_info(crypt_info); - goto retry; - } - return 0; - -out: - if (res == -ENOKEY && !S_ISREG(inode->i_mode)) - res = 0; - - f2fs_free_crypt_info(crypt_info); - memzero_explicit(raw_key, sizeof(raw_key)); - return res; -} - -int f2fs_has_encryption_key(struct inode *inode) -{ - struct f2fs_inode_info *fi = F2FS_I(inode); - - return (fi->i_crypt_info != NULL); -} diff --git a/fs/f2fs/crypto_policy.c b/fs/f2fs/crypto_policy.c deleted file mode 100644 index 596f02490f27..000000000000 --- a/fs/f2fs/crypto_policy.c +++ /dev/null @@ -1,210 +0,0 @@ -/* - * copied from linux/fs/ext4/crypto_policy.c - * - * Copyright (C) 2015, Google, Inc. - * Copyright (C) 2015, Motorola Mobility. - * - * This contains encryption policy functions for f2fs with some modifications - * to support f2fs-specific xattr APIs. - * - * Written by Michael Halcrow, 2015. - * Modified by Jaegeuk Kim, 2015. 
- */ -#include -#include -#include -#include - -#include "f2fs.h" -#include "xattr.h" - -static int f2fs_inode_has_encryption_context(struct inode *inode) -{ - int res = f2fs_getxattr(inode, F2FS_XATTR_INDEX_ENCRYPTION, - F2FS_XATTR_NAME_ENCRYPTION_CONTEXT, NULL, 0, NULL); - return (res > 0); -} - -/* - * check whether the policy is consistent with the encryption context - * for the inode - */ -static int f2fs_is_encryption_context_consistent_with_policy( - struct inode *inode, const struct f2fs_encryption_policy *policy) -{ - struct f2fs_encryption_context ctx; - int res = f2fs_getxattr(inode, F2FS_XATTR_INDEX_ENCRYPTION, - F2FS_XATTR_NAME_ENCRYPTION_CONTEXT, &ctx, - sizeof(ctx), NULL); - - if (res != sizeof(ctx)) - return 0; - - return (memcmp(ctx.master_key_descriptor, policy->master_key_descriptor, - F2FS_KEY_DESCRIPTOR_SIZE) == 0 && - (ctx.flags == policy->flags) && - (ctx.contents_encryption_mode == - policy->contents_encryption_mode) && - (ctx.filenames_encryption_mode == - policy->filenames_encryption_mode)); -} - -static int f2fs_create_encryption_context_from_policy( - struct inode *inode, const struct f2fs_encryption_policy *policy) -{ - struct f2fs_encryption_context ctx; - - ctx.format = F2FS_ENCRYPTION_CONTEXT_FORMAT_V1; - memcpy(ctx.master_key_descriptor, policy->master_key_descriptor, - F2FS_KEY_DESCRIPTOR_SIZE); - - if (!f2fs_valid_contents_enc_mode(policy->contents_encryption_mode)) { - printk(KERN_WARNING - "%s: Invalid contents encryption mode %d\n", __func__, - policy->contents_encryption_mode); - return -EINVAL; - } - - if (!f2fs_valid_filenames_enc_mode(policy->filenames_encryption_mode)) { - printk(KERN_WARNING - "%s: Invalid filenames encryption mode %d\n", __func__, - policy->filenames_encryption_mode); - return -EINVAL; - } - - if (policy->flags & ~F2FS_POLICY_FLAGS_VALID) - return -EINVAL; - - ctx.contents_encryption_mode = policy->contents_encryption_mode; - ctx.filenames_encryption_mode = policy->filenames_encryption_mode; - ctx.flags = policy->flags; - BUILD_BUG_ON(sizeof(ctx.nonce) != F2FS_KEY_DERIVATION_NONCE_SIZE); - get_random_bytes(ctx.nonce, F2FS_KEY_DERIVATION_NONCE_SIZE); - - return f2fs_setxattr(inode, F2FS_XATTR_INDEX_ENCRYPTION, - F2FS_XATTR_NAME_ENCRYPTION_CONTEXT, &ctx, - sizeof(ctx), NULL, XATTR_CREATE); -} - -int f2fs_process_policy(const struct f2fs_encryption_policy *policy, - struct inode *inode) -{ - if (policy->version != 0) - return -EINVAL; - - if (!S_ISDIR(inode->i_mode)) - return -EINVAL; - - if (!f2fs_inode_has_encryption_context(inode)) { - if (!f2fs_empty_dir(inode)) - return -ENOTEMPTY; - return f2fs_create_encryption_context_from_policy(inode, - policy); - } - - if (f2fs_is_encryption_context_consistent_with_policy(inode, policy)) - return 0; - - printk(KERN_WARNING "%s: Policy inconsistent with encryption context\n", - __func__); - return -EINVAL; -} - -int f2fs_get_policy(struct inode *inode, struct f2fs_encryption_policy *policy) -{ - struct f2fs_encryption_context ctx; - int res; - - if (!f2fs_encrypted_inode(inode)) - return -ENODATA; - - res = f2fs_getxattr(inode, F2FS_XATTR_INDEX_ENCRYPTION, - F2FS_XATTR_NAME_ENCRYPTION_CONTEXT, - &ctx, sizeof(ctx), NULL); - if (res != sizeof(ctx)) - return -ENODATA; - if (ctx.format != F2FS_ENCRYPTION_CONTEXT_FORMAT_V1) - return -EINVAL; - - policy->version = 0; - policy->contents_encryption_mode = ctx.contents_encryption_mode; - policy->filenames_encryption_mode = ctx.filenames_encryption_mode; - policy->flags = ctx.flags; - memcpy(&policy->master_key_descriptor, 
ctx.master_key_descriptor, - F2FS_KEY_DESCRIPTOR_SIZE); - return 0; -} - -int f2fs_is_child_context_consistent_with_parent(struct inode *parent, - struct inode *child) -{ - struct f2fs_crypt_info *parent_ci, *child_ci; - int res; - - if ((parent == NULL) || (child == NULL)) { - pr_err("parent %p child %p\n", parent, child); - BUG_ON(1); - } - - /* no restrictions if the parent directory is not encrypted */ - if (!f2fs_encrypted_inode(parent)) - return 1; - /* if the child directory is not encrypted, this is always a problem */ - if (!f2fs_encrypted_inode(child)) - return 0; - res = f2fs_get_encryption_info(parent); - if (res) - return 0; - res = f2fs_get_encryption_info(child); - if (res) - return 0; - parent_ci = F2FS_I(parent)->i_crypt_info; - child_ci = F2FS_I(child)->i_crypt_info; - if (!parent_ci && !child_ci) - return 1; - if (!parent_ci || !child_ci) - return 0; - - return (memcmp(parent_ci->ci_master_key, - child_ci->ci_master_key, - F2FS_KEY_DESCRIPTOR_SIZE) == 0 && - (parent_ci->ci_data_mode == child_ci->ci_data_mode) && - (parent_ci->ci_filename_mode == child_ci->ci_filename_mode) && - (parent_ci->ci_flags == child_ci->ci_flags)); -} - -/** - * f2fs_inherit_context() - Sets a child context from its parent - * @parent: Parent inode from which the context is inherited. - * @child: Child inode that inherits the context from @parent. - * - * Return: Zero on success, non-zero otherwise - */ -int f2fs_inherit_context(struct inode *parent, struct inode *child, - struct page *ipage) -{ - struct f2fs_encryption_context ctx; - struct f2fs_crypt_info *ci; - int res; - - res = f2fs_get_encryption_info(parent); - if (res < 0) - return res; - - ci = F2FS_I(parent)->i_crypt_info; - if (ci == NULL) - return -ENOKEY; - - ctx.format = F2FS_ENCRYPTION_CONTEXT_FORMAT_V1; - - ctx.contents_encryption_mode = ci->ci_data_mode; - ctx.filenames_encryption_mode = ci->ci_filename_mode; - ctx.flags = ci->ci_flags; - memcpy(ctx.master_key_descriptor, ci->ci_master_key, - F2FS_KEY_DESCRIPTOR_SIZE); - - get_random_bytes(ctx.nonce, F2FS_KEY_DERIVATION_NONCE_SIZE); - return f2fs_setxattr(child, F2FS_XATTR_INDEX_ENCRYPTION, - F2FS_XATTR_NAME_ENCRYPTION_CONTEXT, &ctx, - sizeof(ctx), ipage, XATTR_CREATE); -} diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 9643d88a01af..e5c762b37239 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -34,9 +34,9 @@ static void f2fs_read_end_io(struct bio *bio) if (f2fs_bio_encrypted(bio)) { if (bio->bi_error) { - f2fs_release_crypto_ctx(bio->bi_private); + fscrypt_release_ctx(bio->bi_private); } else { - f2fs_end_io_crypto_work(bio->bi_private, bio); + fscrypt_decrypt_bio_pages(bio->bi_private, bio); return; } } @@ -64,7 +64,7 @@ static void f2fs_write_end_io(struct bio *bio) bio_for_each_segment_all(bvec, bio, i) { struct page *page = bvec->bv_page; - f2fs_restore_and_release_control_page(&page); + fscrypt_pullback_bio_page(&page, true); if (unlikely(bio->bi_error)) { set_bit(AS_EIO, &page->mapping->flags); @@ -129,16 +129,10 @@ static bool __has_merged_page(struct f2fs_bio_info *io, struct inode *inode, bio_for_each_segment_all(bvec, io->bio, i) { - if (bvec->bv_page->mapping) { + if (bvec->bv_page->mapping) target = bvec->bv_page; - } else { - struct f2fs_crypto_ctx *ctx; - - /* encrypted page */ - ctx = (struct f2fs_crypto_ctx *)page_private( - bvec->bv_page); - target = ctx->w.control_page; - } + else + target = fscrypt_control_page(bvec->bv_page); if (inode && inode == target->mapping->host) return true; @@ -220,7 +214,8 @@ void f2fs_flush_merged_bios(struct f2fs_sb_info 
*sbi) int f2fs_submit_page_bio(struct f2fs_io_info *fio) { struct bio *bio; - struct page *page = fio->encrypted_page ? fio->encrypted_page : fio->page; + struct page *page = fio->encrypted_page ? + fio->encrypted_page : fio->page; trace_f2fs_submit_page_bio(page, fio); f2fs_trace_ios(fio, 0); @@ -992,12 +987,12 @@ submit_and_realloc: bio = NULL; } if (bio == NULL) { - struct f2fs_crypto_ctx *ctx = NULL; + struct fscrypt_ctx *ctx = NULL; if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode)) { - ctx = f2fs_get_crypto_ctx(inode); + ctx = fscrypt_get_ctx(inode); if (IS_ERR(ctx)) goto set_error_page; @@ -1010,7 +1005,7 @@ submit_and_realloc: min_t(int, nr_pages, BIO_MAX_PAGES)); if (!bio) { if (ctx) - f2fs_release_crypto_ctx(ctx); + fscrypt_release_ctx(ctx); goto set_error_page; } bio->bi_bdev = bdev; @@ -1102,7 +1097,7 @@ int do_write_data_page(struct f2fs_io_info *fio) f2fs_wait_on_encrypted_page_writeback(F2FS_I_SB(inode), fio->old_blkaddr); - fio->encrypted_page = f2fs_encrypt(inode, fio->page); + fio->encrypted_page = fscrypt_encrypt_page(inode, fio->page); if (IS_ERR(fio->encrypted_page)) { err = PTR_ERR(fio->encrypted_page); goto out_writepage; @@ -1608,7 +1603,7 @@ repeat: /* avoid symlink page */ if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode)) { - err = f2fs_decrypt(page); + err = fscrypt_decrypt_page(page); if (err) goto fail; } diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 8f09da0552ac..f82e28b121a8 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -77,7 +77,7 @@ static unsigned long dir_block_index(unsigned int level, } static struct f2fs_dir_entry *find_in_block(struct page *dentry_page, - struct f2fs_filename *fname, + struct fscrypt_name *fname, f2fs_hash_t namehash, int *max_slots, struct page **res_page) @@ -103,15 +103,15 @@ static struct f2fs_dir_entry *find_in_block(struct page *dentry_page, return de; } -struct f2fs_dir_entry *find_target_dentry(struct f2fs_filename *fname, +struct f2fs_dir_entry *find_target_dentry(struct fscrypt_name *fname, f2fs_hash_t namehash, int *max_slots, struct f2fs_dentry_ptr *d) { struct f2fs_dir_entry *de; unsigned long bit_pos = 0; int max_len = 0; - struct f2fs_str de_name = FSTR_INIT(NULL, 0); - struct f2fs_str *name = &fname->disk_name; + struct fscrypt_str de_name = FSTR_INIT(NULL, 0); + struct fscrypt_str *name = &fname->disk_name; if (max_slots) *max_slots = 0; @@ -157,7 +157,7 @@ found: static struct f2fs_dir_entry *find_in_level(struct inode *dir, unsigned int level, - struct f2fs_filename *fname, + struct fscrypt_name *fname, struct page **res_page) { struct qstr name = FSTR_TO_QSTR(&fname->disk_name); @@ -218,12 +218,12 @@ struct f2fs_dir_entry *f2fs_find_entry(struct inode *dir, struct f2fs_dir_entry *de = NULL; unsigned int max_depth; unsigned int level; - struct f2fs_filename fname; + struct fscrypt_name fname; int err; *res_page = NULL; - err = f2fs_fname_setup_filename(dir, child, 1, &fname); + err = fscrypt_setup_filename(dir, child, 1, &fname); if (err) return NULL; @@ -251,7 +251,7 @@ struct f2fs_dir_entry *f2fs_find_entry(struct inode *dir, break; } out: - f2fs_fname_free_filename(&fname); + fscrypt_free_filename(&fname); return de; } @@ -413,7 +413,7 @@ struct page *init_inode_metadata(struct inode *inode, struct inode *dir, goto put_error; if (f2fs_encrypted_inode(dir) && f2fs_may_encrypt(inode)) { - err = f2fs_inherit_context(dir, inode, page); + err = fscrypt_inherit_context(dir, inode, page, false); if (err) goto put_error; } @@ -536,11 +536,11 @@ int __f2fs_add_link(struct inode *dir, const struct 
qstr *name, struct f2fs_dentry_block *dentry_blk = NULL; struct f2fs_dentry_ptr d; struct page *page = NULL; - struct f2fs_filename fname; + struct fscrypt_name fname; struct qstr new_name; int slots, err; - err = f2fs_fname_setup_filename(dir, name, 0, &fname); + err = fscrypt_setup_filename(dir, name, 0, &fname); if (err) return err; @@ -639,7 +639,7 @@ fail: kunmap(dentry_page); f2fs_put_page(dentry_page, 1); out: - f2fs_fname_free_filename(&fname); + fscrypt_free_filename(&fname); f2fs_update_time(F2FS_I_SB(dir), REQ_TIME); return err; } @@ -781,12 +781,12 @@ bool f2fs_empty_dir(struct inode *dir) } bool f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d, - unsigned int start_pos, struct f2fs_str *fstr) + unsigned int start_pos, struct fscrypt_str *fstr) { unsigned char d_type = DT_UNKNOWN; unsigned int bit_pos; struct f2fs_dir_entry *de = NULL; - struct f2fs_str de_name = FSTR_INIT(NULL, 0); + struct fscrypt_str de_name = FSTR_INIT(NULL, 0); bit_pos = ((unsigned long)ctx->pos % d->max); @@ -820,8 +820,9 @@ bool f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d, memcpy(de_name.name, d->filename[bit_pos], de_name.len); - ret = f2fs_fname_disk_to_usr(d->inode, &de->hash_code, - &de_name, fstr); + ret = fscrypt_fname_disk_to_usr(d->inode, + (u32)de->hash_code, 0, + &de_name, fstr); kfree(de_name.name); if (ret < 0) return true; @@ -849,16 +850,15 @@ static int f2fs_readdir(struct file *file, struct dir_context *ctx) struct file_ra_state *ra = &file->f_ra; unsigned int n = ((unsigned long)ctx->pos / NR_DENTRY_IN_BLOCK); struct f2fs_dentry_ptr d; - struct f2fs_str fstr = FSTR_INIT(NULL, 0); + struct fscrypt_str fstr = FSTR_INIT(NULL, 0); int err = 0; if (f2fs_encrypted_inode(inode)) { - err = f2fs_get_encryption_info(inode); + err = fscrypt_get_encryption_info(inode); if (err) return err; - err = f2fs_fname_crypto_alloc_buffer(inode, F2FS_NAME_LEN, - &fstr); + err = fscrypt_fname_alloc_buffer(inode, F2FS_NAME_LEN, &fstr); if (err < 0) return err; } @@ -898,14 +898,14 @@ static int f2fs_readdir(struct file *file, struct dir_context *ctx) f2fs_put_page(dentry_page, 1); } out: - f2fs_fname_crypto_free_buffer(&fstr); + fscrypt_fname_free_buffer(&fstr); return err; } static int f2fs_dir_open(struct inode *inode, struct file *filp) { if (f2fs_encrypted_inode(inode)) - return f2fs_get_encryption_info(inode) ? -EACCES : 0; + return fscrypt_get_encryption_info(inode) ? -EACCES : 0; return 0; } diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index ffd03363989b..6447e9002807 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -22,6 +22,7 @@ #include #include #include +#include #ifdef CONFIG_F2FS_CHECK_FS #define f2fs_bug_on(sbi, condition) BUG_ON(condition) @@ -231,12 +232,9 @@ static inline bool __has_cursum_space(struct f2fs_journal *journal, #define F2FS_IOC_WRITE_CHECKPOINT _IO(F2FS_IOCTL_MAGIC, 7) #define F2FS_IOC_DEFRAGMENT _IO(F2FS_IOCTL_MAGIC, 8) -#define F2FS_IOC_SET_ENCRYPTION_POLICY \ - _IOR('f', 19, struct f2fs_encryption_policy) -#define F2FS_IOC_GET_ENCRYPTION_PWSALT \ - _IOW('f', 20, __u8[16]) -#define F2FS_IOC_GET_ENCRYPTION_POLICY \ - _IOW('f', 21, struct f2fs_encryption_policy) +#define F2FS_IOC_SET_ENCRYPTION_POLICY FS_IOC_SET_ENCRYPTION_POLICY +#define F2FS_IOC_GET_ENCRYPTION_POLICY FS_IOC_GET_ENCRYPTION_POLICY +#define F2FS_IOC_GET_ENCRYPTION_PWSALT FS_IOC_GET_ENCRYPTION_PWSALT /* * should be same as XFS_IOC_GOINGDOWN. 
@@ -266,25 +264,6 @@ struct f2fs_defragment { * For INODE and NODE manager */ /* for directory operations */ -struct f2fs_str { - unsigned char *name; - u32 len; -}; - -struct f2fs_filename { - const struct qstr *usr_fname; - struct f2fs_str disk_name; - f2fs_hash_t hash; -#ifdef CONFIG_F2FS_FS_ENCRYPTION - struct f2fs_str crypto_buf; -#endif -}; - -#define FSTR_INIT(n, l) { .name = n, .len = l } -#define FSTR_TO_QSTR(f) QSTR_INIT((f)->name, (f)->len) -#define fname_name(p) ((p)->disk_name.name) -#define fname_len(p) ((p)->disk_name.len) - struct f2fs_dentry_ptr { struct inode *inode; const void *bitmap; @@ -412,15 +391,6 @@ struct f2fs_map_blocks { #define file_enc_name(inode) is_file(inode, FADVISE_ENC_NAME_BIT) #define file_set_enc_name(inode) set_file(inode, FADVISE_ENC_NAME_BIT) -/* Encryption algorithms */ -#define F2FS_ENCRYPTION_MODE_INVALID 0 -#define F2FS_ENCRYPTION_MODE_AES_256_XTS 1 -#define F2FS_ENCRYPTION_MODE_AES_256_GCM 2 -#define F2FS_ENCRYPTION_MODE_AES_256_CBC 3 -#define F2FS_ENCRYPTION_MODE_AES_256_CTS 4 - -#include "f2fs_crypto.h" - #define DEF_DIR_LEVEL 0 struct f2fs_inode_info { @@ -444,13 +414,7 @@ struct f2fs_inode_info { struct list_head dirty_list; /* linked in global dirty list */ struct list_head inmem_pages; /* inmemory pages managed by f2fs */ struct mutex inmem_lock; /* lock for inmemory pages */ - struct extent_tree *extent_tree; /* cached extent_tree entry */ - -#ifdef CONFIG_F2FS_FS_ENCRYPTION - /* Encryption params */ - struct f2fs_crypt_info *i_crypt_info; -#endif }; static inline void get_extent_info(struct extent_info *ext, @@ -1741,10 +1705,10 @@ struct dentry *f2fs_get_parent(struct dentry *child); extern unsigned char f2fs_filetype_table[F2FS_FT_MAX]; void set_de_type(struct f2fs_dir_entry *, umode_t); -struct f2fs_dir_entry *find_target_dentry(struct f2fs_filename *, +struct f2fs_dir_entry *find_target_dentry(struct fscrypt_name *, f2fs_hash_t, int *, struct f2fs_dentry_ptr *); bool f2fs_fill_dentries(struct dir_context *, struct f2fs_dentry_ptr *, - unsigned int, struct f2fs_str *); + unsigned int, struct fscrypt_str *); void do_make_empty_dir(struct inode *, struct inode *, struct f2fs_dentry_ptr *); struct page *init_inode_metadata(struct inode *, struct inode *, @@ -2120,7 +2084,7 @@ int f2fs_convert_inline_inode(struct inode *); int f2fs_write_inline_data(struct inode *, struct page *); bool recover_inline_data(struct inode *, struct page *); struct f2fs_dir_entry *find_in_inline_dir(struct inode *, - struct f2fs_filename *, struct page **); + struct fscrypt_name *, struct page **); struct f2fs_dir_entry *f2fs_parent_inline_dir(struct inode *, struct page **); int make_empty_inline_dir(struct inode *inode, struct inode *, struct page *); int f2fs_add_inline_entry(struct inode *, const struct qstr *, struct inode *, @@ -2129,7 +2093,7 @@ void f2fs_delete_inline_entry(struct f2fs_dir_entry *, struct page *, struct inode *, struct inode *); bool f2fs_empty_inline_dir(struct inode *); int f2fs_read_inline_dir(struct file *, struct dir_context *, - struct f2fs_str *); + struct fscrypt_str *); int f2fs_inline_data_fiemap(struct inode *, struct fiemap_extent_info *, __u64, __u64); @@ -2159,13 +2123,9 @@ void destroy_extent_cache(void); /* * crypto support */ -static inline int f2fs_encrypted_inode(struct inode *inode) +static inline bool f2fs_encrypted_inode(struct inode *inode) { -#ifdef CONFIG_F2FS_FS_ENCRYPTION return file_is_encrypt(inode); -#else - return 0; -#endif } static inline void f2fs_set_encrypted_inode(struct inode *inode) @@ -2177,20 
+2137,12 @@ static inline void f2fs_set_encrypted_inode(struct inode *inode) static inline bool f2fs_bio_encrypted(struct bio *bio) { -#ifdef CONFIG_F2FS_FS_ENCRYPTION - return unlikely(bio->bi_private != NULL); -#else - return false; -#endif + return bio->bi_private != NULL; } static inline int f2fs_sb_has_crypto(struct super_block *sb) { -#ifdef CONFIG_F2FS_FS_ENCRYPTION return F2FS_HAS_FEATURE(sb, F2FS_FEATURE_ENCRYPT); -#else - return 0; -#endif } static inline bool f2fs_may_encrypt(struct inode *inode) @@ -2204,86 +2156,28 @@ static inline bool f2fs_may_encrypt(struct inode *inode) #endif } -/* crypto_policy.c */ -int f2fs_is_child_context_consistent_with_parent(struct inode *, - struct inode *); -int f2fs_inherit_context(struct inode *, struct inode *, struct page *); -int f2fs_process_policy(const struct f2fs_encryption_policy *, struct inode *); -int f2fs_get_policy(struct inode *, struct f2fs_encryption_policy *); - -/* crypt.c */ -extern struct kmem_cache *f2fs_crypt_info_cachep; -bool f2fs_valid_contents_enc_mode(uint32_t); -uint32_t f2fs_validate_encryption_key_size(uint32_t, uint32_t); -struct f2fs_crypto_ctx *f2fs_get_crypto_ctx(struct inode *); -void f2fs_release_crypto_ctx(struct f2fs_crypto_ctx *); -struct page *f2fs_encrypt(struct inode *, struct page *); -int f2fs_decrypt(struct page *); -void f2fs_end_io_crypto_work(struct f2fs_crypto_ctx *, struct bio *); - -/* crypto_key.c */ -void f2fs_free_encryption_info(struct inode *, struct f2fs_crypt_info *); -int _f2fs_get_encryption_info(struct inode *inode); - -/* crypto_fname.c */ -bool f2fs_valid_filenames_enc_mode(uint32_t); -u32 f2fs_fname_crypto_round_up(u32, u32); -unsigned f2fs_fname_encrypted_size(struct inode *, u32); -int f2fs_fname_crypto_alloc_buffer(struct inode *, u32, struct f2fs_str *); -int f2fs_fname_disk_to_usr(struct inode *, f2fs_hash_t *, - const struct f2fs_str *, struct f2fs_str *); -int f2fs_fname_usr_to_disk(struct inode *, const struct qstr *, - struct f2fs_str *); - -#ifdef CONFIG_F2FS_FS_ENCRYPTION -void f2fs_restore_and_release_control_page(struct page **); -void f2fs_restore_control_page(struct page *); - -int __init f2fs_init_crypto(void); -int f2fs_crypto_initialize(void); -void f2fs_exit_crypto(void); - -int f2fs_has_encryption_key(struct inode *); - -static inline int f2fs_get_encryption_info(struct inode *inode) -{ - struct f2fs_crypt_info *ci = F2FS_I(inode)->i_crypt_info; - - if (!ci || - (ci->ci_keyring_key && - (ci->ci_keyring_key->flags & ((1 << KEY_FLAG_INVALIDATED) | - (1 << KEY_FLAG_REVOKED) | - (1 << KEY_FLAG_DEAD))))) - return _f2fs_get_encryption_info(inode); - return 0; -} - -void f2fs_fname_crypto_free_buffer(struct f2fs_str *); -int f2fs_fname_setup_filename(struct inode *, const struct qstr *, - int lookup, struct f2fs_filename *); -void f2fs_fname_free_filename(struct f2fs_filename *); -#else -static inline void f2fs_restore_and_release_control_page(struct page **p) { } -static inline void f2fs_restore_control_page(struct page *p) { } - -static inline int __init f2fs_init_crypto(void) { return 0; } -static inline void f2fs_exit_crypto(void) { } - -static inline int f2fs_has_encryption_key(struct inode *i) { return 0; } -static inline int f2fs_get_encryption_info(struct inode *i) { return 0; } -static inline void f2fs_fname_crypto_free_buffer(struct f2fs_str *p) { } - -static inline int f2fs_fname_setup_filename(struct inode *dir, - const struct qstr *iname, - int lookup, struct f2fs_filename *fname) -{ - memset(fname, 0, sizeof(struct f2fs_filename)); - fname->usr_fname = 
iname; - fname->disk_name.name = (unsigned char *)iname->name; - fname->disk_name.len = iname->len; - return 0; -} - -static inline void f2fs_fname_free_filename(struct f2fs_filename *fname) { } +#ifndef CONFIG_F2FS_FS_ENCRYPTION +#define fscrypt_set_d_op(i) +#define fscrypt_get_ctx fscrypt_notsupp_get_ctx +#define fscrypt_release_ctx fscrypt_notsupp_release_ctx +#define fscrypt_encrypt_page fscrypt_notsupp_encrypt_page +#define fscrypt_decrypt_page fscrypt_notsupp_decrypt_page +#define fscrypt_decrypt_bio_pages fscrypt_notsupp_decrypt_bio_pages +#define fscrypt_pullback_bio_page fscrypt_notsupp_pullback_bio_page +#define fscrypt_restore_control_page fscrypt_notsupp_restore_control_page +#define fscrypt_zeroout_range fscrypt_notsupp_zeroout_range +#define fscrypt_process_policy fscrypt_notsupp_process_policy +#define fscrypt_get_policy fscrypt_notsupp_get_policy +#define fscrypt_has_permitted_context fscrypt_notsupp_has_permitted_context +#define fscrypt_inherit_context fscrypt_notsupp_inherit_context +#define fscrypt_get_encryption_info fscrypt_notsupp_get_encryption_info +#define fscrypt_put_encryption_info fscrypt_notsupp_put_encryption_info +#define fscrypt_setup_filename fscrypt_notsupp_setup_filename +#define fscrypt_free_filename fscrypt_notsupp_free_filename +#define fscrypt_fname_encrypted_size fscrypt_notsupp_fname_encrypted_size +#define fscrypt_fname_alloc_buffer fscrypt_notsupp_fname_alloc_buffer +#define fscrypt_fname_free_buffer fscrypt_notsupp_fname_free_buffer +#define fscrypt_fname_disk_to_usr fscrypt_notsupp_fname_disk_to_usr +#define fscrypt_fname_usr_to_disk fscrypt_notsupp_fname_usr_to_disk #endif #endif diff --git a/fs/f2fs/f2fs_crypto.h b/fs/f2fs/f2fs_crypto.h deleted file mode 100644 index c2c1c2b63b25..000000000000 --- a/fs/f2fs/f2fs_crypto.h +++ /dev/null @@ -1,151 +0,0 @@ -/* - * linux/fs/f2fs/f2fs_crypto.h - * - * Copied from linux/fs/ext4/ext4_crypto.h - * - * Copyright (C) 2015, Google, Inc. - * - * This contains encryption header content for f2fs - * - * Written by Michael Halcrow, 2015. - * Modified by Jaegeuk Kim, 2015. 
- */ -#ifndef _F2FS_CRYPTO_H -#define _F2FS_CRYPTO_H - -#include - -#define F2FS_KEY_DESCRIPTOR_SIZE 8 - -/* Policy provided via an ioctl on the topmost directory */ -struct f2fs_encryption_policy { - char version; - char contents_encryption_mode; - char filenames_encryption_mode; - char flags; - char master_key_descriptor[F2FS_KEY_DESCRIPTOR_SIZE]; -} __attribute__((__packed__)); - -#define F2FS_ENCRYPTION_CONTEXT_FORMAT_V1 1 -#define F2FS_KEY_DERIVATION_NONCE_SIZE 16 - -#define F2FS_POLICY_FLAGS_PAD_4 0x00 -#define F2FS_POLICY_FLAGS_PAD_8 0x01 -#define F2FS_POLICY_FLAGS_PAD_16 0x02 -#define F2FS_POLICY_FLAGS_PAD_32 0x03 -#define F2FS_POLICY_FLAGS_PAD_MASK 0x03 -#define F2FS_POLICY_FLAGS_VALID 0x03 - -/** - * Encryption context for inode - * - * Protector format: - * 1 byte: Protector format (1 = this version) - * 1 byte: File contents encryption mode - * 1 byte: File names encryption mode - * 1 byte: Flags - * 8 bytes: Master Key descriptor - * 16 bytes: Encryption Key derivation nonce - */ -struct f2fs_encryption_context { - char format; - char contents_encryption_mode; - char filenames_encryption_mode; - char flags; - char master_key_descriptor[F2FS_KEY_DESCRIPTOR_SIZE]; - char nonce[F2FS_KEY_DERIVATION_NONCE_SIZE]; -} __attribute__((__packed__)); - -/* Encryption parameters */ -#define F2FS_XTS_TWEAK_SIZE 16 -#define F2FS_AES_128_ECB_KEY_SIZE 16 -#define F2FS_AES_256_GCM_KEY_SIZE 32 -#define F2FS_AES_256_CBC_KEY_SIZE 32 -#define F2FS_AES_256_CTS_KEY_SIZE 32 -#define F2FS_AES_256_XTS_KEY_SIZE 64 -#define F2FS_MAX_KEY_SIZE 64 - -#define F2FS_KEY_DESC_PREFIX "f2fs:" -#define F2FS_KEY_DESC_PREFIX_SIZE 5 - -struct f2fs_encryption_key { - __u32 mode; - char raw[F2FS_MAX_KEY_SIZE]; - __u32 size; -} __attribute__((__packed__)); - -struct f2fs_crypt_info { - char ci_data_mode; - char ci_filename_mode; - char ci_flags; - struct crypto_ablkcipher *ci_ctfm; - struct key *ci_keyring_key; - char ci_master_key[F2FS_KEY_DESCRIPTOR_SIZE]; -}; - -#define F2FS_CTX_REQUIRES_FREE_ENCRYPT_FL 0x00000001 -#define F2FS_WRITE_PATH_FL 0x00000002 - -struct f2fs_crypto_ctx { - union { - struct { - struct page *bounce_page; /* Ciphertext page */ - struct page *control_page; /* Original page */ - } w; - struct { - struct bio *bio; - struct work_struct work; - } r; - struct list_head free_list; /* Free list */ - }; - char flags; /* Flags */ -}; - -struct f2fs_completion_result { - struct completion completion; - int res; -}; - -#define DECLARE_F2FS_COMPLETION_RESULT(ecr) \ - struct f2fs_completion_result ecr = { \ - COMPLETION_INITIALIZER((ecr).completion), 0 } - -static inline int f2fs_encryption_key_size(int mode) -{ - switch (mode) { - case F2FS_ENCRYPTION_MODE_AES_256_XTS: - return F2FS_AES_256_XTS_KEY_SIZE; - case F2FS_ENCRYPTION_MODE_AES_256_GCM: - return F2FS_AES_256_GCM_KEY_SIZE; - case F2FS_ENCRYPTION_MODE_AES_256_CBC: - return F2FS_AES_256_CBC_KEY_SIZE; - case F2FS_ENCRYPTION_MODE_AES_256_CTS: - return F2FS_AES_256_CTS_KEY_SIZE; - default: - BUG(); - } - return 0; -} - -#define F2FS_FNAME_NUM_SCATTER_ENTRIES 4 -#define F2FS_CRYPTO_BLOCK_SIZE 16 -#define F2FS_FNAME_CRYPTO_DIGEST_SIZE 32 - -/** - * For encrypted symlinks, the ciphertext length is stored at the beginning - * of the string in little-endian format. - */ -struct f2fs_encrypted_symlink_data { - __le16 len; - char encrypted_path[1]; -} __attribute__((__packed__)); - -/** - * This function is used to calculate the disk space required to - * store a filename of length l in encrypted symlink format. 
- */ -static inline u32 encrypted_symlink_data_len(u32 l) -{ - return (l + sizeof(struct f2fs_encrypted_symlink_data) - 1); -} -#endif /* _F2FS_CRYPTO_H */ diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index ffa1ec20f963..04ab1e4fc1df 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -421,7 +421,7 @@ static int f2fs_file_mmap(struct file *file, struct vm_area_struct *vma) int err; if (f2fs_encrypted_inode(inode)) { - err = f2fs_get_encryption_info(inode); + err = fscrypt_get_encryption_info(inode); if (err) return 0; if (!f2fs_encrypted_inode(inode)) @@ -443,10 +443,10 @@ static int f2fs_file_open(struct inode *inode, struct file *filp) int ret = generic_file_open(inode, filp); if (!ret && f2fs_encrypted_inode(inode)) { - ret = f2fs_get_encryption_info(inode); + ret = fscrypt_get_encryption_info(inode); if (ret) return -EACCES; - if (!f2fs_encrypted_inode(inode)) + if (!fscrypt_has_encryption_key(inode)) return -ENOKEY; } return ret; @@ -526,7 +526,8 @@ static int truncate_partial_data_page(struct inode *inode, u64 from, truncate_out: f2fs_wait_on_page_writeback(page, DATA, true); zero_user(page, offset, PAGE_CACHE_SIZE - offset); - if (!cache_only || !f2fs_encrypted_inode(inode) || !S_ISREG(inode->i_mode)) + if (!cache_only || !f2fs_encrypted_inode(inode) || + !S_ISREG(inode->i_mode)) set_page_dirty(page); f2fs_put_page(page, 1); return 0; @@ -674,7 +675,7 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr) if (attr->ia_valid & ATTR_SIZE) { if (f2fs_encrypted_inode(inode) && - f2fs_get_encryption_info(inode)) + fscrypt_get_encryption_info(inode)) return -EACCES; if (attr->ia_size <= i_size_read(inode)) { @@ -1529,39 +1530,30 @@ static bool uuid_is_nonzero(__u8 u[16]) static int f2fs_ioc_set_encryption_policy(struct file *filp, unsigned long arg) { -#ifdef CONFIG_F2FS_FS_ENCRYPTION - struct f2fs_encryption_policy policy; + struct fscrypt_policy policy; struct inode *inode = file_inode(filp); - if (copy_from_user(&policy, (struct f2fs_encryption_policy __user *)arg, - sizeof(policy))) + if (copy_from_user(&policy, (struct fscrypt_policy __user *)arg, + sizeof(policy))) return -EFAULT; f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); - return f2fs_process_policy(&policy, inode); -#else - return -EOPNOTSUPP; -#endif + return fscrypt_process_policy(inode, &policy); } static int f2fs_ioc_get_encryption_policy(struct file *filp, unsigned long arg) { -#ifdef CONFIG_F2FS_FS_ENCRYPTION - struct f2fs_encryption_policy policy; + struct fscrypt_policy policy; struct inode *inode = file_inode(filp); int err; - err = f2fs_get_policy(inode, &policy); + err = fscrypt_get_policy(inode, &policy); if (err) return err; - if (copy_to_user((struct f2fs_encryption_policy __user *)arg, &policy, - sizeof(policy))) + if (copy_to_user((struct fscrypt_policy __user *)arg, &policy, sizeof(policy))) return -EFAULT; return 0; -#else - return -EOPNOTSUPP; -#endif } static int f2fs_ioc_get_encryption_pwsalt(struct file *filp, unsigned long arg) @@ -1873,8 +1865,8 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) ssize_t ret; if (f2fs_encrypted_inode(inode) && - !f2fs_has_encryption_key(inode) && - f2fs_get_encryption_info(inode)) + !fscrypt_has_encryption_key(inode) && + fscrypt_get_encryption_info(inode)) return -EACCES; inode_lock(inode); diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index 1c00f2c718ae..358214e9f707 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -277,7 +277,7 @@ process_inline: } struct f2fs_dir_entry *find_in_inline_dir(struct inode *dir, - struct 
f2fs_filename *fname, struct page **res_page) + struct fscrypt_name *fname, struct page **res_page) { struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb); struct f2fs_inline_dentry *inline_dentry; @@ -535,7 +535,7 @@ bool f2fs_empty_inline_dir(struct inode *dir) } int f2fs_read_inline_dir(struct file *file, struct dir_context *ctx, - struct f2fs_str *fstr) + struct fscrypt_str *fstr) { struct inode *inode = file_inode(file); struct f2fs_inline_dentry *inline_dentry = NULL; diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index d4477073dbb0..cb269c46ac25 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -389,10 +389,7 @@ no_delete: } } out_clear: -#ifdef CONFIG_F2FS_FS_ENCRYPTION - if (fi->i_crypt_info) - f2fs_free_encryption_info(inode, fi->i_crypt_info); -#endif + fscrypt_put_encryption_info(inode, NULL); clear_inode(inode); } diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index b3c423a645bc..3bddd9f657e5 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -169,7 +169,7 @@ static int f2fs_link(struct dentry *old_dentry, struct inode *dir, int err; if (f2fs_encrypted_inode(dir) && - !f2fs_is_child_context_consistent_with_parent(dir, inode)) + !fscrypt_has_permitted_context(dir, inode)) return -EPERM; f2fs_balance_fs(sbi, true); @@ -352,20 +352,20 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry, struct f2fs_sb_info *sbi = F2FS_I_SB(dir); struct inode *inode; size_t len = strlen(symname); - struct f2fs_str disk_link = FSTR_INIT((char *)symname, len + 1); - struct f2fs_encrypted_symlink_data *sd = NULL; + struct fscrypt_str disk_link = FSTR_INIT((char *)symname, len + 1); + struct fscrypt_symlink_data *sd = NULL; int err; if (f2fs_encrypted_inode(dir)) { - err = f2fs_get_encryption_info(dir); + err = fscrypt_get_encryption_info(dir); if (err) return err; - if (!f2fs_encrypted_inode(dir)) + if (!fscrypt_has_encryption_key(dir)) return -EPERM; - disk_link.len = (f2fs_fname_encrypted_size(dir, len) + - sizeof(struct f2fs_encrypted_symlink_data)); + disk_link.len = (fscrypt_fname_encrypted_size(dir, len) + + sizeof(struct fscrypt_symlink_data)); } if (disk_link.len > dir->i_sb->s_blocksize) @@ -393,7 +393,7 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry, if (f2fs_encrypted_inode(inode)) { struct qstr istr = QSTR_INIT(symname, len); - struct f2fs_str ostr; + struct fscrypt_str ostr; sd = kzalloc(disk_link.len, GFP_NOFS); if (!sd) { @@ -401,18 +401,18 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry, goto err_out; } - err = f2fs_get_encryption_info(inode); + err = fscrypt_get_encryption_info(inode); if (err) goto err_out; - if (!f2fs_encrypted_inode(inode)) { + if (!fscrypt_has_encryption_key(inode)) { err = -EPERM; goto err_out; } ostr.name = sd->encrypted_path; ostr.len = disk_link.len; - err = f2fs_fname_usr_to_disk(inode, &istr, &ostr); + err = fscrypt_fname_usr_to_disk(inode, &istr, &ostr); if (err < 0) goto err_out; @@ -593,7 +593,7 @@ out: static int f2fs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode) { if (f2fs_encrypted_inode(dir)) { - int err = f2fs_get_encryption_info(dir); + int err = fscrypt_get_encryption_info(dir); if (err) return err; } @@ -623,8 +623,7 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, int err = -ENOENT; if ((old_dir != new_dir) && f2fs_encrypted_inode(new_dir) && - !f2fs_is_child_context_consistent_with_parent(new_dir, - old_inode)) { + !fscrypt_has_permitted_context(new_dir, old_inode)) { err = -EPERM; goto out; } @@ -804,11 +803,9 @@ static int 
f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry, int err = -ENOENT; if ((f2fs_encrypted_inode(old_dir) || f2fs_encrypted_inode(new_dir)) && - (old_dir != new_dir) && - (!f2fs_is_child_context_consistent_with_parent(new_dir, - old_inode) || - !f2fs_is_child_context_consistent_with_parent(old_dir, - new_inode))) + (old_dir != new_dir) && + (!fscrypt_has_permitted_context(new_dir, old_inode) || + !fscrypt_has_permitted_context(old_dir, new_inode))) return -EPERM; old_entry = f2fs_find_entry(old_dir, &old_dentry->d_name, &old_page); @@ -970,16 +967,15 @@ static int f2fs_rename2(struct inode *old_dir, struct dentry *old_dentry, return f2fs_rename(old_dir, old_dentry, new_dir, new_dentry, flags); } -#ifdef CONFIG_F2FS_FS_ENCRYPTION static const char *f2fs_encrypted_get_link(struct dentry *dentry, struct inode *inode, struct delayed_call *done) { struct page *cpage = NULL; char *caddr, *paddr = NULL; - struct f2fs_str cstr = FSTR_INIT(NULL, 0); - struct f2fs_str pstr = FSTR_INIT(NULL, 0); - struct f2fs_encrypted_symlink_data *sd; + struct fscrypt_str cstr = FSTR_INIT(NULL, 0); + struct fscrypt_str pstr = FSTR_INIT(NULL, 0); + struct fscrypt_symlink_data *sd; loff_t size = min_t(loff_t, i_size_read(inode), PAGE_SIZE - 1); u32 max_size = inode->i_sb->s_blocksize; int res; @@ -987,7 +983,7 @@ static const char *f2fs_encrypted_get_link(struct dentry *dentry, if (!dentry) return ERR_PTR(-ECHILD); - res = f2fs_get_encryption_info(inode); + res = fscrypt_get_encryption_info(inode); if (res) return ERR_PTR(res); @@ -998,7 +994,7 @@ static const char *f2fs_encrypted_get_link(struct dentry *dentry, caddr[size] = 0; /* Symlink is encrypted */ - sd = (struct f2fs_encrypted_symlink_data *)caddr; + sd = (struct fscrypt_symlink_data *)caddr; cstr.name = sd->encrypted_path; cstr.len = le16_to_cpu(sd->len); @@ -1014,17 +1010,16 @@ static const char *f2fs_encrypted_get_link(struct dentry *dentry, goto errout; } - if ((cstr.len + sizeof(struct f2fs_encrypted_symlink_data) - 1) > - max_size) { + if ((cstr.len + sizeof(struct fscrypt_symlink_data) - 1) > max_size) { /* Symlink data on the disk is corrupted */ res = -EIO; goto errout; } - res = f2fs_fname_crypto_alloc_buffer(inode, cstr.len, &pstr); + res = fscrypt_fname_alloc_buffer(inode, cstr.len, &pstr); if (res) goto errout; - res = f2fs_fname_disk_to_usr(inode, NULL, &cstr, &pstr); + res = fscrypt_fname_disk_to_usr(inode, 0, 0, &cstr, &pstr); if (res < 0) goto errout; @@ -1037,7 +1032,7 @@ static const char *f2fs_encrypted_get_link(struct dentry *dentry, set_delayed_call(done, kfree_link, paddr); return paddr; errout: - f2fs_fname_crypto_free_buffer(&pstr); + fscrypt_fname_free_buffer(&pstr); page_cache_release(cpage); return ERR_PTR(res); } @@ -1054,7 +1049,6 @@ const struct inode_operations f2fs_encrypted_symlink_inode_operations = { .removexattr = generic_removexattr, #endif }; -#endif const struct inode_operations f2fs_dir_inode_operations = { .create = f2fs_create, diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 579372d9291f..7b62016e66cd 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -470,10 +470,6 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb) /* Will be used by directory only */ fi->i_dir_level = F2FS_SB(sb)->dir_level; - -#ifdef CONFIG_F2FS_FS_ENCRYPTION - fi->i_crypt_info = NULL; -#endif return &fi->vfs_inode; } @@ -507,11 +503,7 @@ static int f2fs_drop_inode(struct inode *inode) sb_end_intwrite(inode->i_sb); -#ifdef CONFIG_F2FS_FS_ENCRYPTION - if (F2FS_I(inode)->i_crypt_info) - 
f2fs_free_encryption_info(inode, - F2FS_I(inode)->i_crypt_info); -#endif + fscrypt_put_encryption_info(inode, NULL); spin_lock(&inode->i_lock); atomic_dec(&inode->i_count); } @@ -891,6 +883,41 @@ static struct super_operations f2fs_sops = { .remount_fs = f2fs_remount, }; +#ifdef CONFIG_F2FS_FS_ENCRYPTION +static int f2fs_get_context(struct inode *inode, void *ctx, size_t len) +{ + return f2fs_getxattr(inode, F2FS_XATTR_INDEX_ENCRYPTION, + F2FS_XATTR_NAME_ENCRYPTION_CONTEXT, + ctx, len, NULL); +} + +static int f2fs_set_context(struct inode *inode, const void *ctx, size_t len, + void *fs_data) +{ + return f2fs_setxattr(inode, F2FS_XATTR_INDEX_ENCRYPTION, + F2FS_XATTR_NAME_ENCRYPTION_CONTEXT, + ctx, len, fs_data, XATTR_CREATE); +} + +static unsigned f2fs_max_namelen(struct inode *inode) +{ + return S_ISLNK(inode->i_mode) ? + inode->i_sb->s_blocksize : F2FS_NAME_LEN; +} + +static struct fscrypt_operations f2fs_cryptops = { + .get_context = f2fs_get_context, + .set_context = f2fs_set_context, + .is_encrypted = f2fs_encrypted_inode, + .empty_dir = f2fs_empty_dir, + .max_namelen = f2fs_max_namelen, +}; +#else +static struct fscrypt_operations f2fs_cryptops = { + .is_encrypted = f2fs_encrypted_inode, +}; +#endif + static struct inode *f2fs_nfs_get_inode(struct super_block *sb, u64 ino, u32 generation) { @@ -1314,6 +1341,7 @@ try_onemore: get_random_bytes(&sbi->s_next_generation, sizeof(u32)); sb->s_op = &f2fs_sops; + sb->s_cop = &f2fs_cryptops; sb->s_xattr = f2fs_xattr_handlers; sb->s_export_op = &f2fs_export_ops; sb->s_magic = F2FS_SUPER_MAGIC; @@ -1619,13 +1647,9 @@ static int __init init_f2fs_fs(void) err = -ENOMEM; goto free_extent_cache; } - err = f2fs_init_crypto(); - if (err) - goto free_kset; - err = register_shrinker(&f2fs_shrinker_info); if (err) - goto free_crypto; + goto free_kset; err = register_filesystem(&f2fs_fs_type); if (err) @@ -1640,8 +1664,6 @@ free_filesystem: unregister_filesystem(&f2fs_fs_type); free_shrinker: unregister_shrinker(&f2fs_shrinker_info); -free_crypto: - f2fs_exit_crypto(); free_kset: kset_unregister(f2fs_kset); free_extent_cache: @@ -1664,7 +1686,6 @@ static void __exit exit_f2fs_fs(void) f2fs_destroy_root_stats(); unregister_shrinker(&f2fs_shrinker_info); unregister_filesystem(&f2fs_fs_type); - f2fs_exit_crypto(); destroy_extent_cache(); destroy_checkpoint_caches(); destroy_segment_manager_caches(); diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 7781ce110503..c7bdfc54cec5 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -228,6 +228,8 @@ struct dentry_operations { #define DCACHE_FALLTHRU 0x01000000 /* Fall through to lower layer */ #define DCACHE_OP_SELECT_INODE 0x02000000 /* Unioned entry: dcache op selects inode */ +#define DCACHE_ENCRYPTED_WITH_KEY 0x04000000 /* dir is encrypted with a valid key */ + extern seqlock_t rename_lock; /* diff --git a/include/linux/fs.h b/include/linux/fs.h index ae681002100a..28fc12189997 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -53,6 +53,8 @@ struct swap_info_struct; struct seq_file; struct workqueue_struct; struct iov_iter; +struct fscrypt_info; +struct fscrypt_operations; extern void __init inode_init(void); extern void __init inode_init_early(void); @@ -678,6 +680,10 @@ struct inode { struct hlist_head i_fsnotify_marks; #endif +#if IS_ENABLED(CONFIG_FS_ENCRYPTION) + struct fscrypt_info *i_crypt_info; +#endif + void *i_private; /* fs or device private pointer */ }; @@ -1323,6 +1329,8 @@ struct super_block { #endif const struct xattr_handler **s_xattr; + const struct 
fscrypt_operations *s_cop; + struct hlist_bl_head s_anon; /* anonymous dentries for (nfs) exporting */ struct list_head s_mounts; /* list of mounts; _not_ for fs use */ struct block_device *s_bdev; diff --git a/include/linux/fscrypto.h b/include/linux/fscrypto.h new file mode 100644 index 000000000000..895cdac4fcdd --- /dev/null +++ b/include/linux/fscrypto.h @@ -0,0 +1,433 @@ +/* + * General per-file encryption definition + * + * Copyright (C) 2015, Google, Inc. + * + * Written by Michael Halcrow, 2015. + * Modified by Jaegeuk Kim, 2015. + */ + +#ifndef _LINUX_FSCRYPTO_H +#define _LINUX_FSCRYPTO_H + +#include +#include +#include +#include +#include +#include + +#define FS_KEY_DERIVATION_NONCE_SIZE 16 +#define FS_ENCRYPTION_CONTEXT_FORMAT_V1 1 + +#define FS_POLICY_FLAGS_PAD_4 0x00 +#define FS_POLICY_FLAGS_PAD_8 0x01 +#define FS_POLICY_FLAGS_PAD_16 0x02 +#define FS_POLICY_FLAGS_PAD_32 0x03 +#define FS_POLICY_FLAGS_PAD_MASK 0x03 +#define FS_POLICY_FLAGS_VALID 0x03 + +/* Encryption algorithms */ +#define FS_ENCRYPTION_MODE_INVALID 0 +#define FS_ENCRYPTION_MODE_AES_256_XTS 1 +#define FS_ENCRYPTION_MODE_AES_256_GCM 2 +#define FS_ENCRYPTION_MODE_AES_256_CBC 3 +#define FS_ENCRYPTION_MODE_AES_256_CTS 4 + +/** + * Encryption context for inode + * + * Protector format: + * 1 byte: Protector format (1 = this version) + * 1 byte: File contents encryption mode + * 1 byte: File names encryption mode + * 1 byte: Flags + * 8 bytes: Master Key descriptor + * 16 bytes: Encryption Key derivation nonce + */ +struct fscrypt_context { + u8 format; + u8 contents_encryption_mode; + u8 filenames_encryption_mode; + u8 flags; + u8 master_key_descriptor[FS_KEY_DESCRIPTOR_SIZE]; + u8 nonce[FS_KEY_DERIVATION_NONCE_SIZE]; +} __packed; + +/* Encryption parameters */ +#define FS_XTS_TWEAK_SIZE 16 +#define FS_AES_128_ECB_KEY_SIZE 16 +#define FS_AES_256_GCM_KEY_SIZE 32 +#define FS_AES_256_CBC_KEY_SIZE 32 +#define FS_AES_256_CTS_KEY_SIZE 32 +#define FS_AES_256_XTS_KEY_SIZE 64 +#define FS_MAX_KEY_SIZE 64 + +#define FS_KEY_DESC_PREFIX "fscrypt:" +#define FS_KEY_DESC_PREFIX_SIZE 8 + +/* This is passed in from userspace into the kernel keyring */ +struct fscrypt_key { + u32 mode; + u8 raw[FS_MAX_KEY_SIZE]; + u32 size; +} __packed; + +struct fscrypt_info { + u8 ci_data_mode; + u8 ci_filename_mode; + u8 ci_flags; + struct crypto_ablkcipher *ci_ctfm; + struct key *ci_keyring_key; + u8 ci_master_key[FS_KEY_DESCRIPTOR_SIZE]; +}; + +#define FS_CTX_REQUIRES_FREE_ENCRYPT_FL 0x00000001 +#define FS_WRITE_PATH_FL 0x00000002 + +struct fscrypt_ctx { + union { + struct { + struct page *bounce_page; /* Ciphertext page */ + struct page *control_page; /* Original page */ + } w; + struct { + struct bio *bio; + struct work_struct work; + } r; + struct list_head free_list; /* Free list */ + }; + u8 flags; /* Flags */ + u8 mode; /* Encryption mode for tfm */ +}; + +struct fscrypt_completion_result { + struct completion completion; + int res; +}; + +#define DECLARE_FS_COMPLETION_RESULT(ecr) \ + struct fscrypt_completion_result ecr = { \ + COMPLETION_INITIALIZER((ecr).completion), 0 } + +static inline int fscrypt_key_size(int mode) +{ + switch (mode) { + case FS_ENCRYPTION_MODE_AES_256_XTS: + return FS_AES_256_XTS_KEY_SIZE; + case FS_ENCRYPTION_MODE_AES_256_GCM: + return FS_AES_256_GCM_KEY_SIZE; + case FS_ENCRYPTION_MODE_AES_256_CBC: + return FS_AES_256_CBC_KEY_SIZE; + case FS_ENCRYPTION_MODE_AES_256_CTS: + return FS_AES_256_CTS_KEY_SIZE; + default: + BUG(); + } + return 0; +} + +#define FS_FNAME_NUM_SCATTER_ENTRIES 4 +#define FS_CRYPTO_BLOCK_SIZE 16 
+#define FS_FNAME_CRYPTO_DIGEST_SIZE	32
+
+/**
+ * For encrypted symlinks, the ciphertext length is stored at the beginning
+ * of the string in little-endian format.
+ */
+struct fscrypt_symlink_data {
+	__le16 len;
+	char encrypted_path[1];
+} __packed;
+
+/**
+ * This function is used to calculate the disk space required to
+ * store a filename of length l in encrypted symlink format.
+ */
+static inline u32 fscrypt_symlink_data_len(u32 l)
+{
+	if (l < FS_CRYPTO_BLOCK_SIZE)
+		l = FS_CRYPTO_BLOCK_SIZE;
+	return (l + sizeof(struct fscrypt_symlink_data) - 1);
+}
+
+struct fscrypt_str {
+	unsigned char *name;
+	u32 len;
+};
+
+struct fscrypt_name {
+	const struct qstr *usr_fname;
+	struct fscrypt_str disk_name;
+	u32 hash;
+	u32 minor_hash;
+	struct fscrypt_str crypto_buf;
+};
+
+#define FSTR_INIT(n, l)		{ .name = n, .len = l }
+#define FSTR_TO_QSTR(f)		QSTR_INIT((f)->name, (f)->len)
+#define fname_name(p)		((p)->disk_name.name)
+#define fname_len(p)		((p)->disk_name.len)
+
+/*
+ * crypto operations for filesystems
+ */
+struct fscrypt_operations {
+	int (*get_context)(struct inode *, void *, size_t);
+	int (*prepare_context)(struct inode *);
+	int (*set_context)(struct inode *, const void *, size_t, void *);
+	int (*dummy_context)(struct inode *);
+	bool (*is_encrypted)(struct inode *);
+	bool (*empty_dir)(struct inode *);
+	unsigned (*max_namelen)(struct inode *);
+};
+
+static inline bool fscrypt_dummy_context_enabled(struct inode *inode)
+{
+	if (inode->i_sb->s_cop->dummy_context &&
+			inode->i_sb->s_cop->dummy_context(inode))
+		return true;
+	return false;
+}
+
+static inline bool fscrypt_valid_contents_enc_mode(u32 mode)
+{
+	return (mode == FS_ENCRYPTION_MODE_AES_256_XTS);
+}
+
+static inline bool fscrypt_valid_filenames_enc_mode(u32 mode)
+{
+	return (mode == FS_ENCRYPTION_MODE_AES_256_CTS);
+}
+
+static inline u32 fscrypt_validate_encryption_key_size(u32 mode, u32 size)
+{
+	if (size == fscrypt_key_size(mode))
+		return size;
+	return 0;
+}
+
+static inline bool fscrypt_is_dot_dotdot(const struct qstr *str)
+{
+	if (str->len == 1 && str->name[0] == '.')
+		return true;
+
+	if (str->len == 2 && str->name[0] == '.'
&& str->name[1] == '.') + return true; + + return false; +} + +static inline struct page *fscrypt_control_page(struct page *page) +{ +#if IS_ENABLED(CONFIG_FS_ENCRYPTION) + return ((struct fscrypt_ctx *)page_private(page))->w.control_page; +#else + WARN_ON_ONCE(1); + return ERR_PTR(-EINVAL); +#endif +} + +static inline int fscrypt_has_encryption_key(struct inode *inode) +{ +#if IS_ENABLED(CONFIG_FS_ENCRYPTION) + return (inode->i_crypt_info != NULL); +#else + return 0; +#endif +} + +static inline void fscrypt_set_encrypted_dentry(struct dentry *dentry) +{ +#if IS_ENABLED(CONFIG_FS_ENCRYPTION) + spin_lock(&dentry->d_lock); + dentry->d_flags |= DCACHE_ENCRYPTED_WITH_KEY; + spin_unlock(&dentry->d_lock); +#endif +} + +#if IS_ENABLED(CONFIG_FS_ENCRYPTION) +extern const struct dentry_operations fscrypt_d_ops; +#endif + +static inline void fscrypt_set_d_op(struct dentry *dentry) +{ +#if IS_ENABLED(CONFIG_FS_ENCRYPTION) + d_set_d_op(dentry, &fscrypt_d_ops); +#endif +} + +#if IS_ENABLED(CONFIG_FS_ENCRYPTION) +/* crypto.c */ +extern struct kmem_cache *fscrypt_info_cachep; +int fscrypt_initialize(void); + +extern struct fscrypt_ctx *fscrypt_get_ctx(struct inode *); +extern void fscrypt_release_ctx(struct fscrypt_ctx *); +extern struct page *fscrypt_encrypt_page(struct inode *, struct page *); +extern int fscrypt_decrypt_page(struct page *); +extern void fscrypt_decrypt_bio_pages(struct fscrypt_ctx *, struct bio *); +extern void fscrypt_pullback_bio_page(struct page **, bool); +extern void fscrypt_restore_control_page(struct page *); +extern int fscrypt_zeroout_range(struct inode *, pgoff_t, sector_t, + unsigned int); +/* policy.c */ +extern int fscrypt_process_policy(struct inode *, + const struct fscrypt_policy *); +extern int fscrypt_get_policy(struct inode *, struct fscrypt_policy *); +extern int fscrypt_has_permitted_context(struct inode *, struct inode *); +extern int fscrypt_inherit_context(struct inode *, struct inode *, + void *, bool); +/* keyinfo.c */ +extern int get_crypt_info(struct inode *); +extern int fscrypt_get_encryption_info(struct inode *); +extern void fscrypt_put_encryption_info(struct inode *, struct fscrypt_info *); + +/* fname.c */ +extern int fscrypt_setup_filename(struct inode *, const struct qstr *, + int lookup, struct fscrypt_name *); +extern void fscrypt_free_filename(struct fscrypt_name *); +extern u32 fscrypt_fname_encrypted_size(struct inode *, u32); +extern int fscrypt_fname_alloc_buffer(struct inode *, u32, + struct fscrypt_str *); +extern void fscrypt_fname_free_buffer(struct fscrypt_str *); +extern int fscrypt_fname_disk_to_usr(struct inode *, u32, u32, + const struct fscrypt_str *, struct fscrypt_str *); +extern int fscrypt_fname_usr_to_disk(struct inode *, const struct qstr *, + struct fscrypt_str *); +#endif + +/* crypto.c */ +static inline struct fscrypt_ctx *fscrypt_notsupp_get_ctx(struct inode *i) +{ + return ERR_PTR(-EOPNOTSUPP); +} + +static inline void fscrypt_notsupp_release_ctx(struct fscrypt_ctx *c) +{ + return; +} + +static inline struct page *fscrypt_notsupp_encrypt_page(struct inode *i, + struct page *p) +{ + return ERR_PTR(-EOPNOTSUPP); +} + +static inline int fscrypt_notsupp_decrypt_page(struct page *p) +{ + return -EOPNOTSUPP; +} + +static inline void fscrypt_notsupp_decrypt_bio_pages(struct fscrypt_ctx *c, + struct bio *b) +{ + return; +} + +static inline void fscrypt_notsupp_pullback_bio_page(struct page **p, bool b) +{ + return; +} + +static inline void fscrypt_notsupp_restore_control_page(struct page *p) +{ + return; +} + +static inline int 
fscrypt_notsupp_zeroout_range(struct inode *i, pgoff_t p,
+				sector_t s, unsigned int f)
+{
+	return -EOPNOTSUPP;
+}
+
+/* policy.c */
+static inline int fscrypt_notsupp_process_policy(struct inode *i,
+				const struct fscrypt_policy *p)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline int fscrypt_notsupp_get_policy(struct inode *i,
+				struct fscrypt_policy *p)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline int fscrypt_notsupp_has_permitted_context(struct inode *p,
+				struct inode *i)
+{
+	return 0;
+}
+
+static inline int fscrypt_notsupp_inherit_context(struct inode *p,
+				struct inode *i, void *v, bool b)
+{
+	return -EOPNOTSUPP;
+}
+
+/* keyinfo.c */
+static inline int fscrypt_notsupp_get_encryption_info(struct inode *i)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline void fscrypt_notsupp_put_encryption_info(struct inode *i,
+				struct fscrypt_info *f)
+{
+	return;
+}
+
+/* fname.c */
+static inline int fscrypt_notsupp_setup_filename(struct inode *dir,
+				const struct qstr *iname,
+				int lookup, struct fscrypt_name *fname)
+{
+	if (dir->i_sb->s_cop->is_encrypted(dir))
+		return -EOPNOTSUPP;
+
+	memset(fname, 0, sizeof(struct fscrypt_name));
+	fname->usr_fname = iname;
+	fname->disk_name.name = (unsigned char *)iname->name;
+	fname->disk_name.len = iname->len;
+	return 0;
+}
+
+static inline void fscrypt_notsupp_free_filename(struct fscrypt_name *fname)
+{
+	return;
+}
+
+static inline u32 fscrypt_notsupp_fname_encrypted_size(struct inode *i, u32 s)
+{
+	/* never happens */
+	WARN_ON(1);
+	return 0;
+}
+
+static inline int fscrypt_notsupp_fname_alloc_buffer(struct inode *inode,
+				u32 ilen, struct fscrypt_str *crypto_str)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline void fscrypt_notsupp_fname_free_buffer(struct fscrypt_str *c)
+{
+	return;
+}
+
+static inline int fscrypt_notsupp_fname_disk_to_usr(struct inode *inode,
+			u32 hash, u32 minor_hash,
+			const struct fscrypt_str *iname,
+			struct fscrypt_str *oname)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline int fscrypt_notsupp_fname_usr_to_disk(struct inode *inode,
+			const struct qstr *iname,
+			struct fscrypt_str *oname)
+{
+	return -EOPNOTSUPP;
+}
+#endif	/* _LINUX_FSCRYPTO_H */
diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h
index 149bec83a907..cec100b6dfa9 100644
--- a/include/uapi/linux/fs.h
+++ b/include/uapi/linux/fs.h
@@ -246,6 +246,24 @@ struct fsxattr {
 #define FS_IOC_FSGETXATTR		_IOR ('X', 31, struct fsxattr)
 #define FS_IOC_FSSETXATTR		_IOW ('X', 32, struct fsxattr)
 
+/*
+ * File system encryption support
+ */
+/* Policy provided via an ioctl on the topmost directory */
+#define FS_KEY_DESCRIPTOR_SIZE	8
+
+struct fscrypt_policy {
+	__u8 version;
+	__u8 contents_encryption_mode;
+	__u8 filenames_encryption_mode;
+	__u8 flags;
+	__u8 master_key_descriptor[FS_KEY_DESCRIPTOR_SIZE];
+} __packed;
+
+#define FS_IOC_SET_ENCRYPTION_POLICY	_IOR('f', 19, struct fscrypt_policy)
+#define FS_IOC_GET_ENCRYPTION_PWSALT	_IOW('f', 20, __u8[16])
+#define FS_IOC_GET_ENCRYPTION_POLICY	_IOW('f', 21, struct fscrypt_policy)
+
 /*
  * Inode flags (FS_IOC_GETFLAGS / FS_IOC_SETFLAGS)
  *
-- cgit v1.2.3
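The FS_IOC_* ioctls and struct fscrypt_policy added to the uapi header above are what userspace now talks to; the filesystem-specific F2FS_IOC_* names (and the ext4 equivalents) survive only as aliases. The following is a minimal userspace sketch of applying a policy to an empty directory, assuming a kernel and uapi headers carrying this patch. The numeric mode and flag values mirror the in-kernel FS_ENCRYPTION_MODE_AES_256_XTS, FS_ENCRYPTION_MODE_AES_256_CTS and FS_POLICY_FLAGS_PAD_16 constants, which are not exported to uapi here, and the key descriptor bytes are placeholders only:

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/fs.h>	/* FS_IOC_SET_ENCRYPTION_POLICY, struct fscrypt_policy */

int main(int argc, char **argv)
{
	struct fscrypt_policy policy;
	int fd;

	if (argc != 2) {
		fprintf(stderr, "usage: %s <empty-dir>\n", argv[0]);
		return 1;
	}

	fd = open(argv[1], O_RDONLY | O_DIRECTORY);
	if (fd < 0) {
		perror("open");
		return 1;
	}

	memset(&policy, 0, sizeof(policy));
	policy.version = 0;			/* only version 0 is accepted */
	policy.contents_encryption_mode = 1;	/* FS_ENCRYPTION_MODE_AES_256_XTS */
	policy.filenames_encryption_mode = 4;	/* FS_ENCRYPTION_MODE_AES_256_CTS */
	policy.flags = 0x02;			/* FS_POLICY_FLAGS_PAD_16 */
	/* placeholder descriptor; a real one names a key in the keyring */
	memcpy(policy.master_key_descriptor, "\x01\x23\x45\x67\x89\xab\xcd\xef",
	       FS_KEY_DESCRIPTOR_SIZE);

	if (ioctl(fd, FS_IOC_SET_ENCRYPTION_POLICY, &policy) < 0) {
		perror("FS_IOC_SET_ENCRYPTION_POLICY");
		close(fd);
		return 1;
	}

	close(fd);
	return 0;
}

Per the policy code above, the directory must be empty when the policy is applied (otherwise -ENOTEMPTY), and file contents only become readable once a logon key whose description is the "fscrypt:" prefix followed by the hex descriptor is present in the keyring.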
From 81ad4276b505e987dd8ebbdf63605f92cd172b52 Mon Sep 17 00:00:00 2001
From: Zhang Rui
Date: Fri, 18 Mar 2016 10:03:24 +0800
Subject: Thermal: Ignore invalid trip points

In some cases, a platform thermal driver may report invalid trip points;
the thermal core should not take any action on these trip points.

This fixes a regression in which bogus trip points started to screw up
thermal control on some Lenovo laptops after

    commit bb431ba26c5cd0a17c941ca6c3a195a3a6d5d461
    Author: Zhang Rui
    Date:   Fri Oct 30 16:31:47 2015 +0800

        Thermal: initialize thermal zone device correctly

        After a thermal zone device is registered, no temperature has been
        read yet, so tz->temperature should not be 0 (which actually means
        0C) and the thermal trend is not available. In this case, we need
        special handling for the first thermal_zone_device_update().

        Both the thermal core framework and the step_wise governor are
        enhanced to handle this. And since the step_wise governor is the
        only one that uses trends, it's the only thermal governor that
        needs to be updated.

Tested-by: Manuel Krause
Tested-by: szegad
Tested-by: prash
Tested-by: amish
Tested-by: Matthias
Reviewed-by: Javi Merino
Signed-off-by: Zhang Rui
Signed-off-by: Chen Yu
CC: #3.18+
Link: https://bugzilla.redhat.com/show_bug.cgi?id=1317190
Link: https://bugzilla.kernel.org/show_bug.cgi?id=114551
Signed-off-by: Zhang Rui
---
 drivers/thermal/thermal_core.c | 13 ++++++++++++-
 include/linux/thermal.h | 2 ++
 2 files changed, 14 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c
index a0a8fd1235e2..d4b54653ecf8 100644
--- a/drivers/thermal/thermal_core.c
+++ b/drivers/thermal/thermal_core.c
@@ -454,6 +454,10 @@ static void handle_thermal_trip(struct thermal_zone_device *tz, int trip)
 {
 	enum thermal_trip_type type;
 
+	/* Ignore disabled trip points */
+	if (test_bit(trip, &tz->trips_disabled))
+		return;
+
 	tz->ops->get_trip_type(tz, trip, &type);
 
 	if (type == THERMAL_TRIP_CRITICAL || type == THERMAL_TRIP_HOT)
@@ -1800,6 +1804,7 @@ struct thermal_zone_device *thermal_zone_device_register(const char *type,
 {
 	struct thermal_zone_device *tz;
 	enum thermal_trip_type trip_type;
+	int trip_temp;
 	int result;
 	int count;
 	int passive = 0;
@@ -1871,9 +1876,15 @@ struct thermal_zone_device *thermal_zone_device_register(const char *type,
 		goto unregister;
 
 	for (count = 0; count < trips; count++) {
-		tz->ops->get_trip_type(tz, count, &trip_type);
+		if (tz->ops->get_trip_type(tz, count, &trip_type))
+			set_bit(count, &tz->trips_disabled);
 		if (trip_type == THERMAL_TRIP_PASSIVE)
 			passive = 1;
+		if (tz->ops->get_trip_temp(tz, count, &trip_temp))
+			set_bit(count, &tz->trips_disabled);
+		/* Check for bogus trip points */
+		if (trip_temp == 0)
+			set_bit(count, &tz->trips_disabled);
 	}
 
 	if (!passive) {
diff --git a/include/linux/thermal.h b/include/linux/thermal.h
index 9c481991fdc7..a55d0523f75d 100644
--- a/include/linux/thermal.h
+++ b/include/linux/thermal.h
@@ -156,6 +156,7 @@ struct thermal_attr {
  * @trip_hyst_attrs:	attributes for trip points for sysfs: trip hysteresis
  * @devdata:	private pointer for device private data
  * @trips:	number of trip points the thermal zone supports
+ * @trips_disabled:	bitmap for disabled trips
  * @passive_delay:	number of milliseconds to wait between polls when
  *			performing passive cooling.
  * @polling_delay:	number of milliseconds to wait between polls when
@@ -191,6 +192,7 @@ struct thermal_zone_device {
 	struct thermal_attr *trip_hyst_attrs;
 	void *devdata;
 	int trips;
+	unsigned long trips_disabled;	/* bitmap for disabled trips */
 	int passive_delay;
 	int polling_delay;
 	int temperature;
-- cgit v1.2.3
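Taken together, the two hunks mean a trip point participates in thermal control only if both driver callbacks succeed and the reported trip temperature is non-zero. A freestanding sketch of that filtering logic, using hypothetical stand-in callbacks in place of struct thermal_zone_device and its ops (note the short-circuit, so a failed temperature read is never compared):

#include <stdio.h>

struct fake_zone {
	int ntrips;
	unsigned long trips_disabled;	/* bitmap, as in struct thermal_zone_device */
	int (*get_trip_type)(struct fake_zone *tz, int trip, int *type);
	int (*get_trip_temp)(struct fake_zone *tz, int trip, int *temp);
};

static void mark_invalid_trips(struct fake_zone *tz)
{
	int type, temp, i;

	for (i = 0; i < tz->ntrips; i++) {
		if (tz->get_trip_type(tz, i, &type))
			tz->trips_disabled |= 1UL << i;	/* driver error */
		if (tz->get_trip_temp(tz, i, &temp) || temp == 0)
			tz->trips_disabled |= 1UL << i;	/* bogus trip point */
	}
}

static void handle_trip(struct fake_zone *tz, int trip)
{
	if (tz->trips_disabled & (1UL << trip))
		return;			/* as in handle_thermal_trip() */
	printf("acting on trip %d\n", trip);
}

/* demo callbacks: trip 1 reports 0C, trip 2 fails outright */
static int demo_type(struct fake_zone *tz, int trip, int *type)
{
	*type = 0;
	return trip == 2 ? -1 : 0;
}

static int demo_temp(struct fake_zone *tz, int trip, int *temp)
{
	*temp = trip == 1 ? 0 : 45000;	/* millidegrees */
	return 0;
}

int main(void)
{
	struct fake_zone tz = { 4, 0, demo_type, demo_temp };
	int i;

	mark_invalid_trips(&tz);
	for (i = 0; i < tz.ntrips; i++)
		handle_trip(&tz, i);	/* only trips 0 and 3 are acted on */
	return 0;
}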
* @polling_delay: number of milliseconds to wait between polls when @@ -191,6 +192,7 @@ struct thermal_zone_device { struct thermal_attr *trip_hyst_attrs; void *devdata; int trips; + unsigned long trips_disabled; /* bitmap for disabled trips */ int passive_delay; int polling_delay; int temperature; -- cgit v1.2.3 From 4c9dacb82d5aa36aa2568df60d897f2eb3d8819b Mon Sep 17 00:00:00 2001 From: Wenwei Tao Date: Thu, 3 Mar 2016 15:06:37 +0100 Subject: lightnvm: specify target's logical address area MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We can create more than one target on a lightnvm device by specifying its begin lun and end lun. But specifying only the physical address area is not enough; we also need to carve out a corresponding, non-intersecting logical address area from the backend device's logical address space. Otherwise the targets on the device might use the same logical addresses, causing incorrect information in the device's l2p table. Signed-off-by: Wenwei Tao Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/lightnvm/core.c | 1 + drivers/lightnvm/gennvm.c | 67 +++++++++++++++++++++++++++++++++++++++++++++++ drivers/lightnvm/gennvm.h | 6 +++++ drivers/lightnvm/rrpc.c | 35 +++++++++++++++++++++++-- drivers/lightnvm/rrpc.h | 1 + include/linux/lightnvm.h | 8 ++++++ 6 files changed, 116 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/lightnvm/core.c b/drivers/lightnvm/core.c index 0d1fb6b40c46..2925fd0b82bb 100644 --- a/drivers/lightnvm/core.c +++ b/drivers/lightnvm/core.c @@ -466,6 +466,7 @@ static int nvm_core_init(struct nvm_dev *dev) dev->total_secs = dev->nr_luns * dev->sec_per_lun; INIT_LIST_HEAD(&dev->online_targets); mutex_init(&dev->mlock); + spin_lock_init(&dev->lock); return 0; } diff --git a/drivers/lightnvm/gennvm.c b/drivers/lightnvm/gennvm.c index d65ec36a2231..d460b37bb016 100644 --- a/drivers/lightnvm/gennvm.c +++ b/drivers/lightnvm/gennvm.c @@ -20,6 +20,68 @@ #include "gennvm.h" +static int gennvm_get_area(struct nvm_dev *dev, sector_t *lba, sector_t len) +{ + struct gen_nvm *gn = dev->mp; + struct gennvm_area *area, *prev, *next; + sector_t begin = 0; + sector_t max_sectors = (dev->sec_size * dev->total_secs) >> 9; + + if (len > max_sectors) + return -EINVAL; + + area = kmalloc(sizeof(struct gennvm_area), GFP_KERNEL); + if (!area) + return -ENOMEM; + + prev = NULL; + + spin_lock(&dev->lock); + list_for_each_entry(next, &gn->area_list, list) { + if (begin + len > next->begin) { + begin = next->end; + prev = next; + continue; + } + break; + } + + if ((begin + len) > max_sectors) { + spin_unlock(&dev->lock); + kfree(area); + return -EINVAL; + } + + area->begin = *lba = begin; + area->end = begin + len; + + if (prev) /* insert into sorted order */ + list_add(&area->list, &prev->list); + else + list_add(&area->list, &gn->area_list); + spin_unlock(&dev->lock); + + return 0; +} + +static void gennvm_put_area(struct nvm_dev *dev, sector_t begin) +{ + struct gen_nvm *gn = dev->mp; + struct gennvm_area *area; + + spin_lock(&dev->lock); + list_for_each_entry(area, &gn->area_list, list) { + if (area->begin != begin) + continue; + + list_del(&area->list); + spin_unlock(&dev->lock); + kfree(area); + return; + } + spin_unlock(&dev->lock); +} + static void gennvm_blocks_free(struct nvm_dev *dev) { struct gen_nvm *gn = dev->mp; @@ -229,6 +291,7 @@ static int gennvm_register(struct nvm_dev *dev) gn->dev = dev; gn->nr_luns = dev->nr_luns; + INIT_LIST_HEAD(&gn->area_list); dev->mp = gn; ret =
gennvm_luns_init(dev, gn); @@ -465,6 +528,10 @@ static struct nvmm_type gennvm = { .get_lun = gennvm_get_lun, .lun_info_print = gennvm_lun_info_print, + + .get_area = gennvm_get_area, + .put_area = gennvm_put_area, + }; static int __init gennvm_module_init(void) diff --git a/drivers/lightnvm/gennvm.h b/drivers/lightnvm/gennvm.h index 9c24b5b32dac..04d7c23cfc61 100644 --- a/drivers/lightnvm/gennvm.h +++ b/drivers/lightnvm/gennvm.h @@ -39,8 +39,14 @@ struct gen_nvm { int nr_luns; struct gen_lun *luns; + struct list_head area_list; }; +struct gennvm_area { + struct list_head list; + sector_t begin; + sector_t end; /* end is excluded */ +}; #define gennvm_for_each_lun(bm, lun, i) \ for ((i) = 0, lun = &(bm)->luns[0]; \ (i) < (bm)->nr_luns; (i)++, lun = &(bm)->luns[(i)]) diff --git a/drivers/lightnvm/rrpc.c b/drivers/lightnvm/rrpc.c index 82343783aa47..c1e3c83f06b3 100644 --- a/drivers/lightnvm/rrpc.c +++ b/drivers/lightnvm/rrpc.c @@ -1053,8 +1053,11 @@ static int rrpc_map_init(struct rrpc *rrpc) { struct nvm_dev *dev = rrpc->dev; sector_t i; + u64 slba; int ret; + slba = rrpc->soffset >> (ilog2(dev->sec_size) - 9); + rrpc->trans_map = vzalloc(sizeof(struct rrpc_addr) * rrpc->nr_sects); if (!rrpc->trans_map) return -ENOMEM; @@ -1076,7 +1079,7 @@ static int rrpc_map_init(struct rrpc *rrpc) return 0; /* Bring up the mapping table from device */ - ret = dev->ops->get_l2p_tbl(dev, 0, dev->total_secs, rrpc_l2p_update, + ret = dev->ops->get_l2p_tbl(dev, slba, rrpc->nr_sects, rrpc_l2p_update, rrpc); if (ret) { pr_err("nvm: rrpc: could not read L2P table.\n"); @@ -1086,7 +1089,6 @@ static int rrpc_map_init(struct rrpc *rrpc) return 0; } - /* Minimum pages needed within a lun */ #define PAGE_POOL_SIZE 16 #define ADDR_POOL_SIZE 64 @@ -1200,12 +1202,33 @@ err: return -ENOMEM; } +/* returns 0 on success and stores the beginning address in *begin */ +static int rrpc_area_init(struct rrpc *rrpc, sector_t *begin) +{ + struct nvm_dev *dev = rrpc->dev; + struct nvmm_type *mt = dev->mt; + sector_t size = rrpc->nr_sects * dev->sec_size; + + size >>= 9; + + return mt->get_area(dev, begin, size); +} + +static void rrpc_area_free(struct rrpc *rrpc) +{ + struct nvm_dev *dev = rrpc->dev; + struct nvmm_type *mt = dev->mt; + + mt->put_area(dev, rrpc->soffset); +} + static void rrpc_free(struct rrpc *rrpc) { rrpc_gc_free(rrpc); rrpc_map_free(rrpc); rrpc_core_free(rrpc); rrpc_luns_free(rrpc); + rrpc_area_free(rrpc); kfree(rrpc); } @@ -1327,6 +1350,7 @@ static void *rrpc_init(struct nvm_dev *dev, struct gendisk *tdisk, struct request_queue *bqueue = dev->q; struct request_queue *tqueue = tdisk->queue; struct rrpc *rrpc; + sector_t soffset; int ret; if (!(dev->identity.dom & NVM_RSP_L2P)) { @@ -1352,6 +1376,13 @@ static void *rrpc_init(struct nvm_dev *dev, struct gendisk *tdisk, /* simple round-robin strategy */ atomic_set(&rrpc->next_lun, -1); + ret = rrpc_area_init(rrpc, &soffset); + if (ret < 0) { + pr_err("nvm: rrpc: could not initialize area\n"); + return ERR_PTR(ret); + } + rrpc->soffset = soffset; + ret = rrpc_luns_init(rrpc, lun_begin, lun_end); if (ret) { pr_err("nvm: rrpc: could not initialize luns\n"); diff --git a/drivers/lightnvm/rrpc.h b/drivers/lightnvm/rrpc.h index 855f4a5ca7dd..2653484a3b40 100644 --- a/drivers/lightnvm/rrpc.h +++ b/drivers/lightnvm/rrpc.h @@ -97,6 +97,7 @@ struct rrpc { struct nvm_dev *dev; struct gendisk *disk; + sector_t soffset; /* logical sector offset */ u64 poffset; /* physical page offset */ int lun_offset; diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h index 
c3c43184a787..b466bd9f2cf8 100644 --- a/include/linux/lightnvm.h +++ b/include/linux/lightnvm.h @@ -355,6 +355,7 @@ struct nvm_dev { char name[DISK_NAME_LEN]; struct mutex mlock; + spinlock_t lock; }; static inline struct ppa_addr generic_to_dev_addr(struct nvm_dev *dev, @@ -467,6 +468,9 @@ typedef int (nvmm_erase_blk_fn)(struct nvm_dev *, struct nvm_block *, typedef struct nvm_lun *(nvmm_get_lun_fn)(struct nvm_dev *, int); typedef void (nvmm_lun_info_print_fn)(struct nvm_dev *); +typedef int (nvmm_get_area_fn)(struct nvm_dev *, sector_t *, sector_t); +typedef void (nvmm_put_area_fn)(struct nvm_dev *, sector_t); + struct nvmm_type { const char *name; unsigned int version[3]; @@ -491,6 +495,10 @@ struct nvmm_type { /* Statistics */ nvmm_lun_info_print_fn *lun_info_print; + + nvmm_get_area_fn *get_area; + nvmm_put_area_fn *put_area; + struct list_head list; }; -- cgit v1.2.3 From da1e284919b0b99c5bf0618b6c98cbaf2c17e62e Mon Sep 17 00:00:00 2001 From: Wenwei Tao Date: Thu, 3 Mar 2016 15:06:38 +0100 Subject: lightnvm: add a bitmap of luns MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a bitmap of luns to indicate each lun's status: in use or available. When creating targets, do the necessary checks to avoid allocating luns that are already allocated. Signed-off-by: Wenwei Tao Freed dev->lun_map if nvm_core_init later failed in the init process. Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/lightnvm/core.c | 6 ++++ drivers/lightnvm/gennvm.c | 18 ++++++++++++ drivers/lightnvm/rrpc.c | 74 ++++++++++++++++++++++++++++------------------- include/linux/lightnvm.h | 5 ++++ 4 files changed, 74 insertions(+), 29 deletions(-) (limited to 'include/linux') diff --git a/drivers/lightnvm/core.c b/drivers/lightnvm/core.c index 2925fd0b82bb..0dc9a80adb94 100644 --- a/drivers/lightnvm/core.c +++ b/drivers/lightnvm/core.c @@ -464,6 +464,10 @@ static int nvm_core_init(struct nvm_dev *dev) dev->nr_luns = dev->luns_per_chnl * dev->nr_chnls; dev->total_secs = dev->nr_luns * dev->sec_per_lun; + dev->lun_map = kcalloc(BITS_TO_LONGS(dev->nr_luns), + sizeof(unsigned long), GFP_KERNEL); + if (!dev->lun_map) + return -ENOMEM; INIT_LIST_HEAD(&dev->online_targets); mutex_init(&dev->mlock); spin_lock_init(&dev->lock); @@ -586,6 +590,7 @@ int nvm_register(struct request_queue *q, char *disk_name, return 0; err_init: + kfree(dev->lun_map); kfree(dev); return ret; } @@ -608,6 +613,7 @@ void nvm_unregister(char *disk_name) up_write(&nvm_lock); nvm_exit(dev); + kfree(dev->lun_map); kfree(dev); } EXPORT_SYMBOL(nvm_unregister); diff --git a/drivers/lightnvm/gennvm.c b/drivers/lightnvm/gennvm.c index d460b37bb016..b97801c00099 100644 --- a/drivers/lightnvm/gennvm.c +++ b/drivers/lightnvm/gennvm.c @@ -192,6 +192,9 @@ static int gennvm_block_map(u64 slba, u32 nlb, __le64 *entries, void *private) lun_id = div_u64(pba, dev->sec_per_lun); lun = &gn->luns[lun_id]; + if (!test_bit(lun_id, dev->lun_map)) + __set_bit(lun_id, dev->lun_map); + /* Calculate block offset into lun */ pba = pba - (dev->sec_per_lun * lun_id); blk = &lun->vlun.blocks[div_u64(pba, dev->sec_per_blk)]; @@ -482,10 +485,23 @@ static int gennvm_erase_blk(struct nvm_dev *dev, struct nvm_block *blk, return nvm_erase_ppa(dev, &addr, 1); } +static int gennvm_reserve_lun(struct nvm_dev *dev, int lunid) +{ + return test_and_set_bit(lunid, dev->lun_map); +} + +static void gennvm_release_lun(struct nvm_dev *dev, int lunid) +{ + WARN_ON(!test_and_clear_bit(lunid, dev->lun_map)); +} + static struct nvm_lun
*gennvm_get_lun(struct nvm_dev *dev, int lunid) { struct gen_nvm *gn = dev->mp; + if (unlikely(lunid >= dev->nr_luns)) + return NULL; + return &gn->luns[lunid].vlun; } @@ -527,6 +543,8 @@ static struct nvmm_type gennvm = { .erase_blk = gennvm_erase_blk, .get_lun = gennvm_get_lun, + .reserve_lun = gennvm_reserve_lun, + .release_lun = gennvm_release_lun, .lun_info_print = gennvm_lun_info_print, .get_area = gennvm_get_area, diff --git a/drivers/lightnvm/rrpc.c b/drivers/lightnvm/rrpc.c index c1e3c83f06b3..3ab6495c3fd8 100644 --- a/drivers/lightnvm/rrpc.c +++ b/drivers/lightnvm/rrpc.c @@ -965,25 +965,11 @@ static void rrpc_requeue(struct work_struct *work) static void rrpc_gc_free(struct rrpc *rrpc) { - struct rrpc_lun *rlun; - int i; - if (rrpc->krqd_wq) destroy_workqueue(rrpc->krqd_wq); if (rrpc->kgc_wq) destroy_workqueue(rrpc->kgc_wq); - - if (!rrpc->luns) - return; - - for (i = 0; i < rrpc->nr_luns; i++) { - rlun = &rrpc->luns[i]; - - if (!rlun->blocks) - break; - vfree(rlun->blocks); - } } static int rrpc_gc_init(struct rrpc *rrpc) @@ -1143,6 +1129,23 @@ static void rrpc_core_free(struct rrpc *rrpc) static void rrpc_luns_free(struct rrpc *rrpc) { + struct nvm_dev *dev = rrpc->dev; + struct nvm_lun *lun; + struct rrpc_lun *rlun; + int i; + + if (!rrpc->luns) + return; + + for (i = 0; i < rrpc->nr_luns; i++) { + rlun = &rrpc->luns[i]; + lun = rlun->parent; + if (!lun) + break; + dev->mt->release_lun(dev, lun->id); + vfree(rlun->blocks); + } + kfree(rrpc->luns); } @@ -1150,7 +1153,7 @@ static int rrpc_luns_init(struct rrpc *rrpc, int lun_begin, int lun_end) { struct nvm_dev *dev = rrpc->dev; struct rrpc_lun *rlun; - int i, j; + int i, j, ret = -EINVAL; if (dev->sec_per_blk > MAX_INVALID_PAGES_STORAGE * BITS_PER_LONG) { pr_err("rrpc: number of pages per block too high."); @@ -1166,25 +1169,26 @@ static int rrpc_luns_init(struct rrpc *rrpc, int lun_begin, int lun_end) /* 1:1 mapping */ for (i = 0; i < rrpc->nr_luns; i++) { - struct nvm_lun *lun = dev->mt->get_lun(dev, lun_begin + i); - - rlun = &rrpc->luns[i]; - rlun->rrpc = rrpc; - rlun->parent = lun; - INIT_LIST_HEAD(&rlun->prio_list); - INIT_LIST_HEAD(&rlun->open_list); - INIT_LIST_HEAD(&rlun->closed_list); + int lunid = lun_begin + i; + struct nvm_lun *lun; - INIT_WORK(&rlun->ws_gc, rrpc_lun_gc); - spin_lock_init(&rlun->lock); + if (dev->mt->reserve_lun(dev, lunid)) { + pr_err("rrpc: lun %u is already allocated\n", lunid); + goto err; + } - rrpc->total_blocks += dev->blks_per_lun; - rrpc->nr_sects += dev->sec_per_lun; + lun = dev->mt->get_lun(dev, lunid); + if (!lun) + goto err; + rlun = &rrpc->luns[i]; + rlun->parent = lun; rlun->blocks = vzalloc(sizeof(struct rrpc_block) * rrpc->dev->blks_per_lun); - if (!rlun->blocks) + if (!rlun->blocks) { + ret = -ENOMEM; goto err; + } for (j = 0; j < rrpc->dev->blks_per_lun; j++) { struct rrpc_block *rblk = &rlun->blocks[j]; @@ -1195,11 +1199,23 @@ static int rrpc_luns_init(struct rrpc *rrpc, int lun_begin, int lun_end) INIT_LIST_HEAD(&rblk->prio); spin_lock_init(&rblk->lock); } + + rlun->rrpc = rrpc; + INIT_LIST_HEAD(&rlun->prio_list); + INIT_LIST_HEAD(&rlun->open_list); + INIT_LIST_HEAD(&rlun->closed_list); + + INIT_WORK(&rlun->ws_gc, rrpc_lun_gc); + spin_lock_init(&rlun->lock); + + rrpc->total_blocks += dev->blks_per_lun; + rrpc->nr_sects += dev->sec_per_lun; + } return 0; err: - return -ENOMEM; + return ret; } /* returns 0 on success and stores the beginning address in *begin */ diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h index b466bd9f2cf8..0ee2c2c78ffd 100644 --- 
a/include/linux/lightnvm.h +++ b/include/linux/lightnvm.h @@ -346,6 +346,7 @@ struct nvm_dev { int nr_luns; unsigned max_pages_per_blk; + unsigned long *lun_map; void *ppalist_pool; struct nvm_id identity; @@ -466,6 +467,8 @@ typedef int (nvmm_submit_io_fn)(struct nvm_dev *, struct nvm_rq *); typedef int (nvmm_erase_blk_fn)(struct nvm_dev *, struct nvm_block *, unsigned long); typedef struct nvm_lun *(nvmm_get_lun_fn)(struct nvm_dev *, int); +typedef int (nvmm_reserve_lun)(struct nvm_dev *, int); +typedef void (nvmm_release_lun)(struct nvm_dev *, int); typedef void (nvmm_lun_info_print_fn)(struct nvm_dev *); typedef int (nvmm_get_area_fn)(struct nvm_dev *, sector_t *, sector_t); @@ -492,6 +495,8 @@ struct nvmm_type { /* Configuration management */ nvmm_get_lun_fn *get_lun; + nvmm_reserve_lun *reserve_lun; + nvmm_release_lun *release_lun; /* Statistics */ nvmm_lun_info_print_fn *lun_info_print; -- cgit v1.2.3 From 9f867268436d799549909437e627e7cf279e1127 Mon Sep 17 00:00:00 2001 From: Matias Bjorling Date: Thu, 3 Mar 2016 15:06:39 +0100 Subject: nvme: lightnvm: return ppa completion status MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PPAs sent to the device are separately acknowledged in a 64-bit status variable. The status is stored in DW0 and DW1 of the completion queue entry. Store this status inside the nvm_rq for further processing. It can later be used to implement retry techniques for failed writes and reads. Reviewed-by: Christoph Hellwig Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/nvme/host/lightnvm.c | 17 +++++++++++++++-- include/linux/lightnvm.h | 1 + 2 files changed, 16 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/nvme/host/lightnvm.c b/drivers/nvme/host/lightnvm.c index 42a01a931989..9461dd639acd 100644 --- a/drivers/nvme/host/lightnvm.c +++ b/drivers/nvme/host/lightnvm.c @@ -146,6 +146,14 @@ struct nvme_nvm_command { }; }; +struct nvme_nvm_completion { + __le64 result; /* Used by LightNVM to return ppa completions */ + __le16 sq_head; /* how much of this queue may be reclaimed */ + __le16 sq_id; /* submission queue that generated this entry */ + __u16 command_id; /* of the command which completed */ + __le16 status; /* did the command fail, and if so, why?
*/ +}; + #define NVME_NVM_LP_MLC_PAIRS 886 struct nvme_nvm_lp_mlc { __u16 num_pairs; @@ -507,6 +515,10 @@ static inline void nvme_nvm_rqtocmd(struct request *rq, struct nvm_rq *rqd, static void nvme_nvm_end_io(struct request *rq, int error) { struct nvm_rq *rqd = rq->end_io_data; + struct nvme_nvm_completion *cqe = rq->special; + + if (cqe) + rqd->ppa_status = le64_to_cpu(cqe->result); nvm_end_io(rqd, error); @@ -526,7 +538,8 @@ static int nvme_nvm_submit_io(struct nvm_dev *dev, struct nvm_rq *rqd) if (IS_ERR(rq)) return -ENOMEM; - cmd = kzalloc(sizeof(struct nvme_nvm_command), GFP_KERNEL); + cmd = kzalloc(sizeof(struct nvme_nvm_command) + + sizeof(struct nvme_nvm_completion), GFP_KERNEL); if (!cmd) { blk_mq_free_request(rq); return -ENOMEM; @@ -545,7 +558,7 @@ static int nvme_nvm_submit_io(struct nvm_dev *dev, struct nvm_rq *rqd) rq->cmd = (unsigned char *)cmd; rq->cmd_len = sizeof(struct nvme_nvm_command); - rq->special = (void *)0; + rq->special = cmd + 1; rq->end_io_data = rqd; diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h index 0ee2c2c78ffd..cdcb2ccbefa8 100644 --- a/include/linux/lightnvm.h +++ b/include/linux/lightnvm.h @@ -242,6 +242,7 @@ struct nvm_rq { uint16_t nr_pages; uint16_t flags; + u64 ppa_status; /* ppa media status */ int error; }; -- cgit v1.2.3 From 18b862dcd57a3e23e34c8cd1e939f68548c1209a Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 18 Mar 2016 20:02:39 +0000 Subject: dma-buf, drm, ion: Propagate error code from dma_buf_start_cpu_access() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Drivers, especially i915.ko, can fail during the initial migration of a dma-buf for CPU access. However, the error code from the driver was not being propagated back to ioctl and so userspace was blissfully ignorant of the failure. Rendering corruption ensues. Whilst fixing the ioctl to return the error code from dma_buf_start_cpu_access(), also do the same for dma_buf_end_cpu_access(). For most drivers, dma_buf_end_cpu_access() cannot fail. i915.ko however, as most drivers would, wants to avoid being uninterruptible (as would be required to guarrantee no failure when flushing the buffer to the device). As userspace already has to handle errors from the SYNC_IOCTL, take advantage of this to be able to restart the syscall across signals. This fixes a coherency issue for i915.ko as well as reducing the uninterruptible hold upon its BKL, the struct_mutex. 
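(As a hedged userspace sketch of what the now-propagated error means for callers of the sync ioctl: DMA_BUF_IOCTL_SYNC and struct dma_buf_sync come from the uapi header added by the commit referenced below, while the wrapper name and the retry-on-EINTR policy are this sketch's own assumptions:)

#include <errno.h>
#include <sys/ioctl.h>
#include <linux/dma-buf.h>

/* Bracket CPU access to a dma-buf fd; the ioctl may now legitimately fail,
 * so the return value must be checked, and an interrupted wait is restarted.
 */
static int dmabuf_sync(int fd, __u64 flags)
{
        struct dma_buf_sync sync = { .flags = flags };
        int ret;

        do {
                ret = ioctl(fd, DMA_BUF_IOCTL_SYNC, &sync);
        } while (ret == -1 && errno == EINTR); /* restart across signals */

        return ret == -1 ? -errno : 0;
}

/* e.g. dmabuf_sync(fd, DMA_BUF_SYNC_START | DMA_BUF_SYNC_WRITE);
 *      ... CPU access ...
 *      dmabuf_sync(fd, DMA_BUF_SYNC_END | DMA_BUF_SYNC_WRITE);
 */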
Fixes commit c11e391da2a8fe973c3c2398452000bed505851e Author: Daniel Vetter Date: Thu Feb 11 20:04:51 2016 -0200 dma-buf: Add ioctls to allow userspace to flush Testcase: igt/gem_concurrent_blit/*dmabuf*interruptible Testcase: igt/prime_mmap_coherency/ioctl-errors Signed-off-by: Chris Wilson Cc: Tiago Vignatti Cc: Stéphane Marchesin Cc: David Herrmann Cc: Sumit Semwal Cc: Daniel Vetter CC: linux-media@vger.kernel.org Cc: dri-devel@lists.freedesktop.org Cc: linaro-mm-sig@lists.linaro.org Cc: intel-gfx@lists.freedesktop.org Cc: devel@driverdev.osuosl.org Signed-off-by: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/1458331359-2634-1-git-send-email-chris@chris-wilson.co.uk --- drivers/dma-buf/dma-buf.c | 17 +++++++++++------ drivers/gpu/drm/i915/i915_gem_dmabuf.c | 15 +++++---------- drivers/gpu/drm/omapdrm/omap_gem_dmabuf.c | 5 +++-- drivers/gpu/drm/udl/udl_fb.c | 4 ++-- drivers/staging/android/ion/ion.c | 6 ++++-- include/linux/dma-buf.h | 6 +++--- 6 files changed, 28 insertions(+), 25 deletions(-) (limited to 'include/linux') diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c index 9810d1df0691..774a60f4309a 100644 --- a/drivers/dma-buf/dma-buf.c +++ b/drivers/dma-buf/dma-buf.c @@ -259,6 +259,7 @@ static long dma_buf_ioctl(struct file *file, struct dma_buf *dmabuf; struct dma_buf_sync sync; enum dma_data_direction direction; + int ret; dmabuf = file->private_data; @@ -285,11 +286,11 @@ static long dma_buf_ioctl(struct file *file, } if (sync.flags & DMA_BUF_SYNC_END) - dma_buf_end_cpu_access(dmabuf, direction); + ret = dma_buf_end_cpu_access(dmabuf, direction); else - dma_buf_begin_cpu_access(dmabuf, direction); + ret = dma_buf_begin_cpu_access(dmabuf, direction); - return 0; + return ret; default: return -ENOTTY; } @@ -613,13 +614,17 @@ EXPORT_SYMBOL_GPL(dma_buf_begin_cpu_access); * * This call must always succeed. 
*/ -void dma_buf_end_cpu_access(struct dma_buf *dmabuf, - enum dma_data_direction direction) +int dma_buf_end_cpu_access(struct dma_buf *dmabuf, + enum dma_data_direction direction) { + int ret = 0; + WARN_ON(!dmabuf); if (dmabuf->ops->end_cpu_access) - dmabuf->ops->end_cpu_access(dmabuf, direction); + ret = dmabuf->ops->end_cpu_access(dmabuf, direction); + + return ret; } EXPORT_SYMBOL_GPL(dma_buf_end_cpu_access); diff --git a/drivers/gpu/drm/i915/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/i915_gem_dmabuf.c index 1f3eef6fb345..0506016e18e0 100644 --- a/drivers/gpu/drm/i915/i915_gem_dmabuf.c +++ b/drivers/gpu/drm/i915/i915_gem_dmabuf.c @@ -228,25 +228,20 @@ static int i915_gem_begin_cpu_access(struct dma_buf *dma_buf, enum dma_data_dire return ret; } -static void i915_gem_end_cpu_access(struct dma_buf *dma_buf, enum dma_data_direction direction) +static int i915_gem_end_cpu_access(struct dma_buf *dma_buf, enum dma_data_direction direction) { struct drm_i915_gem_object *obj = dma_buf_to_obj(dma_buf); struct drm_device *dev = obj->base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); - bool was_interruptible; int ret; - mutex_lock(&dev->struct_mutex); - was_interruptible = dev_priv->mm.interruptible; - dev_priv->mm.interruptible = false; + ret = i915_mutex_lock_interruptible(dev); + if (ret) + return ret; ret = i915_gem_object_set_to_gtt_domain(obj, false); - - dev_priv->mm.interruptible = was_interruptible; mutex_unlock(&dev->struct_mutex); - if (unlikely(ret)) - DRM_ERROR("unable to flush buffer following CPU access; rendering may be corrupt\n"); + return ret; } static const struct dma_buf_ops i915_dmabuf_ops = { diff --git a/drivers/gpu/drm/omapdrm/omap_gem_dmabuf.c b/drivers/gpu/drm/omapdrm/omap_gem_dmabuf.c index aebae1c2dab2..c75249de2984 100644 --- a/drivers/gpu/drm/omapdrm/omap_gem_dmabuf.c +++ b/drivers/gpu/drm/omapdrm/omap_gem_dmabuf.c @@ -93,11 +93,12 @@ static int omap_gem_dmabuf_begin_cpu_access(struct dma_buf *buffer, return omap_gem_get_pages(obj, &pages, true); } -static void omap_gem_dmabuf_end_cpu_access(struct dma_buf *buffer, - enum dma_data_direction dir) +static int omap_gem_dmabuf_end_cpu_access(struct dma_buf *buffer, + enum dma_data_direction dir) { struct drm_gem_object *obj = buffer->priv; omap_gem_put_pages(obj); + return 0; } diff --git a/drivers/gpu/drm/udl/udl_fb.c b/drivers/gpu/drm/udl/udl_fb.c index c427499133d6..33239a2b264a 100644 --- a/drivers/gpu/drm/udl/udl_fb.c +++ b/drivers/gpu/drm/udl/udl_fb.c @@ -423,8 +423,8 @@ static int udl_user_framebuffer_dirty(struct drm_framebuffer *fb, } if (ufb->obj->base.import_attach) { - dma_buf_end_cpu_access(ufb->obj->base.import_attach->dmabuf, - DMA_FROM_DEVICE); + ret = dma_buf_end_cpu_access(ufb->obj->base.import_attach->dmabuf, + DMA_FROM_DEVICE); } unlock: diff --git a/drivers/staging/android/ion/ion.c b/drivers/staging/android/ion/ion.c index 0754a37c9674..49436b4510f4 100644 --- a/drivers/staging/android/ion/ion.c +++ b/drivers/staging/android/ion/ion.c @@ -1075,14 +1075,16 @@ static int ion_dma_buf_begin_cpu_access(struct dma_buf *dmabuf, return PTR_ERR_OR_ZERO(vaddr); } -static void ion_dma_buf_end_cpu_access(struct dma_buf *dmabuf, - enum dma_data_direction direction) +static int ion_dma_buf_end_cpu_access(struct dma_buf *dmabuf, + enum dma_data_direction direction) { struct ion_buffer *buffer = dmabuf->priv; mutex_lock(&buffer->lock); ion_buffer_kmap_put(buffer); mutex_unlock(&buffer->lock); + + return 0; } static struct dma_buf_ops dma_buf_ops = { diff --git a/include/linux/dma-buf.h 
b/include/linux/dma-buf.h index 532108ea0c1c..3fe90d494edb 100644 --- a/include/linux/dma-buf.h +++ b/include/linux/dma-buf.h @@ -94,7 +94,7 @@ struct dma_buf_ops { void (*release)(struct dma_buf *); int (*begin_cpu_access)(struct dma_buf *, enum dma_data_direction); - void (*end_cpu_access)(struct dma_buf *, enum dma_data_direction); + int (*end_cpu_access)(struct dma_buf *, enum dma_data_direction); void *(*kmap_atomic)(struct dma_buf *, unsigned long); void (*kunmap_atomic)(struct dma_buf *, unsigned long, void *); void *(*kmap)(struct dma_buf *, unsigned long); @@ -224,8 +224,8 @@ void dma_buf_unmap_attachment(struct dma_buf_attachment *, struct sg_table *, enum dma_data_direction); int dma_buf_begin_cpu_access(struct dma_buf *dma_buf, enum dma_data_direction dir); -void dma_buf_end_cpu_access(struct dma_buf *dma_buf, - enum dma_data_direction dir); +int dma_buf_end_cpu_access(struct dma_buf *dma_buf, + enum dma_data_direction dir); void *dma_buf_kmap_atomic(struct dma_buf *, unsigned long); void dma_buf_kunmap_atomic(struct dma_buf *, unsigned long, void *); void *dma_buf_kmap(struct dma_buf *, unsigned long); -- cgit v1.2.3 From 897bb0c7f1ea82d7cc882b19790b5e1df00ffc29 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 19 Mar 2016 11:30:33 +0100 Subject: blk-mq: Use proper cpumask iterator queue_for_each_ctx() iterates over per_cpu variables under the assumption that the possible cpu mask cannot have holes. That's wrong as all cpumasks can have holes. In case there are holes the iteration ends up accessing uninitialized memory and crashing as a result. Replace the macro by a proper for_each_possible_cpu() loop and drop the unused macro blk_ctx_sum() which references queue_for_each_ctx(). Reported-by: Xiong Zhou Signed-off-by: Thomas Gleixner Signed-off-by: Jens Axboe --- block/blk-mq-sysfs.c | 6 ++++-- block/blk-mq.c | 3 ++- include/linux/blk-mq.h | 14 -------------- 3 files changed, 6 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/block/blk-mq-sysfs.c b/block/blk-mq-sysfs.c index 431fdda21737..4ea4dd8a1eed 100644 --- a/block/blk-mq-sysfs.c +++ b/block/blk-mq-sysfs.c @@ -416,12 +416,14 @@ void blk_mq_hctx_kobj_init(struct blk_mq_hw_ctx *hctx) static void blk_mq_sysfs_init(struct request_queue *q) { struct blk_mq_ctx *ctx; - int i; + int cpu; kobject_init(&q->mq_kobj, &blk_mq_ktype); - queue_for_each_ctx(q, ctx, i) + for_each_possible_cpu(cpu) { + ctx = per_cpu_ptr(q->queue_ctx, cpu); kobject_init(&ctx->kobj, &blk_mq_ctx_ktype); + } } int blk_mq_register_disk(struct gendisk *disk) diff --git a/block/blk-mq.c b/block/blk-mq.c index 050f7a13021b..1699baf39b78 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -1798,11 +1798,12 @@ static void blk_mq_map_swqueue(struct request_queue *q, /* * Map software to hardware queues */ - queue_for_each_ctx(q, ctx, i) { + for_each_possible_cpu(i) { /* If the cpu isn't online, the cpu is mapped to first hctx */ if (!cpumask_test_cpu(i, online_mask)) continue; + ctx = per_cpu_ptr(q->queue_ctx, i); hctx = q->mq_ops->map_queue(q, i); cpumask_set_cpu(i, hctx->cpumask); diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 15a73d49fd1d..9ac9799b702b 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -263,22 +263,8 @@ static inline void *blk_mq_rq_to_pdu(struct request *rq) for ((i) = 0; (i) < (q)->nr_hw_queues && \ ({ hctx = (q)->queue_hw_ctx[i]; 1; }); (i)++) -#define queue_for_each_ctx(q, ctx, i) \ - for ((i) = 0; (i) < (q)->nr_queues && \ - ({ ctx = per_cpu_ptr((q)->queue_ctx, (i)); 1; }); 
(i)++) - #define hctx_for_each_ctx(hctx, ctx, i) \ for ((i) = 0; (i) < (hctx)->nr_ctx && \ ({ ctx = (hctx)->ctxs[(i)]; 1; }); (i)++) -#define blk_ctx_sum(q, sum) \ -({ \ - struct blk_mq_ctx *__x; \ - unsigned int __ret = 0, __i; \ - \ - queue_for_each_ctx((q), __x, __i) \ - __ret += sum; \ - __ret; \ -}) - #endif -- cgit v1.2.3 From fac8e0f579695a3ecbc4d3cac369139d7f819971 Mon Sep 17 00:00:00 2001 From: Jesse Gross Date: Sat, 19 Mar 2016 09:32:01 -0700 Subject: tunnels: Don't apply GRO to multiple layers of encapsulation. When drivers express support for TSO of encapsulated packets, they only mean that they can do it for one layer of encapsulation. Supporting additional levels would mean updating, at a minimum, more IP length fields, which they are unaware of. No encapsulation device expresses support for handling offloaded encapsulated packets, so we won't generate these types of frames in the transmit path. However, GRO doesn't have a check for multiple levels of encapsulation and will attempt to build them. UDP tunnel GRO actually does prevent this situation, but it only handles multiple UDP tunnels stacked on top of each other. This generalizes that solution to prevent any kind of tunnel stacking that would cause problems. Fixes: bf5a755f ("net-gre-gro: Add GRE support to the GRO stack") Signed-off-by: Jesse Gross Signed-off-by: David S. Miller --- include/linux/netdevice.h | 4 ++-- net/core/dev.c | 2 +- net/ipv4/af_inet.c | 15 ++++++++++++++- net/ipv4/gre_offload.c | 5 +++++ net/ipv4/udp_offload.c | 6 +++--- net/ipv6/ip6_offload.c | 15 ++++++++++++++- 6 files changed, 39 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index be693b34662f..f9eebd518545 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2096,8 +2096,8 @@ struct napi_gro_cb { /* This is non-zero if the packet may be of the same flow.
*/ u8 same_flow:1; - /* Used in udp_gro_receive */ - u8 udp_mark:1; + /* Used in tunnel GRO receive */ + u8 encap_mark:1; /* GRO checksum is valid */ u8 csum_valid:1; diff --git a/net/core/dev.c b/net/core/dev.c index edb7179bc051..43c74cad25bc 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4438,7 +4438,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff NAPI_GRO_CB(skb)->same_flow = 0; NAPI_GRO_CB(skb)->flush = 0; NAPI_GRO_CB(skb)->free = 0; - NAPI_GRO_CB(skb)->udp_mark = 0; + NAPI_GRO_CB(skb)->encap_mark = 0; NAPI_GRO_CB(skb)->gro_remcsum_start = 0; /* Setup for GRO checksum validation */ diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 965923325535..0fefba64ee79 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1380,6 +1380,19 @@ out: return pp; } +static struct sk_buff **ipip_gro_receive(struct sk_buff **head, + struct sk_buff *skb) +{ + if (NAPI_GRO_CB(skb)->encap_mark) { + NAPI_GRO_CB(skb)->flush = 1; + return NULL; + } + + NAPI_GRO_CB(skb)->encap_mark = 1; + + return inet_gro_receive(head, skb); +} + #define SECONDS_PER_DAY 86400 /* inet_current_timestamp - Return IP network timestamp @@ -1682,7 +1695,7 @@ static struct packet_offload ip_packet_offload __read_mostly = { static const struct net_offload ipip_offload = { .callbacks = { .gso_segment = inet_gso_segment, - .gro_receive = inet_gro_receive, + .gro_receive = ipip_gro_receive, .gro_complete = ipip_gro_complete, }, }; diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c index 540866dbd27d..dd031617160a 100644 --- a/net/ipv4/gre_offload.c +++ b/net/ipv4/gre_offload.c @@ -126,6 +126,11 @@ static struct sk_buff **gre_gro_receive(struct sk_buff **head, struct packet_offload *ptype; __be16 type; + if (NAPI_GRO_CB(skb)->encap_mark) + goto out; + + NAPI_GRO_CB(skb)->encap_mark = 1; + off = skb_gro_offset(skb); hlen = off + sizeof(*greh); greh = skb_gro_header_fast(skb, off); diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index 8a3405a80260..8007f73b8981 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -311,14 +311,14 @@ struct sk_buff **udp_gro_receive(struct sk_buff **head, struct sk_buff *skb, unsigned int off = skb_gro_offset(skb); int flush = 1; - if (NAPI_GRO_CB(skb)->udp_mark || + if (NAPI_GRO_CB(skb)->encap_mark || (skb->ip_summed != CHECKSUM_PARTIAL && NAPI_GRO_CB(skb)->csum_cnt == 0 && !NAPI_GRO_CB(skb)->csum_valid)) goto out; - /* mark that this skb passed once through the udp gro layer */ - NAPI_GRO_CB(skb)->udp_mark = 1; + /* mark that this skb passed once through the tunnel gro layer */ + NAPI_GRO_CB(skb)->encap_mark = 1; rcu_read_lock(); uo_priv = rcu_dereference(udp_offload_base); diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c index eeca943f12dc..82e9f3076028 100644 --- a/net/ipv6/ip6_offload.c +++ b/net/ipv6/ip6_offload.c @@ -258,6 +258,19 @@ out: return pp; } +static struct sk_buff **sit_gro_receive(struct sk_buff **head, + struct sk_buff *skb) +{ + if (NAPI_GRO_CB(skb)->encap_mark) { + NAPI_GRO_CB(skb)->flush = 1; + return NULL; + } + + NAPI_GRO_CB(skb)->encap_mark = 1; + + return ipv6_gro_receive(head, skb); +} + static int ipv6_gro_complete(struct sk_buff *skb, int nhoff) { const struct net_offload *ops; @@ -302,7 +315,7 @@ static struct packet_offload ipv6_packet_offload __read_mostly = { static const struct net_offload sit_offload = { .callbacks = { .gso_segment = ipv6_gso_segment, - .gro_receive = ipv6_gro_receive, + .gro_receive = sit_gro_receive, .gro_complete = sit_gro_complete, }, }; -- cgit 
v1.2.3 From ebf918cf9922748b9d4d9bf6a2620530cdc5a1d0 Mon Sep 17 00:00:00 2001 From: Tina Ruchandani Date: Sat, 19 Mar 2016 11:21:14 -0700 Subject: isdn: Use ktime_t instead of 'struct timeval' 'struct timeval' uses a 32-bit representation for seconds, which will overflow in the year 2038 and beyond. mISDN/clock.c needs to compute and store elapsed time in intervals of 125 microseconds. This patch replaces the usage of 'struct timeval' with 64-bit ktime_t, which is y2038 safe. The patch also replaces do_gettimeofday() (wall-clock time) with ktime_get() (monotonic time) since we only care about elapsed time here. Signed-off-by: Tina Ruchandani Suggested-by: Arnd Bergmann Suggested-by: David Miller Signed-off-by: David S. Miller --- drivers/isdn/mISDN/clock.c | 69 +++++++++++++++++++--------------------------- include/linux/mISDNif.h | 2 +- 2 files changed, 30 insertions(+), 41 deletions(-) (limited to 'include/linux') diff --git a/drivers/isdn/mISDN/clock.c b/drivers/isdn/mISDN/clock.c index 693fb7c9b59a..f8f659f1ce1b 100644 --- a/drivers/isdn/mISDN/clock.c +++ b/drivers/isdn/mISDN/clock.c @@ -37,6 +37,7 @@ #include #include #include +#include #include #include #include "core.h" @@ -45,15 +46,15 @@ static u_int *debug; static LIST_HEAD(iclock_list); static DEFINE_RWLOCK(iclock_lock); static u16 iclock_count; /* counter of last clock */ -static struct timeval iclock_tv; /* time stamp of last clock */ -static int iclock_tv_valid; /* already received one timestamp */ +static ktime_t iclock_timestamp; /* time stamp of last clock */ +static int iclock_timestamp_valid; /* already received one timestamp */ static struct mISDNclock *iclock_current; void mISDN_init_clock(u_int *dp) { debug = dp; - do_gettimeofday(&iclock_tv); + iclock_timestamp = ktime_get(); } static void @@ -86,7 +87,7 @@ select_iclock(void) } if (bestclock != iclock_current) { /* no clock received yet */ - iclock_tv_valid = 0; + iclock_timestamp_valid = 0; } iclock_current = bestclock; } @@ -139,12 +140,11 @@ mISDN_unregister_clock(struct mISDNclock *iclock) EXPORT_SYMBOL(mISDN_unregister_clock); void -mISDN_clock_update(struct mISDNclock *iclock, int samples, struct timeval *tv) +mISDN_clock_update(struct mISDNclock *iclock, int samples, ktime_t *timestamp) { u_long flags; - struct timeval tv_now; - time_t elapsed_sec; - int elapsed_8000th; + ktime_t timestamp_now; + u16 delta; write_lock_irqsave(&iclock_lock, flags); if (iclock_current != iclock) { @@ -156,33 +156,27 @@ mISDN_clock_update(struct mISDNclock *iclock, int samples, struct timeval *tv) write_unlock_irqrestore(&iclock_lock, flags); return; } - if (iclock_tv_valid) { + if (iclock_timestamp_valid) { /* increment sample counter by given samples */ iclock_count += samples; - if (tv) { /* tv must be set, if function call is delayed */ - iclock_tv.tv_sec = tv->tv_sec; - iclock_tv.tv_usec = tv->tv_usec; - } else - do_gettimeofday(&iclock_tv); + if (timestamp) { /* timestamp must be set, if function call is delayed */ + iclock_timestamp = *timestamp; + } else { + iclock_timestamp = ktime_get(); + } } else { /* calc elapsed time by system clock */ - if (tv) { /* tv must be set, if function call is delayed */ - tv_now.tv_sec = tv->tv_sec; - tv_now.tv_usec = tv->tv_usec; - } else - do_gettimeofday(&tv_now); - elapsed_sec = tv_now.tv_sec - iclock_tv.tv_sec; - elapsed_8000th = (tv_now.tv_usec / 125) - - (iclock_tv.tv_usec / 125); - if (elapsed_8000th < 0) { - elapsed_sec -= 1; - elapsed_8000th += 8000; + if (timestamp) { /* timestamp must be set, if function call is delayed */
timestamp_now = *timestamp; + } else { + timestamp_now = ktime_get(); } + delta = ktime_divns(ktime_sub(timestamp_now, iclock_timestamp), + (NSEC_PER_SEC / 8000)); /* add elapsed time to counter and set new timestamp */ - iclock_count += elapsed_sec * 8000 + elapsed_8000th; - iclock_tv.tv_sec = tv_now.tv_sec; - iclock_tv.tv_usec = tv_now.tv_usec; - iclock_tv_valid = 1; + iclock_count += delta; + iclock_timestamp = timestamp_now; + iclock_timestamp_valid = 1; if (*debug & DEBUG_CLOCK) printk("Received first clock from source '%s'.\n", iclock_current ? iclock_current->name : "nothing"); @@ -195,22 +189,17 @@ unsigned short mISDN_clock_get(void) { u_long flags; - struct timeval tv_now; - time_t elapsed_sec; - int elapsed_8000th; + ktime_t timestamp_now; + u16 delta; u16 count; read_lock_irqsave(&iclock_lock, flags); /* calc elapsed time by system clock */ - do_gettimeofday(&tv_now); - elapsed_sec = tv_now.tv_sec - iclock_tv.tv_sec; - elapsed_8000th = (tv_now.tv_usec / 125) - (iclock_tv.tv_usec / 125); - if (elapsed_8000th < 0) { - elapsed_sec -= 1; - elapsed_8000th += 8000; - } + timestamp_now = ktime_get(); + delta = ktime_divns(ktime_sub(timestamp_now, iclock_timestamp), + (NSEC_PER_SEC / 8000)); /* add elapsed time to counter */ - count = iclock_count + elapsed_sec * 8000 + elapsed_8000th; + count = iclock_count + delta; read_unlock_irqrestore(&iclock_lock, flags); return count; } diff --git a/include/linux/mISDNif.h b/include/linux/mISDNif.h index 246a3529ecf6..ac02c54520e9 100644 --- a/include/linux/mISDNif.h +++ b/include/linux/mISDNif.h @@ -596,7 +596,7 @@ static inline struct mISDNdevice *dev_to_mISDN(struct device *dev) } extern void set_channel_address(struct mISDNchannel *, u_int, u_int); -extern void mISDN_clock_update(struct mISDNclock *, int, struct timeval *); +extern void mISDN_clock_update(struct mISDNclock *, int, ktime_t *); extern unsigned short mISDN_clock_get(void); extern const char *mISDNDevName4ch(struct mISDNchannel *); -- cgit v1.2.3 From b47bcb93bbf201e9c5af698945755efeb60c0bc8 Mon Sep 17 00:00:00 2001 From: Christian König Date: Mon, 29 Feb 2016 10:53:11 -0500 Subject: dma-buf/fence: fix fence_is_later v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A fence is never later than itself. This caused a bunch of overhead for AMDGPU. v2: simplify check as suggested by Michel. Signed-off-by: Christian König Reviewed-by: Michel Dänzer Reviewed-by: Alex Deucher Reviewed-by: Gustavo Padovan Reviewed-by: Maarten Lankhorst Signed-off-by: Alex Deucher Signed-off-by: Sumit Semwal Signed-off-by: Daniel Vetter --- include/linux/fence.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fence.h b/include/linux/fence.h index bb522011383b..5aa95eb886f7 100644 --- a/include/linux/fence.h +++ b/include/linux/fence.h @@ -292,7 +292,7 @@ static inline bool fence_is_later(struct fence *f1, struct fence *f2) if (WARN_ON(f1->context != f2->context)) return false; - return f1->seqno - f2->seqno < INT_MAX; + return (int)(f1->seqno - f2->seqno) > 0; } /** -- cgit v1.2.3 From aace66b170ce7feda2d1860a81eefff37fa9d1d2 Mon Sep 17 00:00:00 2001 From: Nishanth Menon Date: Wed, 16 Mar 2016 19:23:14 -0500 Subject: mailbox: Introduce TI message manager driver Support for TI Message Manager Module. This hardware block manages a bunch of hardware queues meant for communication between processor entities. 
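(As a rough sketch of such a client, using the common mailbox framework this driver plugs into; the device, the channel name "tx", the timeout and the payload are hypothetical, and struct ti_msgmgr_message is the payload type introduced in the header added below:)

#include <linux/device.h>
#include <linux/err.h>
#include <linux/mailbox_client.h>
#include <linux/soc/ti/ti-msgmgr.h>

/* Hypothetical client: grab a queue by name from DT and send one message. */
static int example_msgmgr_send(struct device *dev, u8 *data, size_t len)
{
        struct mbox_client cl = {
                .dev = dev,
                .tx_block = true,
                .tx_tout = 100,                 /* ms, arbitrary for this sketch */
        };
        struct ti_msgmgr_message msg = { .len = len, .buf = data };
        struct mbox_chan *chan;
        int ret;

        chan = mbox_request_channel_byname(&cl, "tx");  /* name is made up */
        if (IS_ERR(chan))
                return PTR_ERR(chan);

        ret = mbox_send_message(chan, &msg);    /* payload is a ti_msgmgr_message */
        mbox_free_channel(chan);

        return ret < 0 ? ret : 0;
}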
Clients sitting on top of this would manage the required protocol for communicating with the counterpart entities. For more details on the TI Message Manager hardware block, see the documentation available here: http://www.ti.com/lit/ug/spruhy8/spruhy8.pdf Chapter 8.1 (Message Manager) Signed-off-by: Nishanth Menon Signed-off-by: Jassi Brar --- drivers/mailbox/Kconfig | 11 + drivers/mailbox/Makefile | 2 + drivers/mailbox/ti-msgmgr.c | 639 +++++++++++++++++++++++++++++++++++++++ include/linux/soc/ti/ti-msgmgr.h | 35 +++ 4 files changed, 687 insertions(+) create mode 100644 drivers/mailbox/ti-msgmgr.c create mode 100644 include/linux/soc/ti/ti-msgmgr.h (limited to 'include/linux') diff --git a/drivers/mailbox/Kconfig b/drivers/mailbox/Kconfig index 09719843ad4a..5305923752d2 100644 --- a/drivers/mailbox/Kconfig +++ b/drivers/mailbox/Kconfig @@ -87,6 +87,17 @@ config STI_MBOX Mailbox implementation for STMicroelectonics family chips with hardware for interprocessor communication. +config TI_MESSAGE_MANAGER + tristate "Texas Instruments Message Manager Driver" + depends on ARCH_KEYSTONE + help + An implementation of Message Manager slave driver for Keystone + architecture SoCs from Texas Instruments. Message Manager is a + communication entity found on a few of Texas Instruments' Keystone + architecture SoCs. These may be used for communication between + multiple processors within the SoC. Select this driver if your + platform has support for the hardware block. + config HI6220_MBOX tristate "Hi6220 Mailbox" depends on ARCH_HISI diff --git a/drivers/mailbox/Makefile b/drivers/mailbox/Makefile index 948671733aa7..0be3e742bb7d 100644 --- a/drivers/mailbox/Makefile +++ b/drivers/mailbox/Makefile @@ -20,6 +20,8 @@ obj-$(CONFIG_BCM2835_MBOX) += bcm2835-mailbox.o obj-$(CONFIG_STI_MBOX) += mailbox-sti.o +obj-$(CONFIG_TI_MESSAGE_MANAGER) += ti-msgmgr.o + obj-$(CONFIG_XGENE_SLIMPRO_MBOX) += mailbox-xgene-slimpro.o obj-$(CONFIG_HI6220_MBOX) += hi6220-mailbox.o diff --git a/drivers/mailbox/ti-msgmgr.c b/drivers/mailbox/ti-msgmgr.c new file mode 100644 index 000000000000..54b9e4cb4cfa --- /dev/null +++ b/drivers/mailbox/ti-msgmgr.c @@ -0,0 +1,639 @@ +/* + * Texas Instruments' Message Manager Driver + * + * Copyright (C) 2015-2016 Texas Instruments Incorporated - http://www.ti.com/ + * Nishanth Menon + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed "as is" WITHOUT ANY WARRANTY of any + * kind, whether express or implied; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#define pr_fmt(fmt) "%s: " fmt, __func__ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define Q_DATA_OFFSET(proxy, queue, reg) \ + ((0x10000 * (proxy)) + (0x80 * (queue)) + ((reg) * 4)) +#define Q_STATE_OFFSET(queue) ((queue) * 0x4) +#define Q_STATE_ENTRY_COUNT_MASK (0xFFF000) + +/** + * struct ti_msgmgr_valid_queue_desc - SoC valid queues meant for this processor + * @queue_id: Queue Number for this path + * @proxy_id: Proxy ID representing the processor in SoC + * @is_tx: Is this a transmit path?
+ */ +struct ti_msgmgr_valid_queue_desc { + u8 queue_id; + u8 proxy_id; + bool is_tx; +}; + +/** + * struct ti_msgmgr_desc - Description of message manager integration + * @queue_count: Number of Queues + * @max_message_size: Message size in bytes + * @max_messages: Number of messages + * @q_slices: Number of queue engines + * @q_proxies: Number of queue proxies per page + * @data_first_reg: First data register for proxy data region + * @data_last_reg: Last data register for proxy data region + * @tx_polled: Do I need to use polled mechanism for tx + * @tx_poll_timeout_ms: Timeout in ms if polled + * @valid_queues: List of Valid queues that the processor can access + * @num_valid_queues: Number of valid queues + * + * This structure is used in of match data to describe how integration + * for a specific compatible SoC is done. + */ +struct ti_msgmgr_desc { + u8 queue_count; + u8 max_message_size; + u8 max_messages; + u8 q_slices; + u8 q_proxies; + u8 data_first_reg; + u8 data_last_reg; + bool tx_polled; + int tx_poll_timeout_ms; + const struct ti_msgmgr_valid_queue_desc *valid_queues; + int num_valid_queues; +}; + +/** + * struct ti_queue_inst - Description of a queue instance + * @name: Queue Name + * @queue_id: Queue Identifier as mapped on SoC + * @proxy_id: Proxy Identifier as mapped on SoC + * @irq: IRQ for Rx Queue + * @is_tx: 'true' if transmit queue, else, 'false' + * @queue_buff_start: First register of Data Buffer + * @queue_buff_end: Last (or confirmation) register of Data buffer + * @queue_state: Queue status register + * @chan: Mailbox channel + * @rx_buff: Receive buffer pointer allocated at probe, max_message_size + */ +struct ti_queue_inst { + char name[30]; + u8 queue_id; + u8 proxy_id; + int irq; + bool is_tx; + void __iomem *queue_buff_start; + void __iomem *queue_buff_end; + void __iomem *queue_state; + struct mbox_chan *chan; + u32 *rx_buff; +}; + +/** + * struct ti_msgmgr_inst - Description of a Message Manager Instance + * @dev: device pointer corresponding to the Message Manager instance + * @desc: Description of the SoC integration + * @queue_proxy_region: Queue proxy region where queue buffers are located + * @queue_state_debug_region: Queue status register regions + * @num_valid_queues: Number of valid queues defined for the processor + * Note: other queues are probably reserved for other processors + * in the SoC. + * @qinsts: Array of valid Queue Instances for the Processor + * @mbox: Mailbox Controller + * @chans: Array for channels corresponding to the Queue Instances. + */ +struct ti_msgmgr_inst { + struct device *dev; + const struct ti_msgmgr_desc *desc; + void __iomem *queue_proxy_region; + void __iomem *queue_state_debug_region; + u8 num_valid_queues; + struct ti_queue_inst *qinsts; + struct mbox_controller mbox; + struct mbox_chan *chans; +}; + +/** + * ti_msgmgr_queue_get_num_messages() - Get the number of pending messages + * @qinst: Queue instance for which we check the number of pending messages + * + * Return: number of messages pending in the queue (0 == no pending messages) + */ +static inline int ti_msgmgr_queue_get_num_messages(struct ti_queue_inst *qinst) +{ + u32 val; + + /* + * We cannot use relaxed operation here - update may happen + * real-time. 
+ */ + val = readl(qinst->queue_state) & Q_STATE_ENTRY_COUNT_MASK; + val >>= __ffs(Q_STATE_ENTRY_COUNT_MASK); + + return val; +} + +/** + * ti_msgmgr_queue_rx_interrupt() - Interrupt handler for receive Queue + * @irq: Interrupt number + * @p: Channel Pointer + * + * Return: -EINVAL if there is no instance + * IRQ_NONE if the interrupt is not ours. + * IRQ_HANDLED if the rx interrupt was successfully handled. + */ +static irqreturn_t ti_msgmgr_queue_rx_interrupt(int irq, void *p) +{ + struct mbox_chan *chan = p; + struct device *dev = chan->mbox->dev; + struct ti_msgmgr_inst *inst = dev_get_drvdata(dev); + struct ti_queue_inst *qinst = chan->con_priv; + const struct ti_msgmgr_desc *desc; + int msg_count, num_words; + struct ti_msgmgr_message message; + void __iomem *data_reg; + u32 *word_data; + + if (WARN_ON(!inst)) { + dev_err(dev, "no platform drv data??\n"); + return -EINVAL; + } + + /* Do I have an invalid interrupt source? */ + if (qinst->is_tx) { + dev_err(dev, "Cannot handle rx interrupt on tx channel %s\n", + qinst->name); + return IRQ_NONE; + } + + /* Do I actually have messages to read? */ + msg_count = ti_msgmgr_queue_get_num_messages(qinst); + if (!msg_count) { + /* Shared IRQ? */ + dev_dbg(dev, "Spurious event - 0 pending data!\n"); + return IRQ_NONE; + } + + /* + * I have no idea about the protocol being used to communicate with the + * remote producer - 0 could be valid data, so I wont make a judgement + * of how many bytes I should be reading. Let the client figure this + * out.. I just read the full message and pass it on.. + */ + desc = inst->desc; + message.len = desc->max_message_size; + message.buf = (u8 *)qinst->rx_buff; + + /* + * NOTE about register access involved here: + * the hardware block is implemented with 32bit access operations and no + * support for data splitting. We don't want the hardware to misbehave + * with sub 32bit access - For example: if the last register read is + * split into byte wise access, it can result in the queue getting + * stuck or indeterminate behavior. An out of order read operation may + * result in weird data results as well. + * Hence, we do not use memcpy_fromio or __ioread32_copy here, instead + * we depend on readl for the purpose. + * + * Also note that the final register read automatically marks the + * queue message as read. + */ + for (data_reg = qinst->queue_buff_start, word_data = qinst->rx_buff, + num_words = (desc->max_message_size / sizeof(u32)); + num_words; num_words--, data_reg += sizeof(u32), word_data++) + *word_data = readl(data_reg); + + /* + * Last register read automatically clears the IRQ if only 1 message + * is pending - so send the data up the stack.. + * NOTE: Client is expected to be as optimal as possible, since + * we invoke the handler in IRQ context. + */ + mbox_chan_received_data(chan, (void *)&message); + + return IRQ_HANDLED; +} + +/** + * ti_msgmgr_queue_peek_data() - Peek to see if there are any rx messages. + * @chan: Channel Pointer + * + * Return: 'true' if there is pending rx data, 'false' if there is none. + */ +static bool ti_msgmgr_queue_peek_data(struct mbox_chan *chan) +{ + struct ti_queue_inst *qinst = chan->con_priv; + int msg_count; + + if (qinst->is_tx) + return false; + + msg_count = ti_msgmgr_queue_get_num_messages(qinst); + + return msg_count ? true : false; +} + +/** + * ti_msgmgr_last_tx_done() - See if all the tx messages are sent + * @chan: Channel pointer + * + * Return: 'true' is no pending tx data, 'false' if there are any. 
+ */ +static bool ti_msgmgr_last_tx_done(struct mbox_chan *chan) +{ + struct ti_queue_inst *qinst = chan->con_priv; + int msg_count; + + if (!qinst->is_tx) + return false; + + msg_count = ti_msgmgr_queue_get_num_messages(qinst); + + /* if we have any messages pending.. */ + return msg_count ? false : true; +} + +/** + * ti_msgmgr_send_data() - Send data + * @chan: Channel Pointer + * @data: ti_msgmgr_message * Message Pointer + * + * Return: 0 if all goes good, else appropriate error messages. + */ +static int ti_msgmgr_send_data(struct mbox_chan *chan, void *data) +{ + struct device *dev = chan->mbox->dev; + struct ti_msgmgr_inst *inst = dev_get_drvdata(dev); + const struct ti_msgmgr_desc *desc; + struct ti_queue_inst *qinst = chan->con_priv; + int num_words, trail_bytes; + struct ti_msgmgr_message *message = data; + void __iomem *data_reg; + u32 *word_data; + + if (WARN_ON(!inst)) { + dev_err(dev, "no platform drv data??\n"); + return -EINVAL; + } + desc = inst->desc; + + if (desc->max_message_size < message->len) { + dev_err(dev, "Queue %s message length %d > max %d\n", + qinst->name, message->len, desc->max_message_size); + return -EINVAL; + } + + /* NOTE: Constraints similar to rx path exists here as well */ + for (data_reg = qinst->queue_buff_start, + num_words = message->len / sizeof(u32), + word_data = (u32 *)message->buf; + num_words; num_words--, data_reg += sizeof(u32), word_data++) + writel(*word_data, data_reg); + + trail_bytes = message->len % sizeof(u32); + if (trail_bytes) { + u32 data_trail = *word_data; + + /* Ensure all unused data is 0 */ + data_trail &= 0xFFFFFFFF >> (8 * (sizeof(u32) - trail_bytes)); + writel(data_trail, data_reg); + data_reg++; + } + /* + * 'data_reg' indicates next register to write. If we did not already + * write on tx complete reg(last reg), we must do so for transmit + */ + if (data_reg <= qinst->queue_buff_end) + writel(0, qinst->queue_buff_end); + + return 0; +} + +/** + * ti_msgmgr_queue_startup() - Startup queue + * @chan: Channel pointer + * + * Return: 0 if all goes good, else return corresponding error message + */ +static int ti_msgmgr_queue_startup(struct mbox_chan *chan) +{ + struct ti_queue_inst *qinst = chan->con_priv; + struct device *dev = chan->mbox->dev; + int ret; + + if (!qinst->is_tx) { + /* + * With the expectation that the IRQ might be shared in SoC + */ + ret = request_irq(qinst->irq, ti_msgmgr_queue_rx_interrupt, + IRQF_SHARED, qinst->name, chan); + if (ret) { + dev_err(dev, "Unable to get IRQ %d on %s(res=%d)\n", + qinst->irq, qinst->name, ret); + return ret; + } + } + + return 0; +} + +/** + * ti_msgmgr_queue_shutdown() - Shutdown the queue + * @chan: Channel pointer + */ +static void ti_msgmgr_queue_shutdown(struct mbox_chan *chan) +{ + struct ti_queue_inst *qinst = chan->con_priv; + + if (!qinst->is_tx) + free_irq(qinst->irq, chan); +} + +/** + * ti_msgmgr_of_xlate() - Translation of phandle to queue + * @mbox: Mailbox controller + * @p: phandle pointer + * + * Return: Mailbox channel corresponding to the queue, else return error + * pointer. 
+ */ +static struct mbox_chan *ti_msgmgr_of_xlate(struct mbox_controller *mbox, + const struct of_phandle_args *p) +{ + struct ti_msgmgr_inst *inst; + int req_qid, req_pid; + struct ti_queue_inst *qinst; + int i; + + inst = container_of(mbox, struct ti_msgmgr_inst, mbox); + if (WARN_ON(!inst)) + return ERR_PTR(-EINVAL); + + /* #mbox-cells is 2 */ + if (p->args_count != 2) { + dev_err(inst->dev, "Invalid arguments in dt[%d] instead of 2\n", + p->args_count); + return ERR_PTR(-EINVAL); + } + req_qid = p->args[0]; + req_pid = p->args[1]; + + for (qinst = inst->qinsts, i = 0; i < inst->num_valid_queues; + i++, qinst++) { + if (req_qid == qinst->queue_id && req_pid == qinst->proxy_id) + return qinst->chan; + } + + dev_err(inst->dev, "Queue ID %d, Proxy ID %d is wrong on %s\n", + req_qid, req_pid, p->np->name); + return ERR_PTR(-ENOENT); +} + +/** + * ti_msgmgr_queue_setup() - Setup data structures for each queue instance + * @idx: index of the queue + * @dev: pointer to the message manager device + * @np: pointer to the of node + * @inst: Queue instance pointer + * @d: Message Manager instance description data + * @qd: Queue description data + * @qinst: Queue instance pointer + * @chan: pointer to mailbox channel + * + * Return: 0 if all went well, else return corresponding error + */ +static int ti_msgmgr_queue_setup(int idx, struct device *dev, + struct device_node *np, + struct ti_msgmgr_inst *inst, + const struct ti_msgmgr_desc *d, + const struct ti_msgmgr_valid_queue_desc *qd, + struct ti_queue_inst *qinst, + struct mbox_chan *chan) +{ + qinst->proxy_id = qd->proxy_id; + qinst->queue_id = qd->queue_id; + + if (qinst->queue_id > d->queue_count) { + dev_err(dev, "Queue Data [idx=%d] queuid %d > %d\n", + idx, qinst->queue_id, d->queue_count); + return -ERANGE; + } + + qinst->is_tx = qd->is_tx; + snprintf(qinst->name, sizeof(qinst->name), "%s %s_%03d_%03d", + dev_name(dev), qinst->is_tx ? 
"tx" : "rx", qinst->queue_id, + qinst->proxy_id); + + if (!qinst->is_tx) { + char of_rx_irq_name[7]; + + snprintf(of_rx_irq_name, sizeof(of_rx_irq_name), + "rx_%03d", qinst->queue_id); + + qinst->irq = of_irq_get_byname(np, of_rx_irq_name); + if (qinst->irq < 0) { + dev_crit(dev, + "[%d]QID %d PID %d:No IRQ[%s]: %d\n", + idx, qinst->queue_id, qinst->proxy_id, + of_rx_irq_name, qinst->irq); + return qinst->irq; + } + /* Allocate usage buffer for rx */ + qinst->rx_buff = devm_kzalloc(dev, + d->max_message_size, GFP_KERNEL); + if (!qinst->rx_buff) + return -ENOMEM; + } + + qinst->queue_buff_start = inst->queue_proxy_region + + Q_DATA_OFFSET(qinst->proxy_id, qinst->queue_id, d->data_first_reg); + qinst->queue_buff_end = inst->queue_proxy_region + + Q_DATA_OFFSET(qinst->proxy_id, qinst->queue_id, d->data_last_reg); + qinst->queue_state = inst->queue_state_debug_region + + Q_STATE_OFFSET(qinst->queue_id); + qinst->chan = chan; + + chan->con_priv = qinst; + + dev_dbg(dev, "[%d] qidx=%d pidx=%d irq=%d q_s=%p q_e = %p\n", + idx, qinst->queue_id, qinst->proxy_id, qinst->irq, + qinst->queue_buff_start, qinst->queue_buff_end); + return 0; +} + +/* Queue operations */ +static const struct mbox_chan_ops ti_msgmgr_chan_ops = { + .startup = ti_msgmgr_queue_startup, + .shutdown = ti_msgmgr_queue_shutdown, + .peek_data = ti_msgmgr_queue_peek_data, + .last_tx_done = ti_msgmgr_last_tx_done, + .send_data = ti_msgmgr_send_data, +}; + +/* Keystone K2G SoC integration details */ +static const struct ti_msgmgr_valid_queue_desc k2g_valid_queues[] = { + {.queue_id = 0, .proxy_id = 0, .is_tx = true,}, + {.queue_id = 1, .proxy_id = 0, .is_tx = true,}, + {.queue_id = 2, .proxy_id = 0, .is_tx = true,}, + {.queue_id = 3, .proxy_id = 0, .is_tx = true,}, + {.queue_id = 5, .proxy_id = 2, .is_tx = false,}, + {.queue_id = 56, .proxy_id = 1, .is_tx = true,}, + {.queue_id = 57, .proxy_id = 2, .is_tx = false,}, + {.queue_id = 58, .proxy_id = 3, .is_tx = true,}, + {.queue_id = 59, .proxy_id = 4, .is_tx = true,}, + {.queue_id = 60, .proxy_id = 5, .is_tx = true,}, + {.queue_id = 61, .proxy_id = 6, .is_tx = true,}, +}; + +static const struct ti_msgmgr_desc k2g_desc = { + .queue_count = 64, + .max_message_size = 64, + .max_messages = 128, + .q_slices = 1, + .q_proxies = 1, + .data_first_reg = 16, + .data_last_reg = 31, + .tx_polled = false, + .valid_queues = k2g_valid_queues, + .num_valid_queues = ARRAY_SIZE(k2g_valid_queues), +}; + +static const struct of_device_id ti_msgmgr_of_match[] = { + {.compatible = "ti,k2g-message-manager", .data = &k2g_desc}, + { /* Sentinel */ } +}; +MODULE_DEVICE_TABLE(of, ti_msgmgr_of_match); + +static int ti_msgmgr_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + const struct of_device_id *of_id; + struct device_node *np; + struct resource *res; + const struct ti_msgmgr_desc *desc; + struct ti_msgmgr_inst *inst; + struct ti_queue_inst *qinst; + struct mbox_controller *mbox; + struct mbox_chan *chans; + int queue_count; + int i; + int ret = -EINVAL; + const struct ti_msgmgr_valid_queue_desc *queue_desc; + + if (!dev->of_node) { + dev_err(dev, "no OF information\n"); + return -EINVAL; + } + np = dev->of_node; + + of_id = of_match_device(ti_msgmgr_of_match, dev); + if (!of_id) { + dev_err(dev, "OF data missing\n"); + return -EINVAL; + } + desc = of_id->data; + + inst = devm_kzalloc(dev, sizeof(*inst), GFP_KERNEL); + if (!inst) + return -ENOMEM; + + inst->dev = dev; + inst->desc = desc; + + res = platform_get_resource_byname(pdev, IORESOURCE_MEM, + "queue_proxy_region"); + 
inst->queue_proxy_region = devm_ioremap_resource(dev, res); + if (IS_ERR(inst->queue_proxy_region)) + return PTR_ERR(inst->queue_proxy_region); + + res = platform_get_resource_byname(pdev, IORESOURCE_MEM, + "queue_state_debug_region"); + inst->queue_state_debug_region = devm_ioremap_resource(dev, res); + if (IS_ERR(inst->queue_state_debug_region)) + return PTR_ERR(inst->queue_state_debug_region); + + dev_dbg(dev, "proxy region=%p, queue_state=%p\n", + inst->queue_proxy_region, inst->queue_state_debug_region); + + queue_count = desc->num_valid_queues; + if (!queue_count || queue_count > desc->queue_count) { + dev_crit(dev, "Invalid Number of queues %d. Max %d\n", + queue_count, desc->queue_count); + return -ERANGE; + } + inst->num_valid_queues = queue_count; + + qinst = devm_kzalloc(dev, sizeof(*qinst) * queue_count, GFP_KERNEL); + if (!qinst) + return -ENOMEM; + inst->qinsts = qinst; + + chans = devm_kzalloc(dev, sizeof(*chans) * queue_count, GFP_KERNEL); + if (!chans) + return -ENOMEM; + inst->chans = chans; + + for (i = 0, queue_desc = desc->valid_queues; + i < queue_count; i++, qinst++, chans++, queue_desc++) { + ret = ti_msgmgr_queue_setup(i, dev, np, inst, + desc, queue_desc, qinst, chans); + if (ret) + return ret; + } + + mbox = &inst->mbox; + mbox->dev = dev; + mbox->ops = &ti_msgmgr_chan_ops; + mbox->chans = inst->chans; + mbox->num_chans = inst->num_valid_queues; + mbox->txdone_irq = false; + mbox->txdone_poll = desc->tx_polled; + if (desc->tx_polled) + mbox->txpoll_period = desc->tx_poll_timeout_ms; + mbox->of_xlate = ti_msgmgr_of_xlate; + + platform_set_drvdata(pdev, inst); + ret = mbox_controller_register(mbox); + if (ret) + dev_err(dev, "Failed to register mbox_controller(%d)\n", ret); + + return ret; +} + +static int ti_msgmgr_remove(struct platform_device *pdev) +{ + struct ti_msgmgr_inst *inst; + + inst = platform_get_drvdata(pdev); + mbox_controller_unregister(&inst->mbox); + + return 0; +} + +static struct platform_driver ti_msgmgr_driver = { + .probe = ti_msgmgr_probe, + .remove = ti_msgmgr_remove, + .driver = { + .name = "ti-msgmgr", + .of_match_table = of_match_ptr(ti_msgmgr_of_match), + }, +}; +module_platform_driver(ti_msgmgr_driver); + +MODULE_LICENSE("GPL v2"); +MODULE_DESCRIPTION("TI message manager driver"); +MODULE_AUTHOR("Nishanth Menon"); +MODULE_ALIAS("platform:ti-msgmgr"); diff --git a/include/linux/soc/ti/ti-msgmgr.h b/include/linux/soc/ti/ti-msgmgr.h new file mode 100644 index 000000000000..eac8e0c6fe11 --- /dev/null +++ b/include/linux/soc/ti/ti-msgmgr.h @@ -0,0 +1,35 @@ +/* + * Texas Instruments' Message Manager + * + * Copyright (C) 2015-2016 Texas Instruments Incorporated - http://www.ti.com/ + * Nishanth Menon + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed "as is" WITHOUT ANY WARRANTY of any + * kind, whether express or implied; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#ifndef TI_MSGMGR_H +#define TI_MSGMGR_H + +/** + * struct ti_msgmgr_message - Message Manager structure + * @len: Length of data in the Buffer + * @buf: Buffer pointer + * + * This is the structure for data used in mbox_send_message + * the length of data buffer used depends on the SoC integration + * parameters - each message may be 64, 128 bytes long depending + * on SoC. 
Client is supposed to be aware of this. + */ +struct ti_msgmgr_message { + size_t len; + u8 *buf; +}; + +#endif /* TI_MSGMGR_H */ -- cgit v1.2.3

From b06e7de8a9d8d1d540ec122bbdf2face2a211634 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 23 Feb 2016 10:25:22 +0200 Subject: net/mlx5_core: Refactor device capability function The device capability function was called in the same way in all places: it was called twice for every queried parameter, and the only difference between the two calls was the HCA capability mode. The proposed change unifies these calls into one function. Signed-off-by: Leon Romanovsky Reviewed-by: Saeed Mahameed Signed-off-by: Doug Ledford --- drivers/net/ethernet/mellanox/mlx5/core/fw.c | 55 ++++---------------------- drivers/net/ethernet/mellanox/mlx5/core/main.c | 28 +++++++------ include/linux/mlx5/driver.h | 3 +- 3 files changed, 24 insertions(+), 62 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw.c b/drivers/net/ethernet/mellanox/mlx5/core/fw.c index aa1ab4702385..fe6dfd8697f4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fw.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fw.c @@ -98,88 +98,49 @@ int mlx5_query_hca_caps(struct mlx5_core_dev *dev) { int err; - err = mlx5_core_get_caps(dev, MLX5_CAP_GENERAL, HCA_CAP_OPMOD_GET_CUR); - if (err) - return err; - - err = mlx5_core_get_caps(dev, MLX5_CAP_GENERAL, HCA_CAP_OPMOD_GET_MAX); + err = mlx5_core_get_caps(dev, MLX5_CAP_GENERAL); if (err) return err; if (MLX5_CAP_GEN(dev, eth_net_offloads)) { - err = mlx5_core_get_caps(dev, MLX5_CAP_ETHERNET_OFFLOADS, - HCA_CAP_OPMOD_GET_CUR); - if (err) - return err; - err = mlx5_core_get_caps(dev, MLX5_CAP_ETHERNET_OFFLOADS, - HCA_CAP_OPMOD_GET_MAX); + err = mlx5_core_get_caps(dev, MLX5_CAP_ETHERNET_OFFLOADS); if (err) return err; } if (MLX5_CAP_GEN(dev, pg)) { - err = mlx5_core_get_caps(dev, MLX5_CAP_ODP, - HCA_CAP_OPMOD_GET_CUR); - if (err) - return err; - err = mlx5_core_get_caps(dev, MLX5_CAP_ODP, - HCA_CAP_OPMOD_GET_MAX); + err = mlx5_core_get_caps(dev, MLX5_CAP_ODP); if (err) return err; } if (MLX5_CAP_GEN(dev, atomic)) { - err = mlx5_core_get_caps(dev, MLX5_CAP_ATOMIC, - HCA_CAP_OPMOD_GET_CUR); - if (err) - return err; - err = mlx5_core_get_caps(dev, MLX5_CAP_ATOMIC, - HCA_CAP_OPMOD_GET_MAX); + err = mlx5_core_get_caps(dev, MLX5_CAP_ATOMIC); if (err) return err; } if (MLX5_CAP_GEN(dev, roce)) { - err = mlx5_core_get_caps(dev, MLX5_CAP_ROCE, - HCA_CAP_OPMOD_GET_CUR); - if (err) - return err; - err = mlx5_core_get_caps(dev, MLX5_CAP_ROCE, - HCA_CAP_OPMOD_GET_MAX); + err = mlx5_core_get_caps(dev, MLX5_CAP_ROCE); if (err) return err; } if (MLX5_CAP_GEN(dev, nic_flow_table)) { - err = mlx5_core_get_caps(dev, MLX5_CAP_FLOW_TABLE, - HCA_CAP_OPMOD_GET_CUR); - if (err) - return err; - err = mlx5_core_get_caps(dev, MLX5_CAP_FLOW_TABLE, - HCA_CAP_OPMOD_GET_MAX); + err = mlx5_core_get_caps(dev, MLX5_CAP_FLOW_TABLE); if (err) return err; } if (MLX5_CAP_GEN(dev, vport_group_manager) && MLX5_CAP_GEN(dev, eswitch_flow_table)) { - err = mlx5_core_get_caps(dev, MLX5_CAP_ESWITCH_FLOW_TABLE, - HCA_CAP_OPMOD_GET_CUR); - if (err) - return err; - err = mlx5_core_get_caps(dev, MLX5_CAP_ESWITCH_FLOW_TABLE, - HCA_CAP_OPMOD_GET_MAX); + err = mlx5_core_get_caps(dev, MLX5_CAP_ESWITCH_FLOW_TABLE); if (err) return err; } if (MLX5_CAP_GEN(dev, eswitch_flow_table)) { - err = mlx5_core_get_caps(dev, MLX5_CAP_ESWITCH, - HCA_CAP_OPMOD_GET_CUR); - if (err) - return err; - err = mlx5_core_get_caps(dev, MLX5_CAP_ESWITCH, - HCA_CAP_OPMOD_GET_MAX); + err =
mlx5_core_get_caps(dev, MLX5_CAP_ESWITCH); if (err) return err; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 1d43855d9f32..f2354bc0ec19 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -341,8 +341,9 @@ static u16 to_fw_pkey_sz(u32 size) } } -int mlx5_core_get_caps(struct mlx5_core_dev *dev, enum mlx5_cap_type cap_type, - enum mlx5_cap_mode cap_mode) +static int mlx5_core_get_caps_mode(struct mlx5_core_dev *dev, + enum mlx5_cap_type cap_type, + enum mlx5_cap_mode cap_mode) { u8 in[MLX5_ST_SZ_BYTES(query_hca_cap_in)]; int out_sz = MLX5_ST_SZ_BYTES(query_hca_cap_out); @@ -392,6 +393,16 @@ query_ex: return err; } +int mlx5_core_get_caps(struct mlx5_core_dev *dev, enum mlx5_cap_type cap_type) +{ + int ret; + + ret = mlx5_core_get_caps_mode(dev, cap_type, HCA_CAP_OPMOD_GET_CUR); + if (ret) + return ret; + return mlx5_core_get_caps_mode(dev, cap_type, HCA_CAP_OPMOD_GET_MAX); +} + static int set_caps(struct mlx5_core_dev *dev, void *in, int in_sz, int opmod) { u32 out[MLX5_ST_SZ_DW(set_hca_cap_out)]; @@ -419,12 +430,7 @@ static int handle_hca_cap_atomic(struct mlx5_core_dev *dev) int err; if (MLX5_CAP_GEN(dev, atomic)) { - err = mlx5_core_get_caps(dev, MLX5_CAP_ATOMIC, - HCA_CAP_OPMOD_GET_CUR); - if (err) - return err; - err = mlx5_core_get_caps(dev, MLX5_CAP_ATOMIC, - HCA_CAP_OPMOD_GET_MAX); + err = mlx5_core_get_caps(dev, MLX5_CAP_ATOMIC); if (err) return err; } else { @@ -466,11 +472,7 @@ static int handle_hca_cap(struct mlx5_core_dev *dev) if (!set_ctx) goto query_ex; - err = mlx5_core_get_caps(dev, MLX5_CAP_GENERAL, HCA_CAP_OPMOD_GET_MAX); - if (err) - goto query_ex; - - err = mlx5_core_get_caps(dev, MLX5_CAP_GENERAL, HCA_CAP_OPMOD_GET_CUR); + err = mlx5_core_get_caps(dev, MLX5_CAP_GENERAL); if (err) goto query_ex; diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 9108904a6a56..406b27ec1d42 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -706,8 +706,7 @@ void mlx5_cmd_use_events(struct mlx5_core_dev *dev); void mlx5_cmd_use_polling(struct mlx5_core_dev *dev); int mlx5_cmd_status_to_err(struct mlx5_outbox_hdr *hdr); int mlx5_cmd_status_to_err_v2(void *ptr); -int mlx5_core_get_caps(struct mlx5_core_dev *dev, enum mlx5_cap_type cap_type, - enum mlx5_cap_mode cap_mode); +int mlx5_core_get_caps(struct mlx5_core_dev *dev, enum mlx5_cap_type cap_type); int mlx5_cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out, int out_size); int mlx5_cmd_exec_cb(struct mlx5_core_dev *dev, void *in, int in_size, -- cgit v1.2.3 From 3f0393a57509c200aeecc5e3984bf1a47bffc578 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Tue, 23 Feb 2016 10:25:23 +0200 Subject: net/mlx5_core: Introduce offload arithmetic hardware capabilities Define the necessary hardware structures for the offload arithmetic capabilities and read/cache them on driver load. 
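For illustration only (not part of the original patch), a consumer of the cached capabilities could test the new bits through the MLX5_CAP_VECTOR_CALC() accessor added below; the guard on MLX5_CAP_GEN(dev, vector_calc) mirrors the check used at query time, and the helper name is hypothetical:

	/* Sketch: report whether the device advertises matrix calc offload */
	static bool mlx5_vector_calc_supported(struct mlx5_core_dev *dev)
	{
		if (!MLX5_CAP_GEN(dev, vector_calc))
			return false;

		return MLX5_CAP_VECTOR_CALC(dev, calc_matrix);
	}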
Signed-off-by: Sagi Grimberg Signed-off-by: Leon Romanovsky Reviewed-by: Saeed Mahameed Signed-off-by: Doug Ledford --- drivers/net/ethernet/mellanox/mlx5/core/fw.c | 6 ++++++ include/linux/mlx5/device.h | 6 ++++++ include/linux/mlx5/mlx5_ifc.h | 31 +++++++++++++++++++++++++++- 3 files changed, 42 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw.c b/drivers/net/ethernet/mellanox/mlx5/core/fw.c index fe6dfd8697f4..75c7ae6a5cc4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fw.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fw.c @@ -145,6 +145,12 @@ int mlx5_query_hca_caps(struct mlx5_core_dev *dev) return err; } + if (MLX5_CAP_GEN(dev, vector_calc)) { + err = mlx5_core_get_caps(dev, MLX5_CAP_VECTOR_CALC); + if (err) + return err; + } + return 0; } diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 9566b3b3b2c5..4b531c44b3c7 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -1219,6 +1219,8 @@ enum mlx5_cap_type { MLX5_CAP_FLOW_TABLE, MLX5_CAP_ESWITCH_FLOW_TABLE, MLX5_CAP_ESWITCH, + MLX5_CAP_RESERVED, + MLX5_CAP_VECTOR_CALC, /* NUM OF CAP Types */ MLX5_CAP_NUM }; @@ -1281,6 +1283,10 @@ enum mlx5_cap_type { #define MLX5_CAP_ODP(mdev, cap)\ MLX5_GET(odp_cap, mdev->hca_caps_cur[MLX5_CAP_ODP], cap) +#define MLX5_CAP_VECTOR_CALC(mdev, cap) \ + MLX5_GET(vector_calc_cap, \ + mdev->hca_caps_cur[MLX5_CAP_VECTOR_CALC], cap) + enum { MLX5_CMD_STAT_OK = 0x0, MLX5_CMD_STAT_INT_ERR = 0x1, diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index a3cacab22849..ad0a44b1592c 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -616,6 +616,33 @@ struct mlx5_ifc_odp_cap_bits { u8 reserved_at_e0[0x720]; }; +struct mlx5_ifc_calc_op { + u8 reserved_at_0[0x10]; + u8 reserved_at_10[0x9]; + u8 op_swap_endianness[0x1]; + u8 op_min[0x1]; + u8 op_xor[0x1]; + u8 op_or[0x1]; + u8 op_and[0x1]; + u8 op_max[0x1]; + u8 op_add[0x1]; +}; + +struct mlx5_ifc_vector_calc_cap_bits { + u8 calc_matrix[0x1]; + u8 reserved_at_1[0x1f]; + u8 reserved_at_20[0x8]; + u8 max_vec_count[0x8]; + u8 reserved_at_30[0xd]; + u8 max_chunk_size[0x3]; + struct mlx5_ifc_calc_op calc0; + struct mlx5_ifc_calc_op calc1; + struct mlx5_ifc_calc_op calc2; + struct mlx5_ifc_calc_op calc3; + + u8 reserved_at_e0[0x720]; +}; + enum { MLX5_WQ_TYPE_LINKED_LIST = 0x0, MLX5_WQ_TYPE_CYCLIC = 0x1, @@ -770,7 +797,8 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 cd[0x1]; u8 reserved_at_22c[0x1]; u8 apm[0x1]; - u8 reserved_at_22e[0x2]; + u8 vector_calc[0x1]; + u8 reserved_at_22f[0x1]; u8 imaicl[0x1]; u8 reserved_at_231[0x4]; u8 qkv[0x1]; @@ -1940,6 +1968,7 @@ union mlx5_ifc_hca_cap_union_bits { struct mlx5_ifc_flow_table_nic_cap_bits flow_table_nic_cap; struct mlx5_ifc_flow_table_eswitch_cap_bits flow_table_eswitch_cap; struct mlx5_ifc_e_switch_cap_bits e_switch_cap; + struct mlx5_ifc_vector_calc_cap_bits vector_calc_cap; u8 reserved_at_0[0x8000]; }; -- cgit v1.2.3 From cc8e27cc97318471b7e707932d5b93b0d5f70830 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Fri, 11 Mar 2016 22:58:34 +0200 Subject: net/core: Add support for configuring VF GUIDs Add two new NLAs to support configuration of Infiniband node or port GUIDs. 
New applications can choose to use this interface to configure GUIDs with iproute2, using commands such as:

ip link set dev ib0 vf 0 node_guid 00:02:c9:03:00:21:6e:70
ip link set dev ib0 vf 0 port_guid 00:02:c9:03:00:21:6e:78

A new ndo, ndo_set_vf_guid, is introduced to notify the net device of the request to change the GUID. Signed-off-by: Eli Cohen Reviewed-by: Or Gerlitz Signed-off-by: Doug Ledford --- include/linux/netdevice.h | 3 +++ include/uapi/linux/if_link.h | 7 +++++++ net/core/rtnetlink.c | 36 ++++++++++++++++++++++++++++++++++++ 3 files changed, 46 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 5440b7b705eb..7b4ae218b90b 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1147,6 +1147,9 @@ struct net_device_ops { struct nlattr *port[]); int (*ndo_get_vf_port)(struct net_device *dev, int vf, struct sk_buff *skb); + int (*ndo_set_vf_guid)(struct net_device *dev, + int vf, u64 guid, + int guid_type); int (*ndo_set_vf_rss_query_en)( struct net_device *dev, int vf, bool setting); diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index a30b78090594..1d01e8a4e5dd 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -556,6 +556,8 @@ enum { */ IFLA_VF_STATS, /* network device statistics */ IFLA_VF_TRUST, /* Trust VF */ + IFLA_VF_IB_NODE_GUID, /* VF Infiniband node GUID */ + IFLA_VF_IB_PORT_GUID, /* VF Infiniband port GUID */ __IFLA_VF_MAX, }; @@ -588,6 +590,11 @@ struct ifla_vf_spoofchk { __u32 setting; }; +struct ifla_vf_guid { + __u32 vf; + __u64 guid; +}; + enum { IFLA_VF_LINK_STATE_AUTO, /* link state of the uplink */ IFLA_VF_LINK_STATE_ENABLE, /* link always up */ diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index d735e854f916..4b6f3db9f8af 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1387,6 +1387,8 @@ static const struct nla_policy ifla_vf_policy[IFLA_VF_MAX+1] = { [IFLA_VF_RSS_QUERY_EN] = { .len = sizeof(struct ifla_vf_rss_query_en) }, [IFLA_VF_STATS] = { .type = NLA_NESTED }, [IFLA_VF_TRUST] = { .len = sizeof(struct ifla_vf_trust) }, + [IFLA_VF_IB_NODE_GUID] = { .len = sizeof(struct ifla_vf_guid) }, + [IFLA_VF_IB_PORT_GUID] = { .len = sizeof(struct ifla_vf_guid) }, }; static const struct nla_policy ifla_vf_stats_policy[IFLA_VF_STATS_MAX + 1] = { @@ -1534,6 +1536,22 @@ static int validate_linkmsg(struct net_device *dev, struct nlattr *tb[]) return 0; } +static int handle_infiniband_guid(struct net_device *dev, struct ifla_vf_guid *ivt, + int guid_type) +{ + const struct net_device_ops *ops = dev->netdev_ops; + + return ops->ndo_set_vf_guid(dev, ivt->vf, ivt->guid, guid_type); +} + +static int handle_vf_guid(struct net_device *dev, struct ifla_vf_guid *ivt, int guid_type) +{ + if (dev->type != ARPHRD_INFINIBAND) + return -EOPNOTSUPP; + + return handle_infiniband_guid(dev, ivt, guid_type); +} + static int do_setvfinfo(struct net_device *dev, struct nlattr **tb) { const struct net_device_ops *ops = dev->netdev_ops; @@ -1636,6 +1654,24 @@ static int do_setvfinfo(struct net_device *dev, struct nlattr **tb) return err; } + if (tb[IFLA_VF_IB_NODE_GUID]) { + struct ifla_vf_guid *ivt = nla_data(tb[IFLA_VF_IB_NODE_GUID]); + + if (!ops->ndo_set_vf_guid) + return -EOPNOTSUPP; + + return handle_vf_guid(dev, ivt, IFLA_VF_IB_NODE_GUID); + } + + if (tb[IFLA_VF_IB_PORT_GUID]) { + struct ifla_vf_guid *ivt = nla_data(tb[IFLA_VF_IB_PORT_GUID]); + + if (!ops->ndo_set_vf_guid) + return -EOPNOTSUPP; + + return handle_vf_guid(dev, ivt,
IFLA_VF_IB_PORT_GUID); + } + return err; } -- cgit v1.2.3 From 2a4826fe746e01712ca53902ee75c1a1f6c0a4aa Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Fri, 11 Mar 2016 22:58:40 +0200 Subject: net/mlx5_core: Add VF param when querying vport counter Add a vf parameter to mlx5_core_query_vport_counter so we can call it to query counters of virtual functions. Also update current users of the API. PFs may call mlx5_core_query_vport_counter with other_vport set to indicate that they are querying a virtual function. The virtual function to be queried is given by the vf parameter. Virtual function numbering is zero based so the first VF is 0 and so on. When a PF queries its own function, the other_vport parameter is cleared. Signed-off-by: Eli Cohen Reviewed-by: Or Gerlitz Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/mad.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/vport.c | 5 +++-- include/linux/mlx5/vport.h | 3 ++- 3 files changed, 6 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/drivers/infiniband/hw/mlx5/mad.c b/drivers/infiniband/hw/mlx5/mad.c index 41d8a0036465..1534af113058 100644 --- a/drivers/infiniband/hw/mlx5/mad.c +++ b/drivers/infiniband/hw/mlx5/mad.c @@ -208,7 +208,7 @@ static int process_pma_cmd(struct ib_device *ibdev, u8 port_num, if (!out_cnt) return IB_MAD_RESULT_FAILURE; - err = mlx5_core_query_vport_counter(dev->mdev, 0, + err = mlx5_core_query_vport_counter(dev->mdev, 0, 0, port_num, out_cnt, sz); if (!err) pma_cnt_ext_assign(pma_cnt_ext, out_cnt); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c index 90ab09e375b8..2b836d0b4738 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c @@ -852,7 +852,8 @@ int mlx5_nic_vport_disable_roce(struct mlx5_core_dev *mdev) EXPORT_SYMBOL_GPL(mlx5_nic_vport_disable_roce); int mlx5_core_query_vport_counter(struct mlx5_core_dev *dev, u8 other_vport, - u8 port_num, void *out, size_t out_sz) + int vf, u8 port_num, void *out, + size_t out_sz) { int in_sz = MLX5_ST_SZ_BYTES(query_vport_counter_in); int is_group_manager; @@ -871,7 +872,7 @@ int mlx5_core_query_vport_counter(struct mlx5_core_dev *dev, u8 other_vport, if (other_vport) { if (is_group_manager) { MLX5_SET(query_vport_counter_in, in, other_vport, 1); - MLX5_SET(query_vport_counter_in, in, vport_number, 0); + MLX5_SET(query_vport_counter_in, in, vport_number, vf + 1); } else { err = -EPERM; goto free; diff --git a/include/linux/mlx5/vport.h b/include/linux/mlx5/vport.h index a9f2bcc98cab..aafb3e48b5f8 100644 --- a/include/linux/mlx5/vport.h +++ b/include/linux/mlx5/vport.h @@ -93,6 +93,7 @@ int mlx5_modify_nic_vport_vlans(struct mlx5_core_dev *dev, int mlx5_nic_vport_enable_roce(struct mlx5_core_dev *mdev); int mlx5_nic_vport_disable_roce(struct mlx5_core_dev *mdev); int mlx5_core_query_vport_counter(struct mlx5_core_dev *dev, u8 other_vport, - u8 port_num, void *out, size_t out_sz); + int vf, u8 port_num, void *out, + size_t out_sz); #endif /* __MLX5_VPORT_H__ */ -- cgit v1.2.3 From 1f324bff9ba3db276f074169d5b4af9e9c117ba1 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Fri, 11 Mar 2016 22:58:41 +0200 Subject: net/mlx5_core: Implement modify HCA vport command Implement the modify HCA vport commands used to modify the parameters of virtual HCA's ports. 
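As a rough sketch of the intended calling convention (inferred here from the IB driver changes later in this series, not guaranteed by this patch alone; mdev and vf are assumed caller-provided), a group manager PF could modify the state policy of a VF like this:

	/* Sketch: PF sets the link state policy of a VF's vHCA port */
	struct mlx5_hca_vport_context *req;
	int err;

	req = kzalloc(sizeof(*req), GFP_KERNEL);
	if (!req)
		return -ENOMEM;

	req->field_select = MLX5_HCA_VPORT_SEL_STATE_POLICY;
	req->policy = MLX5_POLICY_UP;
	/* other_vport = 1, port_num = 1, vport index = vf + 1 */
	err = mlx5_core_modify_hca_vport_context(mdev, 1, 1, vf + 1, req);
	kfree(req);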
Signed-off-by: Eli Cohen Reviewed-by: Or Gerlitz Signed-off-by: Doug Ledford --- drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 6 +++ drivers/net/ethernet/mellanox/mlx5/core/vport.c | 67 +++++++++++++++++++++++++ include/linux/mlx5/vport.h | 4 ++ 3 files changed, 77 insertions(+) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index 037fc4cdf5af..ebb4036b98e5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -407,6 +407,12 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op, const char *mlx5_command_str(int command) { switch (command) { + case MLX5_CMD_OP_QUERY_HCA_VPORT_CONTEXT: + return "QUERY_HCA_VPORT_CONTEXT"; + + case MLX5_CMD_OP_MODIFY_HCA_VPORT_CONTEXT: + return "MODIFY_HCA_VPORT_CONTEXT"; + case MLX5_CMD_OP_QUERY_HCA_CAP: return "QUERY_HCA_CAP"; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c index 2b836d0b4738..bd518405859e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c @@ -891,3 +891,70 @@ free: return err; } EXPORT_SYMBOL_GPL(mlx5_core_query_vport_counter); + +int mlx5_core_modify_hca_vport_context(struct mlx5_core_dev *dev, + u8 other_vport, u8 port_num, + int vf, + struct mlx5_hca_vport_context *req) +{ + int in_sz = MLX5_ST_SZ_BYTES(modify_hca_vport_context_in); + u8 out[MLX5_ST_SZ_BYTES(modify_hca_vport_context_out)]; + int is_group_manager; + void *in; + int err; + void *ctx; + + mlx5_core_dbg(dev, "vf %d\n", vf); + is_group_manager = MLX5_CAP_GEN(dev, vport_group_manager); + in = kzalloc(in_sz, GFP_KERNEL); + if (!in) + return -ENOMEM; + + memset(out, 0, sizeof(out)); + MLX5_SET(modify_hca_vport_context_in, in, opcode, MLX5_CMD_OP_MODIFY_HCA_VPORT_CONTEXT); + if (other_vport) { + if (is_group_manager) { + MLX5_SET(modify_hca_vport_context_in, in, other_vport, 1); + MLX5_SET(modify_hca_vport_context_in, in, vport_number, vf); + } else { + err = -EPERM; + goto ex; + } + } + + if (MLX5_CAP_GEN(dev, num_ports) > 1) + MLX5_SET(modify_hca_vport_context_in, in, port_num, port_num); + + ctx = MLX5_ADDR_OF(modify_hca_vport_context_in, in, hca_vport_context); + MLX5_SET(hca_vport_context, ctx, field_select, req->field_select); + MLX5_SET(hca_vport_context, ctx, sm_virt_aware, req->sm_virt_aware); + MLX5_SET(hca_vport_context, ctx, has_smi, req->has_smi); + MLX5_SET(hca_vport_context, ctx, has_raw, req->has_raw); + MLX5_SET(hca_vport_context, ctx, vport_state_policy, req->policy); + MLX5_SET(hca_vport_context, ctx, port_physical_state, req->phys_state); + MLX5_SET(hca_vport_context, ctx, vport_state, req->vport_state); + MLX5_SET64(hca_vport_context, ctx, port_guid, req->port_guid); + MLX5_SET64(hca_vport_context, ctx, node_guid, req->node_guid); + MLX5_SET(hca_vport_context, ctx, cap_mask1, req->cap_mask1); + MLX5_SET(hca_vport_context, ctx, cap_mask1_field_select, req->cap_mask1_perm); + MLX5_SET(hca_vport_context, ctx, cap_mask2, req->cap_mask2); + MLX5_SET(hca_vport_context, ctx, cap_mask2_field_select, req->cap_mask2_perm); + MLX5_SET(hca_vport_context, ctx, lid, req->lid); + MLX5_SET(hca_vport_context, ctx, init_type_reply, req->init_type_reply); + MLX5_SET(hca_vport_context, ctx, lmc, req->lmc); + MLX5_SET(hca_vport_context, ctx, subnet_timeout, req->subnet_timeout); + MLX5_SET(hca_vport_context, ctx, sm_lid, req->sm_lid); + MLX5_SET(hca_vport_context, ctx, sm_sl, 
req->sm_sl); + MLX5_SET(hca_vport_context, ctx, qkey_violation_counter, req->qkey_violation_counter); + MLX5_SET(hca_vport_context, ctx, pkey_violation_counter, req->pkey_violation_counter); + err = mlx5_cmd_exec(dev, in, in_sz, out, sizeof(out)); + if (err) + goto ex; + + err = mlx5_cmd_status_to_err_v2(out); + +ex: + kfree(in); + return err; +} +EXPORT_SYMBOL_GPL(mlx5_core_modify_hca_vport_context); diff --git a/include/linux/mlx5/vport.h b/include/linux/mlx5/vport.h index aafb3e48b5f8..bd93e6323603 100644 --- a/include/linux/mlx5/vport.h +++ b/include/linux/mlx5/vport.h @@ -95,5 +95,9 @@ int mlx5_nic_vport_disable_roce(struct mlx5_core_dev *mdev); int mlx5_core_query_vport_counter(struct mlx5_core_dev *dev, u8 other_vport, int vf, u8 port_num, void *out, size_t out_sz); +int mlx5_core_modify_hca_vport_context(struct mlx5_core_dev *dev, + u8 other_vport, u8 port_num, + int vf, + struct mlx5_hca_vport_context *req); #endif /* __MLX5_VPORT_H__ */ -- cgit v1.2.3 From eff901d30e6cebd940072637f112ce4d0090ac12 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Fri, 11 Mar 2016 22:58:42 +0200 Subject: IB/mlx5: Implement callbacks for manipulating VFs Implement the IB defined callbacks used to manipulate the policy for the link state, set GUIDs or get statistics information. This functionality is added into a new file that will be used to add any SRIOV related functionality to the mlx5 IB layer. The following callbacks have been added: mlx5_ib_get_vf_config mlx5_ib_set_vf_link_state mlx5_ib_get_vf_stats mlx5_ib_set_vf_guid In addition, publish whether this device is based on a virtual function. In mlx5 supported devices, virtual functions are implemented as vHCAs. vHCAs have their own QP number space so it is possible that two vHCAs will use a QP with the same number at the same time. Signed-off-by: Eli Cohen Reviewed-by: Or Gerlitz Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/Makefile | 2 +- drivers/infiniband/hw/mlx5/ib_virt.c | 194 +++++++++++++++++++++++++++++++++++ drivers/infiniband/hw/mlx5/main.c | 10 ++ drivers/infiniband/hw/mlx5/mlx5_ib.h | 8 ++ include/linux/mlx5/driver.h | 5 +- include/linux/mlx5/mlx5_ifc.h | 6 ++ 6 files changed, 223 insertions(+), 2 deletions(-) create mode 100644 drivers/infiniband/hw/mlx5/ib_virt.c (limited to 'include/linux') diff --git a/drivers/infiniband/hw/mlx5/Makefile b/drivers/infiniband/hw/mlx5/Makefile index 4e851889355a..7493a83acd28 100644 --- a/drivers/infiniband/hw/mlx5/Makefile +++ b/drivers/infiniband/hw/mlx5/Makefile @@ -1,4 +1,4 @@ obj-$(CONFIG_MLX5_INFINIBAND) += mlx5_ib.o -mlx5_ib-y := main.o cq.o doorbell.o qp.o mem.o srq.o mr.o ah.o mad.o gsi.o +mlx5_ib-y := main.o cq.o doorbell.o qp.o mem.o srq.o mr.o ah.o mad.o gsi.o ib_virt.o mlx5_ib-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += odp.o diff --git a/drivers/infiniband/hw/mlx5/ib_virt.c b/drivers/infiniband/hw/mlx5/ib_virt.c new file mode 100644 index 000000000000..c1b9de800fe5 --- /dev/null +++ b/drivers/infiniband/hw/mlx5/ib_virt.c @@ -0,0 +1,194 @@ +/* + * Copyright (c) 2016, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. 
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include "mlx5_ib.h" + +static inline u32 mlx_to_net_policy(enum port_state_policy mlx_policy) +{ + switch (mlx_policy) { + case MLX5_POLICY_DOWN: + return IFLA_VF_LINK_STATE_DISABLE; + case MLX5_POLICY_UP: + return IFLA_VF_LINK_STATE_ENABLE; + case MLX5_POLICY_FOLLOW: + return IFLA_VF_LINK_STATE_AUTO; + default: + return __IFLA_VF_LINK_STATE_MAX; + } +} + +int mlx5_ib_get_vf_config(struct ib_device *device, int vf, u8 port, + struct ifla_vf_info *info) +{ + struct mlx5_ib_dev *dev = to_mdev(device); + struct mlx5_core_dev *mdev = dev->mdev; + struct mlx5_hca_vport_context *rep; + int err; + + rep = kzalloc(sizeof(*rep), GFP_KERNEL); + if (!rep) + return -ENOMEM; + + err = mlx5_query_hca_vport_context(mdev, 1, 1, vf + 1, rep); + if (err) { + mlx5_ib_warn(dev, "failed to query port policy for vf %d (%d)\n", + vf, err); + goto free; + } + memset(info, 0, sizeof(*info)); + info->linkstate = mlx_to_net_policy(rep->policy); + if (info->linkstate == __IFLA_VF_LINK_STATE_MAX) + err = -EINVAL; + +free: + kfree(rep); + return err; +} + +static inline enum port_state_policy net_to_mlx_policy(int policy) +{ + switch (policy) { + case IFLA_VF_LINK_STATE_DISABLE: + return MLX5_POLICY_DOWN; + case IFLA_VF_LINK_STATE_ENABLE: + return MLX5_POLICY_UP; + case IFLA_VF_LINK_STATE_AUTO: + return MLX5_POLICY_FOLLOW; + default: + return MLX5_POLICY_INVALID; + } +} + +int mlx5_ib_set_vf_link_state(struct ib_device *device, int vf, + u8 port, int state) +{ + struct mlx5_ib_dev *dev = to_mdev(device); + struct mlx5_core_dev *mdev = dev->mdev; + struct mlx5_hca_vport_context *in; + int err; + + in = kzalloc(sizeof(*in), GFP_KERNEL); + if (!in) + return -ENOMEM; + + in->policy = net_to_mlx_policy(state); + if (in->policy == MLX5_POLICY_INVALID) { + err = -EINVAL; + goto out; + } + in->field_select = MLX5_HCA_VPORT_SEL_STATE_POLICY; + err = mlx5_core_modify_hca_vport_context(mdev, 1, 1, vf + 1, in); + +out: + kfree(in); + return err; +} + +int mlx5_ib_get_vf_stats(struct ib_device *device, int vf, + u8 port, struct ifla_vf_stats *stats) +{ + int out_sz = MLX5_ST_SZ_BYTES(query_vport_counter_out); + struct mlx5_core_dev *mdev; + struct mlx5_ib_dev *dev; + void *out; + int err; + + dev = to_mdev(device); + mdev = dev->mdev; + + out = kzalloc(out_sz, GFP_KERNEL); + if 
(!out) + return -ENOMEM; + + err = mlx5_core_query_vport_counter(mdev, true, vf, port, out, out_sz); + if (err) + goto ex; + + stats->rx_packets = MLX5_GET64_PR(query_vport_counter_out, out, received_ib_unicast.packets); + stats->tx_packets = MLX5_GET64_PR(query_vport_counter_out, out, transmitted_ib_unicast.packets); + stats->rx_bytes = MLX5_GET64_PR(query_vport_counter_out, out, received_ib_unicast.octets); + stats->tx_bytes = MLX5_GET64_PR(query_vport_counter_out, out, transmitted_ib_unicast.octets); + stats->multicast = MLX5_GET64_PR(query_vport_counter_out, out, received_ib_multicast.packets); + +ex: + kfree(out); + return err; +} + +static int set_vf_node_guid(struct ib_device *device, int vf, u8 port, u64 guid) +{ + struct mlx5_ib_dev *dev = to_mdev(device); + struct mlx5_core_dev *mdev = dev->mdev; + struct mlx5_hca_vport_context *in; + int err; + + in = kzalloc(sizeof(*in), GFP_KERNEL); + if (!in) + return -ENOMEM; + + in->field_select = MLX5_HCA_VPORT_SEL_NODE_GUID; + in->node_guid = guid; + err = mlx5_core_modify_hca_vport_context(mdev, 1, 1, vf + 1, in); + kfree(in); + return err; +} + +static int set_vf_port_guid(struct ib_device *device, int vf, u8 port, u64 guid) +{ + struct mlx5_ib_dev *dev = to_mdev(device); + struct mlx5_core_dev *mdev = dev->mdev; + struct mlx5_hca_vport_context *in; + int err; + + in = kzalloc(sizeof(*in), GFP_KERNEL); + if (!in) + return -ENOMEM; + + in->field_select = MLX5_HCA_VPORT_SEL_PORT_GUID; + in->port_guid = guid; + err = mlx5_core_modify_hca_vport_context(mdev, 1, 1, vf + 1, in); + kfree(in); + return err; +} + +int mlx5_ib_set_vf_guid(struct ib_device *device, int vf, u8 port, + u64 guid, int type) +{ + if (type == IFLA_VF_IB_NODE_GUID) + return set_vf_node_guid(device, vf, port, guid); + else if (type == IFLA_VF_IB_PORT_GUID) + return set_vf_port_guid(device, vf, port, guid); + + return -EINVAL; +} diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 73cb6337d856..e305990b73f6 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -562,6 +562,9 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, if (MLX5_CAP_GEN(mdev, cd)) props->device_cap_flags |= IB_DEVICE_CROSS_CHANNEL; + if (!mlx5_core_is_pf(mdev)) + props->device_cap_flags |= IB_DEVICE_VIRTUAL_FUNCTION; + return 0; } @@ -699,6 +702,7 @@ static int mlx5_query_hca_port(struct ib_device *ibdev, u8 port, props->qkey_viol_cntr = rep->qkey_violation_counter; props->subnet_timeout = rep->subnet_timeout; props->init_type_reply = rep->init_type_reply; + props->grh_required = rep->grh_required; err = mlx5_query_port_link_width_oper(mdev, &ib_link_width_oper, port); if (err) @@ -2349,6 +2353,12 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev) dev->ib_dev.map_mr_sg = mlx5_ib_map_mr_sg; dev->ib_dev.check_mr_status = mlx5_ib_check_mr_status; dev->ib_dev.get_port_immutable = mlx5_port_immutable; + if (mlx5_core_is_pf(mdev)) { + dev->ib_dev.get_vf_config = mlx5_ib_get_vf_config; + dev->ib_dev.set_vf_link_state = mlx5_ib_set_vf_link_state; + dev->ib_dev.get_vf_stats = mlx5_ib_get_vf_stats; + dev->ib_dev.set_vf_guid = mlx5_ib_set_vf_guid; + } mlx5_ib_internal_fill_odp_caps(dev); diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 76b2b42e0535..f16c818ad2e6 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -776,6 +776,14 @@ void mlx5_ib_qp_disable_pagefaults(struct mlx5_ib_qp *qp); void mlx5_ib_qp_enable_pagefaults(struct mlx5_ib_qp 
*qp); void mlx5_ib_invalidate_range(struct ib_umem *umem, unsigned long start, unsigned long end); +int mlx5_ib_get_vf_config(struct ib_device *device, int vf, + u8 port, struct ifla_vf_info *info); +int mlx5_ib_set_vf_link_state(struct ib_device *device, int vf, + u8 port, int state); +int mlx5_ib_get_vf_stats(struct ib_device *device, int vf, + u8 port, struct ifla_vf_stats *stats); +int mlx5_ib_set_vf_guid(struct ib_device *device, int vf, u8 port, + u64 guid, int type); #else /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */ static inline void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev) diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 406b27ec1d42..e1d987fb49b2 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -613,7 +613,10 @@ struct mlx5_pas { }; enum port_state_policy { - MLX5_AAA_000 + MLX5_POLICY_DOWN = 0, + MLX5_POLICY_UP = 1, + MLX5_POLICY_FOLLOW = 2, + MLX5_POLICY_INVALID = 0xffffffff }; enum phy_port_state { diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index ad0a44b1592c..bb9e07ca6534 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -3696,6 +3696,12 @@ struct mlx5_ifc_query_hca_vport_pkey_in_bits { u8 pkey_index[0x10]; }; +enum { + MLX5_HCA_VPORT_SEL_PORT_GUID = 1 << 0, + MLX5_HCA_VPORT_SEL_NODE_GUID = 1 << 1, + MLX5_HCA_VPORT_SEL_STATE_POLICY = 1 << 2, +}; + struct mlx5_ifc_query_hca_vport_gid_out_bits { u8 status[0x8]; u8 reserved_at_8[0x18]; -- cgit v1.2.3

From afc54992296a5e7f7d2e41456ed90789b01a4e7b Mon Sep 17 00:00:00 2001 From: Allen Hubbe Date: Mon, 21 Mar 2016 04:53:13 -0400 Subject: NTB: Make _addr functions optional in the API The functions ntb_peer_db_addr and ntb_peer_spad_addr were previously required by the API; they are now optional. The functions already support returning an error, so any existing calling code should already check for it. Any existing code using drivers that implement the functions will not be affected.
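For example (an illustrative sketch, not taken from any in-tree client), a caller that wants the peer doorbell address for a mapped-write fast path should simply tolerate the error and use the regular doorbell op instead:

	/* Sketch: handle transports that do not implement peer_db_addr */
	phys_addr_t db_addr;
	resource_size_t db_size;
	int rc;

	rc = ntb_peer_db_addr(ntb, &db_addr, &db_size);
	if (rc) {
		/* e.g. -EINVAL: no peer_db_addr op; ring doorbells via
		 * ntb_peer_db_set() instead of a mapped address.
		 */
		return rc;
	}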
Signed-off-by: Allen Hubbe Signed-off-by: Jon Mason --- include/linux/ntb.h | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ntb.h b/include/linux/ntb.h index f798e2afba88..6f47562d477b 100644 --- a/include/linux/ntb.h +++ b/include/linux/ntb.h @@ -284,7 +284,7 @@ static inline int ntb_dev_ops_is_valid(const struct ntb_dev_ops *ops) /* ops->db_read_mask && */ ops->db_set_mask && ops->db_clear_mask && - ops->peer_db_addr && + /* ops->peer_db_addr && */ /* ops->peer_db_read && */ ops->peer_db_set && /* ops->peer_db_clear && */ @@ -295,7 +295,7 @@ static inline int ntb_dev_ops_is_valid(const struct ntb_dev_ops *ops) ops->spad_count && ops->spad_read && ops->spad_write && - ops->peer_spad_addr && + /* ops->peer_spad_addr && */ /* ops->peer_spad_read && */ ops->peer_spad_write && 1; @@ -757,6 +757,9 @@ static inline int ntb_peer_db_addr(struct ntb_dev *ntb, phys_addr_t *db_addr, resource_size_t *db_size) { + if (!ntb->ops->peer_db_addr) + return -EINVAL; + return ntb->ops->peer_db_addr(ntb, db_addr, db_size); } @@ -948,6 +951,9 @@ static inline int ntb_spad_write(struct ntb_dev *ntb, int idx, u32 val) static inline int ntb_peer_spad_addr(struct ntb_dev *ntb, int idx, phys_addr_t *spad_addr) { + if (!ntb->ops->peer_spad_addr) + return -EINVAL; + return ntb->ops->peer_spad_addr(ntb, idx, spad_addr); } -- cgit v1.2.3 From 6d0e24cd07f7e97015d958299fda535e459d2c99 Mon Sep 17 00:00:00 2001 From: Luis de Bethencourt Date: Mon, 21 Mar 2016 20:58:28 +0000 Subject: net: add missing descriptions in net_device_priv_flags The flags IFF_XMIT_DST_RELEASE_PERM, IFF_IPVLAN_MASTER and IFF_IPVLAN_SLAVE are missing descriptions for the Documentation. Adding them. Signed-off-by: Luis de Bethencourt Suggested-by: Benjamin Poirier Signed-off-by: David S. Miller --- include/linux/netdevice.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index f9eebd518545..fd07734fb277 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1320,6 +1320,10 @@ struct net_device_ops { * @IFF_LIVE_ADDR_CHANGE: device supports hardware address * change when it's running * @IFF_MACVLAN: Macvlan device + * @IFF_XMIT_DST_RELEASE_PERM: IFF_XMIT_DST_RELEASE not taking into account + * underlying stacked devices + * @IFF_IPVLAN_MASTER: IPvlan master device + * @IFF_IPVLAN_SLAVE: IPvlan slave device * @IFF_L3MDEV_MASTER: device is an L3 master device * @IFF_NO_QUEUE: device can run without qdisc attached * @IFF_OPENVSWITCH: device is a Open vSwitch master -- cgit v1.2.3 From 62d885fe73e0032004e756b25101ac6c9ebf85dd Mon Sep 17 00:00:00 2001 From: Benjamin Poirier Date: Mon, 21 Mar 2016 14:08:28 -0700 Subject: net: Add missing kernel-doc for netdev ptype lists .//include/linux/netdevice.h:1826: warning: No description found for parameter 'ptype_all' .//include/linux/netdevice.h:1826: warning: No description found for parameter 'ptype_specific' Introduced by commit 7866a621043f ("dev: add per net_device packet type chains") Cc: Salam Noureddine Signed-off-by: Benjamin Poirier Signed-off-by: David S. 
Miller --- include/linux/netdevice.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index fd07734fb277..3f6385d27b81 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1418,6 +1418,8 @@ enum netdev_priv_flags { * @unreg_list: List entry, that is used, when we are unregistering the * device, see the function unregister_netdev * @close_list: List entry, that is used, when we are closing the device + * @ptype_all: Device-specific packet handlers for all protocols + * @ptype_specific: Device-specific, protocol-specific packet handlers * * @adj_list: Directly linked devices, like slaves for bonding * @all_adj_list: All linked devices, *including* neighbours -- cgit v1.2.3 From 3debb0a9ddb16526de8b456491b7db60114f7b5e Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Tue, 22 Mar 2016 17:30:58 -0400 Subject: tracing: Fix trace_printk() to print when not using bprintk() The trace_printk() code will allocate extra buffers if the compile detects that a trace_printk() is used. To do this, the format of the trace_printk() is saved to the __trace_printk_fmt section, and if that section is bigger than zero, the buffers are allocated (along with a message that this has happened). If trace_printk() uses a format that is not a constant, and thus something not guaranteed to be around when the print happens, the compiler optimizes the fmt out, as it is not used, and the __trace_printk_fmt section is not filled. This means the kernel will not allocate the special buffers needed for the trace_printk() and the trace_printk() will not write anything to the tracing buffer. Adding a "__used" to the variable in the __trace_printk_fmt section will keep it around, even though it is set to NULL. This will keep the string from being printed in the debugfs/tracing/printk_formats section as it is not needed. Reported-by: Vlastimil Babka Fixes: 07d777fe8c398 "tracing: Add percpu buffers for trace_printk()" Cc: stable@vger.kernel.org # v3.5+ Signed-off-by: Steven Rostedt --- include/linux/kernel.h | 6 +++--- kernel/trace/trace_printk.c | 3 +++ 2 files changed, 6 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index f31638c6e873..95452f72349a 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -635,7 +635,7 @@ do { \ #define do_trace_printk(fmt, args...) \ do { \ - static const char *trace_printk_fmt \ + static const char *trace_printk_fmt __used \ __attribute__((section("__trace_printk_fmt"))) = \ __builtin_constant_p(fmt) ? fmt : NULL; \ \ @@ -679,7 +679,7 @@ int __trace_printk(unsigned long ip, const char *fmt, ...); */ #define trace_puts(str) ({ \ - static const char *trace_printk_fmt \ + static const char *trace_printk_fmt __used \ __attribute__((section("__trace_printk_fmt"))) = \ __builtin_constant_p(str) ? str : NULL; \ \ @@ -701,7 +701,7 @@ extern void trace_dump_stack(int skip); #define ftrace_vprintk(fmt, vargs) \ do { \ if (__builtin_constant_p(fmt)) { \ - static const char *trace_printk_fmt \ + static const char *trace_printk_fmt __used \ __attribute__((section("__trace_printk_fmt"))) = \ __builtin_constant_p(fmt) ? 
fmt : NULL; \ \ diff --git a/kernel/trace/trace_printk.c b/kernel/trace/trace_printk.c index 060df67dbdd1..f96f0383f6c6 100644 --- a/kernel/trace/trace_printk.c +++ b/kernel/trace/trace_printk.c @@ -296,6 +296,9 @@ static int t_show(struct seq_file *m, void *v) const char *str = *fmt; int i; + if (!*fmt) + return 0; + seq_printf(m, "0x%lx : \"", *(unsigned long *)fmt); /* -- cgit v1.2.3 From 5180e3e24fd3e8e7ea46fbe21e10f5ea3fb1edaa Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Tue, 22 Mar 2016 14:24:43 -0700 Subject: compat: add in_compat_syscall to ask whether we're in a compat syscall A lot of code currently abuses is_compat_task to determine this. Signed-off-by: Andy Lutomirski Cc: "David S. Miller" Cc: "H. Peter Anvin" Cc: "Theodore Ts'o" Cc: Andreas Dilger Cc: Clemens Ladisch Cc: David Airlie Cc: David Herrmann Cc: David Miller Cc: Dmitry Torokhov Cc: Eric Paris Cc: Herbert Xu Cc: Ingo Molnar Acked-by: Jiri Kosina Cc: Matt Fleming Cc: Neil Horman Cc: Oded Gabbay Cc: Oleg Drokin Cc: Oleg Nesterov Cc: Paul Moore Cc: Sam Ravnborg Cc: Steffen Klassert Cc: Thomas Gleixner Cc: Vlad Yasevich Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/compat.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include/linux') diff --git a/include/linux/compat.h b/include/linux/compat.h index fe4ccd0c748a..f964ef79e0ad 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -5,6 +5,8 @@ * syscall compatibility layer. */ +#include + #ifdef CONFIG_COMPAT #include @@ -719,9 +721,22 @@ asmlinkage long compat_sys_sched_rr_get_interval(compat_pid_t pid, asmlinkage long compat_sys_fanotify_mark(int, unsigned int, __u32, __u32, int, const char __user *); + +/* + * For most but not all architectures, "am I in a compat syscall?" and + * "am I a compat task?" are the same question. For architectures on which + * they aren't the same question, arch code can override in_compat_syscall. + */ + +#ifndef in_compat_syscall +static inline bool in_compat_syscall(void) { return is_compat_task(); } +#endif + #else #define is_compat_task() (0) +static inline bool in_compat_syscall(void) { return false; } #endif /* CONFIG_COMPAT */ + #endif /* _LINUX_COMPAT_H */ -- cgit v1.2.3 From 378c6520e7d29280f400ef2ceaf155c86f05a71a Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Tue, 22 Mar 2016 14:25:36 -0700 Subject: fs/coredump: prevent fsuid=0 dumps into user-controlled directories This commit fixes the following security hole affecting systems where all of the following conditions are fulfilled: - The fs.suid_dumpable sysctl is set to 2. - The kernel.core_pattern sysctl's value starts with "/". (Systems where kernel.core_pattern starts with "|/" are not affected.) - Unprivileged user namespace creation is permitted. (This is true on Linux >=3.8, but some distributions disallow it by default using a distro patch.) Under these conditions, if a program executes under secure exec rules, causing it to run with the SUID_DUMP_ROOT flag, then unshares its user namespace, changes its root directory and crashes, the coredump will be written using fsuid=0 and a path derived from kernel.core_pattern - but this path is interpreted relative to the root directory of the process, allowing the attacker to control where a coredump will be written with root privileges. To fix the security issue, always interpret core_pattern for dumps that are written under SUID_DUMP_ROOT relative to the root directory of init. Signed-off-by: Jann Horn Acked-by: Kees Cook Cc: Al Viro Cc: "Eric W. 
Biederman" Cc: Andy Lutomirski Cc: Oleg Nesterov Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/um/drivers/mconsole_kern.c | 2 +- fs/coredump.c | 30 ++++++++++++++++++++++++++---- fs/fhandle.c | 2 +- fs/open.c | 6 ++---- include/linux/fs.h | 2 +- kernel/sysctl_binary.c | 2 +- 6 files changed, 32 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/arch/um/drivers/mconsole_kern.c b/arch/um/drivers/mconsole_kern.c index b821b13d343a..8a6b57108ac2 100644 --- a/arch/um/drivers/mconsole_kern.c +++ b/arch/um/drivers/mconsole_kern.c @@ -133,7 +133,7 @@ void mconsole_proc(struct mc_request *req) ptr += strlen("proc"); ptr = skip_spaces(ptr); - file = file_open_root(mnt->mnt_root, mnt, ptr, O_RDONLY); + file = file_open_root(mnt->mnt_root, mnt, ptr, O_RDONLY, 0); if (IS_ERR(file)) { mconsole_reply(req, "Failed to open file", 1, 0); printk(KERN_ERR "open /proc/%s: %ld\n", ptr, PTR_ERR(file)); diff --git a/fs/coredump.c b/fs/coredump.c index 9ea87e9fdccf..47c32c3bfa1d 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -32,6 +32,9 @@ #include #include #include +#include +#include +#include #include #include @@ -649,6 +652,8 @@ void do_coredump(const siginfo_t *siginfo) } } else { struct inode *inode; + int open_flags = O_CREAT | O_RDWR | O_NOFOLLOW | + O_LARGEFILE | O_EXCL; if (cprm.limit < binfmt->min_coredump) goto fail_unlock; @@ -687,10 +692,27 @@ void do_coredump(const siginfo_t *siginfo) * what matters is that at least one of the two processes * writes its coredump successfully, not which one. */ - cprm.file = filp_open(cn.corename, - O_CREAT | 2 | O_NOFOLLOW | - O_LARGEFILE | O_EXCL, - 0600); + if (need_suid_safe) { + /* + * Using user namespaces, normal user tasks can change + * their current->fs->root to point to arbitrary + * directories. Since the intention of the "only dump + * with a fully qualified path" rule is to control where + * coredumps may be placed using root privileges, + * current->fs->root must not be used. Instead, use the + * root directory of init_task. 
+ */ + struct path root; + + task_lock(&init_task); + get_fs_root(init_task.fs, &root); + task_unlock(&init_task); + cprm.file = file_open_root(root.dentry, root.mnt, + cn.corename, open_flags, 0600); + path_put(&root); + } else { + cprm.file = filp_open(cn.corename, open_flags, 0600); + } if (IS_ERR(cprm.file)) goto fail_unlock; diff --git a/fs/fhandle.c b/fs/fhandle.c index d59712dfa3e7..ca3c3dd01789 100644 --- a/fs/fhandle.c +++ b/fs/fhandle.c @@ -228,7 +228,7 @@ long do_handle_open(int mountdirfd, path_put(&path); return fd; } - file = file_open_root(path.dentry, path.mnt, "", open_flag); + file = file_open_root(path.dentry, path.mnt, "", open_flag, 0); if (IS_ERR(file)) { put_unused_fd(fd); retval = PTR_ERR(file); diff --git a/fs/open.c b/fs/open.c index 55bdc75e2172..17cb6b1dab75 100644 --- a/fs/open.c +++ b/fs/open.c @@ -992,14 +992,12 @@ struct file *filp_open(const char *filename, int flags, umode_t mode) EXPORT_SYMBOL(filp_open); struct file *file_open_root(struct dentry *dentry, struct vfsmount *mnt, - const char *filename, int flags) + const char *filename, int flags, umode_t mode) { struct open_flags op; - int err = build_open_flags(flags, 0, &op); + int err = build_open_flags(flags, mode, &op); if (err) return ERR_PTR(err); - if (flags & O_CREAT) - return ERR_PTR(-EINVAL); return do_file_open_root(dentry, mnt, filename, &op); } EXPORT_SYMBOL(file_open_root); diff --git a/include/linux/fs.h b/include/linux/fs.h index 35d99266ca9a..14a97194b34b 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2263,7 +2263,7 @@ extern long do_sys_open(int dfd, const char __user *filename, int flags, extern struct file *file_open_name(struct filename *, int, umode_t); extern struct file *filp_open(const char *, int, umode_t); extern struct file *file_open_root(struct dentry *, struct vfsmount *, - const char *, int); + const char *, int, umode_t); extern struct file * dentry_open(const struct path *, int, const struct cred *); extern int filp_close(struct file *, fl_owner_t id); diff --git a/kernel/sysctl_binary.c b/kernel/sysctl_binary.c index 7e7746a42a62..10a1d7dc9313 100644 --- a/kernel/sysctl_binary.c +++ b/kernel/sysctl_binary.c @@ -1321,7 +1321,7 @@ static ssize_t binary_sysctl(const int *name, int nlen, } mnt = task_active_pid_ns(current)->proc_mnt; - file = file_open_root(mnt->mnt_root, mnt, pathname, flags); + file = file_open_root(mnt->mnt_root, mnt, pathname, flags, 0); result = PTR_ERR(file); if (IS_ERR(file)) goto out_putname; -- cgit v1.2.3 From 95f27356e4a8982257ecae6bc44c650772d77659 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 22 Mar 2016 14:25:39 -0700 Subject: cpumask: remove incorrect information from comment Since commit cdfdef75e795 ("cpumask: only allocate nr_cpumask_bits."), this comment above cpumask_size() is no longer relevant. Signed-off-by: Eric Biggers Cc: Rusty Russell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/cpumask.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index fc14275ff34e..40cee6b77a93 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -607,8 +607,6 @@ static inline int cpulist_parse(const char *buf, struct cpumask *dstp) /** * cpumask_size - size to allocate for a 'struct cpumask' in bytes - * - * This will eventually be a runtime variable, depending on nr_cpu_ids. 
*/ static inline size_t cpumask_size(void) { -- cgit v1.2.3 From 8b189fdbc5f68f3f43e67004de25f75c1a5b4e51 Mon Sep 17 00:00:00 2001 From: Alexandre Bounine Date: Tue, 22 Mar 2016 14:26:00 -0700 Subject: rapidio: add query_mport operation Add mport query operation to report master port RapidIO capabilities and run time configuration to upper level drivers. Signed-off-by: Alexandre Bounine Cc: Matt Porter Cc: Aurelien Jacquiot Cc: Andre van Herk Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/rapidio/rio.c | 17 +++++++++++++++++ include/linux/rio.h | 49 ++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/rio_regs.h | 3 +++ 3 files changed, 69 insertions(+) (limited to 'include/linux') diff --git a/drivers/rapidio/rio.c b/drivers/rapidio/rio.c index e220edc85c68..c72f4da8065e 100644 --- a/drivers/rapidio/rio.c +++ b/drivers/rapidio/rio.c @@ -67,6 +67,23 @@ u16 rio_local_get_device_id(struct rio_mport *port) return (RIO_GET_DID(port->sys_size, result)); } +/** + * rio_query_mport - Query mport device attributes + * @port: mport device to query + * @mport_attr: mport attributes data structure + * + * Returns attributes of specified mport through the + * pointer to attributes data structure. + */ +int rio_query_mport(struct rio_mport *port, + struct rio_mport_attr *mport_attr) +{ + if (!port->ops->query_mport) + return -ENODATA; + return port->ops->query_mport(port, mport_attr); +} +EXPORT_SYMBOL(rio_query_mport); + /** * rio_add_device- Adds a RIO device to the device model * @rdev: RIO device diff --git a/include/linux/rio.h b/include/linux/rio.h index cde976e86b48..8996a629a321 100644 --- a/include/linux/rio.h +++ b/include/linux/rio.h @@ -314,6 +314,50 @@ struct rio_net { struct rio_id_table destid_table; /* destID allocation table */ }; +enum rio_link_speed { + RIO_LINK_DOWN = 0, /* SRIO Link not initialized */ + RIO_LINK_125 = 1, /* 1.25 GBaud */ + RIO_LINK_250 = 2, /* 2.5 GBaud */ + RIO_LINK_312 = 3, /* 3.125 GBaud */ + RIO_LINK_500 = 4, /* 5.0 GBaud */ + RIO_LINK_625 = 5 /* 6.25 GBaud */ +}; + +enum rio_link_width { + RIO_LINK_1X = 0, + RIO_LINK_1XR = 1, + RIO_LINK_2X = 3, + RIO_LINK_4X = 2, + RIO_LINK_8X = 4, + RIO_LINK_16X = 5 +}; + +enum rio_mport_flags { + RIO_MPORT_DMA = (1 << 0), /* supports DMA data transfers */ + RIO_MPORT_DMA_SG = (1 << 1), /* DMA supports HW SG mode */ + RIO_MPORT_IBSG = (1 << 2), /* inbound mapping supports SG */ +}; + +/** + * struct rio_mport_attr - RIO mport device attributes + * @flags: mport device capability flags + * @link_speed: SRIO link speed value (as defined by RapidIO specification) + * @link_width: SRIO link width value (as defined by RapidIO specification) + * @dma_max_sge: number of SG list entries that can be handled by DMA channel(s) + * @dma_max_size: max number of bytes in single DMA transfer (SG entry) + * @dma_align: alignment shift for DMA operations (as for other DMA operations) + */ +struct rio_mport_attr { + int flags; + int link_speed; + int link_width; + + /* DMA capability info: valid only if RIO_MPORT_DMA flag is set */ + int dma_max_sge; + int dma_max_size; + int dma_align; +}; + /* Low-level architecture-dependent routines */ /** @@ -333,6 +377,7 @@ struct rio_net { * @get_inb_message: Callback to get a message from an inbound mailbox queue. * @map_inb: Callback to map RapidIO address region into local memory space. * @unmap_inb: Callback to unmap RapidIO address region mapped with map_inb(). + * @query_mport: Callback to query mport device attributes. 
*/ struct rio_ops { int (*lcread) (struct rio_mport *mport, int index, u32 offset, int len, @@ -358,6 +403,8 @@ struct rio_ops { int (*map_inb)(struct rio_mport *mport, dma_addr_t lstart, u64 rstart, u32 size, u32 flags); void (*unmap_inb)(struct rio_mport *mport, dma_addr_t lstart); + int (*query_mport)(struct rio_mport *mport, + struct rio_mport_attr *attr); }; #define RIO_RESOURCE_MEM 0x00000100 @@ -481,5 +528,7 @@ extern int rio_open_inb_mbox(struct rio_mport *, void *, int, int); extern void rio_close_inb_mbox(struct rio_mport *, int); extern int rio_open_outb_mbox(struct rio_mport *, void *, int, int); extern void rio_close_outb_mbox(struct rio_mport *, int); +extern int rio_query_mport(struct rio_mport *port, + struct rio_mport_attr *mport_attr); #endif /* LINUX_RIO_H */ diff --git a/include/linux/rio_regs.h b/include/linux/rio_regs.h index 218168a2b5e9..1063ae382bc2 100644 --- a/include/linux/rio_regs.h +++ b/include/linux/rio_regs.h @@ -238,6 +238,8 @@ #define RIO_PORT_N_ACK_INBOUND 0x3f000000 #define RIO_PORT_N_ACK_OUTSTAND 0x00003f00 #define RIO_PORT_N_ACK_OUTBOUND 0x0000003f +#define RIO_PORT_N_CTL2_CSR(x) (0x0054 + x*0x20) +#define RIO_PORT_N_CTL2_SEL_BAUD 0xf0000000 #define RIO_PORT_N_ERR_STS_CSR(x) (0x0058 + x*0x20) #define RIO_PORT_N_ERR_STS_PW_OUT_ES 0x00010000 /* Output Error-stopped */ #define RIO_PORT_N_ERR_STS_PW_INP_ES 0x00000100 /* Input Error-stopped */ @@ -249,6 +251,7 @@ #define RIO_PORT_N_CTL_PWIDTH 0xc0000000 #define RIO_PORT_N_CTL_PWIDTH_1 0x00000000 #define RIO_PORT_N_CTL_PWIDTH_4 0x40000000 +#define RIO_PORT_N_CTL_IPW 0x38000000 /* Initialized Port Width */ #define RIO_PORT_N_CTL_P_TYP_SER 0x00000001 #define RIO_PORT_N_CTL_LOCKOUT 0x00000002 #define RIO_PORT_N_CTL_EN_RX_SER 0x00200000 -- cgit v1.2.3 From 83dc2cbc1136108068637d5c98636647f7b82461 Mon Sep 17 00:00:00 2001 From: Alexandre Bounine Date: Tue, 22 Mar 2016 14:26:05 -0700 Subject: rapidio: add shutdown notification for RapidIO devices Add bus-specific callback to stop RapidIO devices during a system shutdown. 
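As an illustrative sketch (the foo_* names below are placeholders, not an in-tree driver), a RapidIO device driver opts in by filling the new callback in its struct rio_driver:

	/* Sketch: quiesce the device when the system shuts down */
	static void foo_rio_shutdown(struct rio_dev *rdev)
	{
		/* stop DMA and disable outbound doorbells/mailboxes here */
	}

	static struct rio_driver foo_rio_driver = {
		.name = "foo_rio",
		.id_table = foo_rio_id_table,
		.probe = foo_rio_probe,
		.remove = foo_rio_remove,
		.shutdown = foo_rio_shutdown,
	};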
Signed-off-by: Alexandre Bounine Cc: Matt Porter Cc: Aurelien Jacquiot Cc: Andre van Herk Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/rapidio/rio-driver.c | 12 ++++++++++++ include/linux/rio.h | 2 ++ 2 files changed, 14 insertions(+) (limited to 'include/linux') diff --git a/drivers/rapidio/rio-driver.c b/drivers/rapidio/rio-driver.c index f301f059bb85..128350f4d17a 100644 --- a/drivers/rapidio/rio-driver.c +++ b/drivers/rapidio/rio-driver.c @@ -131,6 +131,17 @@ static int rio_device_remove(struct device *dev) return 0; } +static void rio_device_shutdown(struct device *dev) +{ + struct rio_dev *rdev = to_rio_dev(dev); + struct rio_driver *rdrv = rdev->driver; + + dev_dbg(dev, "RIO: %s\n", __func__); + + if (rdrv && rdrv->shutdown) + rdrv->shutdown(rdev); +} + /** * rio_register_driver - register a new RIO driver * @rdrv: the RIO driver structure to register @@ -229,6 +240,7 @@ struct bus_type rio_bus_type = { .bus_groups = rio_bus_groups, .probe = rio_device_probe, .remove = rio_device_remove, + .shutdown = rio_device_shutdown, .uevent = rio_uevent, }; diff --git a/include/linux/rio.h b/include/linux/rio.h index 8996a629a321..c64a0baf37c8 100644 --- a/include/linux/rio.h +++ b/include/linux/rio.h @@ -423,6 +423,7 @@ struct rio_ops { * @id_table: RIO device ids to be associated with this driver * @probe: RIO device inserted * @remove: RIO device removed + * @shutdown: shutdown notification callback * @suspend: RIO device suspended * @resume: RIO device awakened * @enable_wake: RIO device enable wake event @@ -437,6 +438,7 @@ struct rio_driver { const struct rio_device_id *id_table; int (*probe) (struct rio_dev * dev, const struct rio_device_id * id); void (*remove) (struct rio_dev * dev); + void (*shutdown)(struct rio_dev *dev); int (*suspend) (struct rio_dev * dev, u32 state); int (*resume) (struct rio_dev * dev); int (*enable_wake) (struct rio_dev * dev, u32 state, int enable); -- cgit v1.2.3 From e6b585ca6e81badeb3d42db3cc408174f2826034 Mon Sep 17 00:00:00 2001 From: Alexandre Bounine Date: Tue, 22 Mar 2016 14:26:17 -0700 Subject: rapidio: move net allocation into core code Make net allocation/release routines available to all components of RapidIO subsystem by moving code from rio-scan enumerator. Make destination ID allocation method private to existing enumerator because other enumeration methods can use their own algorithm. Setup net device object as a parent of all RapidIO devices residing in it and register net as a child of active mport device. 
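[Editor's note: a rough sketch of the resulting split between core and enumerator. Names prefixed my_ are hypothetical; rio_alloc_net()/rio_add_net()/rio_free_net() are the core helpers added below, declared in the subsystem-private drivers/rapidio/rio.h, so this code assumes it lives inside drivers/rapidio.]

static void my_net_release(struct rio_net *net)
{
	kfree(net->enum_data);		/* enumerator-private data */
}

static void my_net_dev_release(struct device *dev)
{
	kfree(to_rio_net(dev));
}

static struct rio_net *my_alloc_net(struct rio_mport *mport)
{
	void *priv;
	struct rio_net *net;

	priv = kzalloc(64, GFP_KERNEL);	/* hypothetical destid bookkeeping */
	if (!priv)
		return NULL;

	net = rio_alloc_net(mport);	/* core: init lists, set mport->net */
	if (!net) {
		kfree(priv);
		return NULL;
	}

	net->enum_data = priv;		/* private to this enumerator */
	net->release = my_net_release;	/* called from rio_free_net() */
	net->id = mport->id;
	net->hport = mport;
	dev_set_name(&net->dev, "rnet_%d", net->id);
	net->dev.parent = &mport->dev;	/* net is a child of the active mport */
	net->dev.release = my_net_dev_release;
	rio_add_net(net);		/* register device, link into net list */
	return net;
}

Devices discovered on this net can then set rdev->dev.parent = &net->dev, which is exactly the change made in rio_setup_device() below.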
Signed-off-by: Alexandre Bounine Cc: Matt Porter Cc: Aurelien Jacquiot Cc: Andre van Herk Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/rapidio/rio-scan.c | 98 +++++++++++++++++++++++++++++----------------- drivers/rapidio/rio.c | 53 +++++++++++++++++++++++++ drivers/rapidio/rio.h | 3 ++ include/linux/rio.h | 18 ++++----- 4 files changed, 128 insertions(+), 44 deletions(-) (limited to 'include/linux') diff --git a/drivers/rapidio/rio-scan.c b/drivers/rapidio/rio-scan.c index c46763185849..764dc5fd30b2 100644 --- a/drivers/rapidio/rio-scan.c +++ b/drivers/rapidio/rio-scan.c @@ -39,6 +39,13 @@ static void rio_init_em(struct rio_dev *rdev); +struct rio_id_table { + u16 start; /* logical minimal id */ + u32 max; /* max number of IDs in table */ + spinlock_t lock; + unsigned long table[0]; +}; + static int next_destid = 0; static int next_comptag = 1; @@ -62,7 +69,7 @@ static int rio_mport_phys_table[] = { static u16 rio_destid_alloc(struct rio_net *net) { int destid; - struct rio_id_table *idtab = &net->destid_table; + struct rio_id_table *idtab = (struct rio_id_table *)net->enum_data; spin_lock(&idtab->lock); destid = find_first_zero_bit(idtab->table, idtab->max); @@ -88,7 +95,7 @@ static u16 rio_destid_alloc(struct rio_net *net) static int rio_destid_reserve(struct rio_net *net, u16 destid) { int oldbit; - struct rio_id_table *idtab = &net->destid_table; + struct rio_id_table *idtab = (struct rio_id_table *)net->enum_data; destid -= idtab->start; spin_lock(&idtab->lock); @@ -106,7 +113,7 @@ static int rio_destid_reserve(struct rio_net *net, u16 destid) */ static void rio_destid_free(struct rio_net *net, u16 destid) { - struct rio_id_table *idtab = &net->destid_table; + struct rio_id_table *idtab = (struct rio_id_table *)net->enum_data; destid -= idtab->start; spin_lock(&idtab->lock); @@ -121,7 +128,7 @@ static void rio_destid_free(struct rio_net *net, u16 destid) static u16 rio_destid_first(struct rio_net *net) { int destid; - struct rio_id_table *idtab = &net->destid_table; + struct rio_id_table *idtab = (struct rio_id_table *)net->enum_data; spin_lock(&idtab->lock); destid = find_first_bit(idtab->table, idtab->max); @@ -141,7 +148,7 @@ static u16 rio_destid_first(struct rio_net *net) static u16 rio_destid_next(struct rio_net *net, u16 from) { int destid; - struct rio_id_table *idtab = &net->destid_table; + struct rio_id_table *idtab = (struct rio_id_table *)net->enum_data; spin_lock(&idtab->lock); destid = find_next_bit(idtab->table, idtab->max, from); @@ -458,7 +465,7 @@ static struct rio_dev *rio_setup_device(struct rio_net *net, rdev->comp_tag & RIO_CTAG_UDEVID); } - rdev->dev.parent = &port->dev; + rdev->dev.parent = &net->dev; rio_attach_device(rdev); rdev->dev.release = rio_release_dev; rdev->dma_mask = DMA_BIT_MASK(32); @@ -855,50 +862,71 @@ static int rio_mport_is_active(struct rio_mport *port) return result & RIO_PORT_N_ERR_STS_PORT_OK; } -/** - * rio_alloc_net- Allocate and configure a new RIO network - * @port: Master port associated with the RIO network +static void rio_scan_release_net(struct rio_net *net) +{ + pr_debug("RIO-SCAN: %s: net_%d\n", __func__, net->id); + kfree(net->enum_data); +} + +static void rio_scan_release_dev(struct device *dev) +{ + struct rio_net *net; + + net = to_rio_net(dev); + pr_debug("RIO-SCAN: %s: net_%d\n", __func__, net->id); + kfree(net); +} + +/* + * rio_scan_alloc_net - Allocate and configure a new RIO network + * @mport: Master port associated with the RIO network * @do_enum: Enumeration/Discovery mode flag * @start: 
logical minimal start id for new net * - * Allocates a RIO network structure, initializes per-network - * list heads, and adds the associated master port to the - * network list of associated master ports. Returns a - * RIO network pointer on success or %NULL on failure. + * Allocates a new RIO network structure and initializes enumerator-specific + * part of it (if required). + * Returns a RIO network pointer on success or %NULL on failure. */ -static struct rio_net *rio_alloc_net(struct rio_mport *port, - int do_enum, u16 start) +static struct rio_net *rio_scan_alloc_net(struct rio_mport *mport, + int do_enum, u16 start) { struct rio_net *net; - net = kzalloc(sizeof(struct rio_net), GFP_KERNEL); + net = rio_alloc_net(mport); + if (net && do_enum) { - net->destid_table.table = kcalloc( - BITS_TO_LONGS(RIO_MAX_ROUTE_ENTRIES(port->sys_size)), - sizeof(long), - GFP_KERNEL); + struct rio_id_table *idtab; + size_t size; + + size = sizeof(struct rio_id_table) + + BITS_TO_LONGS( + RIO_MAX_ROUTE_ENTRIES(mport->sys_size) + ) * sizeof(long); - if (net->destid_table.table == NULL) { + idtab = kzalloc(size, GFP_KERNEL); + + if (idtab == NULL) { pr_err("RIO: failed to allocate destID table\n"); - kfree(net); + rio_free_net(net); net = NULL; } else { - net->destid_table.start = start; - net->destid_table.max = - RIO_MAX_ROUTE_ENTRIES(port->sys_size); - spin_lock_init(&net->destid_table.lock); + net->enum_data = idtab; + net->release = rio_scan_release_net; + idtab->start = start; + idtab->max = RIO_MAX_ROUTE_ENTRIES(mport->sys_size); + spin_lock_init(&idtab->lock); } } if (net) { - INIT_LIST_HEAD(&net->node); - INIT_LIST_HEAD(&net->devices); - INIT_LIST_HEAD(&net->switches); - INIT_LIST_HEAD(&net->mports); - list_add_tail(&port->nnode, &net->mports); - net->hport = port; - net->id = port->id; + net->id = mport->id; + net->hport = mport; + dev_set_name(&net->dev, "rnet_%d", net->id); + net->dev.parent = &mport->dev; + net->dev.release = rio_scan_release_dev; + rio_add_net(net); } + return net; } @@ -1007,7 +1035,7 @@ static int rio_enum_mport(struct rio_mport *mport, u32 flags) /* If master port has an active link, allocate net and enum peers */ if (rio_mport_is_active(mport)) { - net = rio_alloc_net(mport, 1, 0); + net = rio_scan_alloc_net(mport, 1, 0); if (!net) { printk(KERN_ERR "RIO: failed to allocate new net\n"); rc = -ENOMEM; @@ -1124,7 +1152,7 @@ static int rio_disc_mport(struct rio_mport *mport, u32 flags) enum_done: pr_debug("RIO: ... enumeration done\n"); - net = rio_alloc_net(mport, 0, 0); + net = rio_scan_alloc_net(mport, 0, 0); if (!net) { printk(KERN_ERR "RIO: Failed to allocate new net\n"); goto bail; diff --git a/drivers/rapidio/rio.c b/drivers/rapidio/rio.c index 0be86f4eea5d..5fd68cb4b610 100644 --- a/drivers/rapidio/rio.c +++ b/drivers/rapidio/rio.c @@ -42,6 +42,7 @@ MODULE_PARM_DESC(hdid, "Destination ID assignment to local RapidIO controllers"); static LIST_HEAD(rio_devices); +static LIST_HEAD(rio_nets); static DEFINE_SPINLOCK(rio_global_list_lock); static LIST_HEAD(rio_mports); @@ -84,6 +85,58 @@ int rio_query_mport(struct rio_mport *port, } EXPORT_SYMBOL(rio_query_mport); +/** + * rio_alloc_net- Allocate and initialize a new RIO network data structure + * @mport: Master port associated with the RIO network + * + * Allocates a RIO network structure, initializes per-network + * list heads, and adds the associated master port to the + * network list of associated master ports. Returns a + * RIO network pointer on success or %NULL on failure. 
+ */ +struct rio_net *rio_alloc_net(struct rio_mport *mport) +{ + struct rio_net *net; + + net = kzalloc(sizeof(struct rio_net), GFP_KERNEL); + if (net) { + INIT_LIST_HEAD(&net->node); + INIT_LIST_HEAD(&net->devices); + INIT_LIST_HEAD(&net->switches); + INIT_LIST_HEAD(&net->mports); + mport->net = net; + } + return net; +} +EXPORT_SYMBOL_GPL(rio_alloc_net); + +int rio_add_net(struct rio_net *net) +{ + int err; + + err = device_register(&net->dev); + if (err) + return err; + spin_lock(&rio_global_list_lock); + list_add_tail(&net->node, &rio_nets); + spin_unlock(&rio_global_list_lock); + + return 0; +} +EXPORT_SYMBOL_GPL(rio_add_net); + +void rio_free_net(struct rio_net *net) +{ + spin_lock(&rio_global_list_lock); + if (!list_empty(&net->node)) + list_del(&net->node); + spin_unlock(&rio_global_list_lock); + if (net->release) + net->release(net); + device_unregister(&net->dev); +} +EXPORT_SYMBOL_GPL(rio_free_net); + /** * rio_add_device- Adds a RIO device to the device model * @rdev: RIO device diff --git a/drivers/rapidio/rio.h b/drivers/rapidio/rio.h index da0f604a834e..68e7852dd1d8 100644 --- a/drivers/rapidio/rio.h +++ b/drivers/rapidio/rio.h @@ -39,6 +39,9 @@ extern int rio_route_get_entry(struct rio_dev *rdev, u16 table, extern int rio_route_clr_table(struct rio_dev *rdev, u16 table, int lock); extern int rio_set_port_lockout(struct rio_dev *rdev, u32 pnum, int lock); extern struct rio_dev *rio_get_comptag(u32 comp_tag, struct rio_dev *from); +extern struct rio_net *rio_alloc_net(struct rio_mport *mport); +extern int rio_add_net(struct rio_net *net); +extern void rio_free_net(struct rio_net *net); extern int rio_add_device(struct rio_dev *rdev); extern void rio_del_device(struct rio_dev *rdev); extern int rio_enable_rx_tx_port(struct rio_mport *port, int local, u16 destid, diff --git a/include/linux/rio.h b/include/linux/rio.h index c64a0baf37c8..36086bf7e6f9 100644 --- a/include/linux/rio.h +++ b/include/linux/rio.h @@ -202,6 +202,7 @@ struct rio_dev { #define to_rio_dev(n) container_of(n, struct rio_dev, dev) #define sw_to_rio_dev(n) container_of(n, struct rio_dev, rswitch[0]) #define to_rio_mport(n) container_of(n, struct rio_mport, dev) +#define to_rio_net(n) container_of(n, struct rio_net, dev) /** * struct rio_msg - RIO message event @@ -237,6 +238,7 @@ enum rio_phy_type { * @dbells: List of doorbell events * @node: Node in global list of master ports * @nnode: Node in network list of master ports + * @net: RIO net this mport is attached to * @iores: I/O mem resource that this master port interface owns * @riores: RIO resources that this master port interfaces owns * @inb_msg: RIO inbound message event descriptors @@ -258,6 +260,7 @@ struct rio_mport { struct list_head dbells; /* list of doorbell events */ struct list_head node; /* node in global list of ports */ struct list_head nnode; /* node in net list of ports */ + struct rio_net *net; /* RIO net this mport is attached to */ struct resource iores; struct resource riores[RIO_MAX_MPORT_RESOURCES]; struct rio_msg inb_msg[RIO_MAX_MBOX]; @@ -287,13 +290,6 @@ struct rio_mport { */ #define RIO_SCAN_ENUM_NO_WAIT 0x00000001 /* Do not wait for enum completed */ -struct rio_id_table { - u16 start; /* logical minimal id */ - u32 max; /* max number of IDs in table */ - spinlock_t lock; - unsigned long *table; -}; - /** * struct rio_net - RIO network info * @node: Node in global list of RIO networks @@ -302,7 +298,9 @@ struct rio_id_table { * @mports: List of master ports accessing this network * @hport: Default port for accessing this 
network * @id: RIO network ID - * @destid_table: destID allocation table + * @dev: Device object + * @enum_data: private data specific to a network enumerator + * @release: enumerator-specific release callback */ struct rio_net { struct list_head node; /* node in list of networks */ @@ -311,7 +309,9 @@ struct rio_net { struct list_head mports; /* list of ports accessing net */ struct rio_mport *hport; /* primary port for accessing net */ unsigned char id; /* RIO network ID */ - struct rio_id_table destid_table; /* destID allocation table */ + struct device dev; + void *enum_data; /* private data for enumerator of the network */ + void (*release)(struct rio_net *net); }; enum rio_link_speed { -- cgit v1.2.3 From b77a2030dface6ea6b0d900bd8496ef41a9f3323 Mon Sep 17 00:00:00 2001 From: Alexandre Bounine Date: Tue, 22 Mar 2016 14:26:20 -0700 Subject: rapidio: add core mport removal support Add common mport removal support functions into the RapidIO subsystem core. Changes to the existing mport registration process have been made to avoid race conditions with active subsystem interfaces immediately after mport device registration: part of initialization code from rio_register_mport() have been moved into separate function rio_mport_initialize() to allow to perform mport registration as the final step of setup process. Signed-off-by: Alexandre Bounine Cc: Matt Porter Cc: Aurelien Jacquiot Cc: Andre van Herk Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/rapidio/rio.c | 98 +++++++++++++++++++++++++++++++++++++++++---------- drivers/rapidio/rio.h | 2 +- include/linux/rio.h | 18 ++++++++++ 3 files changed, 98 insertions(+), 20 deletions(-) (limited to 'include/linux') diff --git a/drivers/rapidio/rio.c b/drivers/rapidio/rio.c index 5fd68cb4b610..03b8c5af72bb 100644 --- a/drivers/rapidio/rio.c +++ b/drivers/rapidio/rio.c @@ -149,6 +149,7 @@ int rio_add_device(struct rio_dev *rdev) { int err; + atomic_set(&rdev->state, RIO_DEVICE_RUNNING); err = device_register(&rdev->dev); if (err) return err; @@ -172,13 +173,15 @@ EXPORT_SYMBOL_GPL(rio_add_device); /* * rio_del_device - removes a RIO device from the device model * @rdev: RIO device + * @state: device state to set during removal process * * Removes the RIO device to the kernel device list and subsystem's device list. * Clears sysfs entries for the removed device. 
*/ -void rio_del_device(struct rio_dev *rdev) +void rio_del_device(struct rio_dev *rdev, enum rio_device_state state) { pr_debug("RIO: %s: removing %s\n", __func__, rio_name(rdev)); + atomic_set(&rdev->state, state); spin_lock(&rio_global_list_lock); list_del(&rdev->global_list); if (rdev->net) { @@ -2010,32 +2013,28 @@ static int rio_get_hdid(int index) return hdid[index]; } -int rio_register_mport(struct rio_mport *port) +int rio_mport_initialize(struct rio_mport *mport) { - struct rio_scan_node *scan = NULL; - int res = 0; - if (next_portid >= RIO_MAX_MPORTS) { pr_err("RIO: reached specified max number of mports\n"); - return 1; + return -ENODEV; } - port->id = next_portid++; - port->host_deviceid = rio_get_hdid(port->id); - port->nscan = NULL; + atomic_set(&mport->state, RIO_DEVICE_INITIALIZING); + mport->id = next_portid++; + mport->host_deviceid = rio_get_hdid(mport->id); + mport->nscan = NULL; - dev_set_name(&port->dev, "rapidio%d", port->id); - port->dev.class = &rio_mport_class; + return 0; +} +EXPORT_SYMBOL_GPL(rio_mport_initialize); - res = device_register(&port->dev); - if (res) - dev_err(&port->dev, "RIO: mport%d registration failed ERR=%d\n", - port->id, res); - else - dev_dbg(&port->dev, "RIO: mport%d registered\n", port->id); +int rio_register_mport(struct rio_mport *port) +{ + struct rio_scan_node *scan = NULL; + int res = 0; mutex_lock(&rio_mport_list_lock); - list_add_tail(&port->node, &rio_mports); /* * Check if there are any registered enumeration/discovery operations @@ -2049,12 +2048,73 @@ int rio_register_mport(struct rio_mport *port) break; } } + + list_add_tail(&port->node, &rio_mports); mutex_unlock(&rio_mport_list_lock); + dev_set_name(&port->dev, "rapidio%d", port->id); + port->dev.class = &rio_mport_class; + atomic_set(&port->state, RIO_DEVICE_RUNNING); + + res = device_register(&port->dev); + if (res) + dev_err(&port->dev, "RIO: mport%d registration failed ERR=%d\n", + port->id, res); + else + dev_dbg(&port->dev, "RIO: registered mport%d\n", port->id); + + return res; +} +EXPORT_SYMBOL_GPL(rio_register_mport); + +static int rio_mport_cleanup_callback(struct device *dev, void *data) +{ + struct rio_dev *rdev = to_rio_dev(dev); + + if (dev->bus == &rio_bus_type) + rio_del_device(rdev, RIO_DEVICE_SHUTDOWN); + return 0; +} + +static int rio_net_remove_children(struct rio_net *net) +{ + /* + * Unregister all RapidIO devices residing on this net (this will + * invoke notification of registered subsystem interfaces as well). + */ + device_for_each_child(&net->dev, NULL, rio_mport_cleanup_callback); + return 0; +} + +int rio_unregister_mport(struct rio_mport *port) +{ pr_debug("RIO: %s %s id=%d\n", __func__, port->name, port->id); + + /* Transition mport to the SHUTDOWN state */ + if (atomic_cmpxchg(&port->state, + RIO_DEVICE_RUNNING, + RIO_DEVICE_SHUTDOWN) != RIO_DEVICE_RUNNING) { + pr_err("RIO: %s unexpected state transition for mport %s\n", + __func__, port->name); + } + + if (port->net && port->net->hport == port) { + rio_net_remove_children(port->net); + rio_free_net(port->net); + } + + /* + * Unregister all RapidIO devices attached to this mport (this will + * invoke notification of registered subsystem interfaces as well). 
+ */ + mutex_lock(&rio_mport_list_lock); + list_del(&port->node); + mutex_unlock(&rio_mport_list_lock); + device_unregister(&port->dev); + return 0; } -EXPORT_SYMBOL_GPL(rio_register_mport); +EXPORT_SYMBOL_GPL(rio_unregister_mport); EXPORT_SYMBOL_GPL(rio_local_get_device_id); EXPORT_SYMBOL_GPL(rio_get_device); diff --git a/drivers/rapidio/rio.h b/drivers/rapidio/rio.h index 68e7852dd1d8..625d09add001 100644 --- a/drivers/rapidio/rio.h +++ b/drivers/rapidio/rio.h @@ -43,7 +43,7 @@ extern struct rio_net *rio_alloc_net(struct rio_mport *mport); extern int rio_add_net(struct rio_net *net); extern void rio_free_net(struct rio_net *net); extern int rio_add_device(struct rio_dev *rdev); -extern void rio_del_device(struct rio_dev *rdev); +extern void rio_del_device(struct rio_dev *rdev, enum rio_device_state state); extern int rio_enable_rx_tx_port(struct rio_mport *port, int local, u16 destid, u8 hopcount, u8 port_num); extern int rio_register_scan(int mport_id, struct rio_scan *scan_ops); diff --git a/include/linux/rio.h b/include/linux/rio.h index 36086bf7e6f9..f833773cdc68 100644 --- a/include/linux/rio.h +++ b/include/linux/rio.h @@ -137,6 +137,13 @@ struct rio_switch_ops { int (*em_handle) (struct rio_dev *dev, u8 swport); }; +enum rio_device_state { + RIO_DEVICE_INITIALIZING, + RIO_DEVICE_RUNNING, + RIO_DEVICE_GONE, + RIO_DEVICE_SHUTDOWN, +}; + /** * struct rio_dev - RIO device info * @global_list: Node in list of all RIO devices @@ -165,6 +172,7 @@ struct rio_switch_ops { * @destid: Network destination ID (or associated destid for switch) * @hopcount: Hopcount to this device * @prev: Previous RIO device connected to the current one + * @state: device state * @rswitch: struct rio_switch (if valid for this device) */ struct rio_dev { @@ -194,6 +202,7 @@ struct rio_dev { u16 destid; u8 hopcount; struct rio_dev *prev; + atomic_t state; struct rio_switch rswitch[0]; /* RIO switch info */ }; @@ -255,6 +264,7 @@ enum rio_phy_type { * @priv: Master port private data * @dma: DMA device associated with mport * @nscan: RapidIO network enumeration/discovery operations + * @state: mport device state */ struct rio_mport { struct list_head dbells; /* list of doorbell events */ @@ -283,8 +293,14 @@ struct rio_mport { struct dma_device dma; #endif struct rio_scan *nscan; + atomic_t state; }; +static inline int rio_mport_is_running(struct rio_mport *mport) +{ + return atomic_read(&mport->state) == RIO_DEVICE_RUNNING; +} + /* * Enumeration/discovery control flags */ @@ -525,7 +541,9 @@ struct rio_scan_node { }; /* Architecture and hardware-specific functions */ +extern int rio_mport_initialize(struct rio_mport *); extern int rio_register_mport(struct rio_mport *); +extern int rio_unregister_mport(struct rio_mport *); extern int rio_open_inb_mbox(struct rio_mport *, void *, int, int); extern void rio_close_inb_mbox(struct rio_mport *, int); extern int rio_open_outb_mbox(struct rio_mport *, void *, int, int); -- cgit v1.2.3 From a7b4c636d83034f0e89d58651ef2e9b96564489a Mon Sep 17 00:00:00 2001 From: Alexandre Bounine Date: Tue, 22 Mar 2016 14:26:35 -0700 Subject: rapidio: add lock protection for doorbell list Add lock protection around doorbell list handling to prevent list corruption on SMP platforms. 
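[Editor's note: the lists in question are the ones manipulated by the existing rio_request_inb_dbell()/rio_release_inb_dbell() interface; with mport->lock in place, two drivers may use them concurrently on the same mport. A minimal consumer sketch, with the doorbell range and callback body chosen arbitrarily:]

static void demo_dbell(struct rio_mport *mport, void *dev_id,
		       u16 src, u16 dst, u16 info)
{
	pr_info("doorbell 0x%04x from destid 0x%04x\n", info, src);
}

static int demo_setup(struct rio_mport *mport, void *priv)
{
	/* Adds an entry to mport->dbells under mport->lock. */
	return rio_request_inb_dbell(mport, priv, 0x1000, 0x10ff, demo_dbell);
}

static void demo_teardown(struct rio_mport *mport)
{
	/* Unlinks the entry under mport->lock before releasing it. */
	rio_release_inb_dbell(mport, 0x1000, 0x10ff);
}

Note that the release path below now removes the entry from the list while still holding the lock, so a concurrent lookup can never observe a half-removed node.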
Signed-off-by: Alexandre Bounine Cc: Matt Porter Cc: Aurelien Jacquiot Cc: Andre van Herk Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/rapidio/rio.c | 9 ++++++--- include/linux/rio.h | 2 ++ 2 files changed, 8 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/rapidio/rio.c b/drivers/rapidio/rio.c index 03b8c5af72bb..e42f97e9e62a 100644 --- a/drivers/rapidio/rio.c +++ b/drivers/rapidio/rio.c @@ -362,7 +362,9 @@ rio_setup_inb_dbell(struct rio_mport *mport, void *dev_id, struct resource *res, dbell->dinb = dinb; dbell->dev_id = dev_id; + mutex_lock(&mport->lock); list_add_tail(&dbell->node, &mport->dbells); + mutex_unlock(&mport->lock); out: return rc; @@ -426,12 +428,15 @@ int rio_release_inb_dbell(struct rio_mport *mport, u16 start, u16 end) int rc = 0, found = 0; struct rio_dbell *dbell; + mutex_lock(&mport->lock); list_for_each_entry(dbell, &mport->dbells, node) { if ((dbell->res->start == start) && (dbell->res->end == end)) { + list_del(&dbell->node); found = 1; break; } } + mutex_unlock(&mport->lock); /* If we can't find an exact match, fail */ if (!found) { @@ -439,9 +444,6 @@ int rio_release_inb_dbell(struct rio_mport *mport, u16 start, u16 end) goto out; } - /* Delete from list */ - list_del(&dbell->node); - /* Release the doorbell resource */ rc = release_resource(dbell->res); @@ -2024,6 +2026,7 @@ int rio_mport_initialize(struct rio_mport *mport) mport->id = next_portid++; mport->host_deviceid = rio_get_hdid(mport->id); mport->nscan = NULL; + mutex_init(&mport->lock); return 0; } diff --git a/include/linux/rio.h b/include/linux/rio.h index f833773cdc68..948f60550ae5 100644 --- a/include/linux/rio.h +++ b/include/linux/rio.h @@ -248,6 +248,7 @@ enum rio_phy_type { * @node: Node in global list of master ports * @nnode: Node in network list of master ports * @net: RIO net this mport is attached to + * @lock: lock to synchronize lists manipulations * @iores: I/O mem resource that this master port interface owns * @riores: RIO resources that this master port interfaces owns * @inb_msg: RIO inbound message event descriptors @@ -271,6 +272,7 @@ struct rio_mport { struct list_head node; /* node in global list of ports */ struct list_head nnode; /* node in net list of ports */ struct rio_net *net; /* RIO net this mport is attached to */ + struct mutex lock; struct resource iores; struct resource riores[RIO_MAX_MPORT_RESOURCES]; struct rio_msg inb_msg[RIO_MAX_MBOX]; -- cgit v1.2.3 From 5024622f583eb242ed8040d0b9d1e0d2458d1db8 Mon Sep 17 00:00:00 2001 From: Alexandre Bounine Date: Tue, 22 Mar 2016 14:26:38 -0700 Subject: rapidio: move rio_local_set_device_id function to the common core Make function rio_local_set_device_id() common for all components of RapidIO subsystem. 
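[Editor's note: with the move, any enumerator, not just rio-scan, can assign the host destination ID through the core. A trivial usage sketch; the ID value is arbitrary:]

static void demo_assign_host_did(struct rio_mport *mport)
{
	u16 host_did = 0x0001;	/* hypothetical host destination ID */

	rio_local_set_device_id(mport, host_did);

	/* Read back through the existing core helper. */
	WARN_ON(rio_local_get_device_id(mport) != host_did);
}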
Signed-off-by: Alexandre Bounine Cc: Matt Porter Cc: Aurelien Jacquiot Cc: Andre van Herk Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/rapidio/rio-scan.c | 13 ------------- drivers/rapidio/rio.c | 14 ++++++++++++++ include/linux/rio_drv.h | 1 + 3 files changed, 15 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/drivers/rapidio/rio-scan.c b/drivers/rapidio/rio-scan.c index 764dc5fd30b2..f730914f6ff6 100644 --- a/drivers/rapidio/rio-scan.c +++ b/drivers/rapidio/rio-scan.c @@ -193,19 +193,6 @@ static void rio_set_device_id(struct rio_mport *port, u16 destid, u8 hopcount, u RIO_SET_DID(port->sys_size, did)); } -/** - * rio_local_set_device_id - Set the base/extended device id for a port - * @port: RIO master port - * @did: Device ID value to be written - * - * Writes the base/extended device id from a device. - */ -static void rio_local_set_device_id(struct rio_mport *port, u16 did) -{ - rio_local_write_config_32(port, RIO_DID_CSR, RIO_SET_DID(port->sys_size, - did)); -} - /** * rio_clear_locks- Release all host locks and signal enumeration complete * @net: RIO network to run on diff --git a/drivers/rapidio/rio.c b/drivers/rapidio/rio.c index e42f97e9e62a..095801c4d239 100644 --- a/drivers/rapidio/rio.c +++ b/drivers/rapidio/rio.c @@ -137,6 +137,20 @@ void rio_free_net(struct rio_net *net) } EXPORT_SYMBOL_GPL(rio_free_net); +/** + * rio_local_set_device_id - Set the base/extended device id for a port + * @port: RIO master port + * @did: Device ID value to be written + * + * Writes the base/extended device id from a device. + */ +void rio_local_set_device_id(struct rio_mport *port, u16 did) +{ + rio_local_write_config_32(port, RIO_DID_CSR, + RIO_SET_DID(port->sys_size, did)); +} +EXPORT_SYMBOL_GPL(rio_local_set_device_id); + /** * rio_add_device- Adds a RIO device to the device model * @rdev: RIO device diff --git a/include/linux/rio_drv.h b/include/linux/rio_drv.h index 9fc2f213e74f..341b3bf78333 100644 --- a/include/linux/rio_drv.h +++ b/include/linux/rio_drv.h @@ -435,6 +435,7 @@ static inline void rio_set_drvdata(struct rio_dev *rdev, void *data) /* Misc driver helpers */ extern u16 rio_local_get_device_id(struct rio_mport *port); +extern void rio_local_set_device_id(struct rio_mport *port, u16 did); extern struct rio_dev *rio_get_device(u16 vid, u16 did, struct rio_dev *from); extern struct rio_dev *rio_get_asm(u16 vid, u16 did, u16 asm_vid, u16 asm_did, struct rio_dev *from); -- cgit v1.2.3 From b6cb95e8eb97e51a1a1b5609b59df859cc6dc2f2 Mon Sep 17 00:00:00 2001 From: Alexandre Bounine Date: Tue, 22 Mar 2016 14:26:41 -0700 Subject: rapidio: move rio_pw_enable into core code Make rio_pw_enable() routine available to other RapidIO drivers. 
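[Editor's note: the moved routine also gains reference counting (the pwe_refcnt field added below), so the mport hardware is touched only on the first enable and the last disable. A sketch of two independent consumers sharing one mport:]

static void demo_pw_enable_pairing(struct rio_mport *mport)
{
	rio_pw_enable(mport, 1);  /* A: refcnt 0 -> 1, ops->pwenable(mport, 1) */
	rio_pw_enable(mport, 1);  /* B: refcnt 1 -> 2, no hardware access      */
	rio_pw_enable(mport, 0);  /* B: refcnt 2 -> 1, no hardware access      */
	rio_pw_enable(mport, 0);  /* A: refcnt 1 -> 0, ops->pwenable(mport, 0) */
}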
Signed-off-by: Alexandre Bounine Cc: Matt Porter Cc: Aurelien Jacquiot Cc: Andre van Herk Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/rapidio/rio-scan.c | 11 ----------- drivers/rapidio/rio.c | 19 +++++++++++++++++++ include/linux/rio.h | 2 ++ include/linux/rio_drv.h | 1 + 4 files changed, 22 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/drivers/rapidio/rio-scan.c b/drivers/rapidio/rio-scan.c index f730914f6ff6..a63a380809d1 100644 --- a/drivers/rapidio/rio-scan.c +++ b/drivers/rapidio/rio-scan.c @@ -973,17 +973,6 @@ static void rio_init_em(struct rio_dev *rdev) } } -/** - * rio_pw_enable - Enables/disables port-write handling by a master port - * @port: Master port associated with port-write handling - * @enable: 1=enable, 0=disable - */ -static void rio_pw_enable(struct rio_mport *port, int enable) -{ - if (port->ops->pwenable) - port->ops->pwenable(port, enable); -} - /** * rio_enum_mport- Start enumeration through a master port * @mport: Master port to send transactions diff --git a/drivers/rapidio/rio.c b/drivers/rapidio/rio.c index 095801c4d239..673774bf6dd6 100644 --- a/drivers/rapidio/rio.c +++ b/drivers/rapidio/rio.c @@ -559,6 +559,24 @@ int rio_release_inb_pwrite(struct rio_dev *rdev) } EXPORT_SYMBOL_GPL(rio_release_inb_pwrite); +/** + * rio_pw_enable - Enables/disables port-write handling by a master port + * @mport: Master port associated with port-write handling + * @enable: 1=enable, 0=disable + */ +void rio_pw_enable(struct rio_mport *mport, int enable) +{ + if (mport->ops->pwenable) { + mutex_lock(&mport->lock); + + if ((enable && ++mport->pwe_refcnt == 1) || + (!enable && mport->pwe_refcnt && --mport->pwe_refcnt == 0)) + mport->ops->pwenable(mport, enable); + mutex_unlock(&mport->lock); + } +} +EXPORT_SYMBOL_GPL(rio_pw_enable); + /** * rio_map_inb_region -- Map inbound memory region. * @mport: Master port. 
@@ -2041,6 +2059,7 @@ int rio_mport_initialize(struct rio_mport *mport) mport->host_deviceid = rio_get_hdid(mport->id); mport->nscan = NULL; mutex_init(&mport->lock); + mport->pwe_refcnt = 0; return 0; } diff --git a/include/linux/rio.h b/include/linux/rio.h index 948f60550ae5..cb3c47543bb0 100644 --- a/include/linux/rio.h +++ b/include/linux/rio.h @@ -266,6 +266,7 @@ enum rio_phy_type { * @dma: DMA device associated with mport * @nscan: RapidIO network enumeration/discovery operations * @state: mport device state + * @pwe_refcnt: port-write enable ref counter to track enable/disable requests */ struct rio_mport { struct list_head dbells; /* list of doorbell events */ @@ -296,6 +297,7 @@ struct rio_mport { #endif struct rio_scan *nscan; atomic_t state; + unsigned int pwe_refcnt; }; static inline int rio_mport_is_running(struct rio_mport *mport) diff --git a/include/linux/rio_drv.h b/include/linux/rio_drv.h index 341b3bf78333..9fb2bcd0a987 100644 --- a/include/linux/rio_drv.h +++ b/include/linux/rio_drv.h @@ -375,6 +375,7 @@ extern int rio_request_inb_pwrite(struct rio_dev *, int (*)(struct rio_dev *, union rio_pw_msg*, int)); extern int rio_release_inb_pwrite(struct rio_dev *); extern int rio_inb_pwrite_handler(union rio_pw_msg *pw_msg); +extern void rio_pw_enable(struct rio_mport *mport, int enable); /* LDM support */ int rio_register_driver(struct rio_driver *); -- cgit v1.2.3 From 9a0b062742e7e039273c0c2ba4b96ad9ec7e7d8f Mon Sep 17 00:00:00 2001 From: Alexandre Bounine Date: Tue, 22 Mar 2016 14:26:44 -0700 Subject: rapidio: add global inbound port write interfaces Add new Port Write handler registration interfaces that attach PW handlers to local mport device objects. This is different from old interface that attaches PW callback to individual RapidIO device. The new interfaces are intended for use for common event handling (e.g. hot-plug notifications) while the old interface is available for individual device drivers. This patch is based on patch proposed by Andre van Herk but preserves existing per-device interface and adds lock protection for list handling. 
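[Editor's note: a minimal sketch of a subsystem component using the new mport-level interface. The handler body and context are hypothetical; the signatures follow the declarations added to rio_drv.h below.]

static int demo_pw_handler(struct rio_mport *mport, void *context,
			   union rio_pw_msg *msg, int step)
{
	/* e.g. schedule hot-plug processing keyed on msg->em.comptag */
	return 0;
}

static int demo_attach(struct rio_mport *mport, void *ctx)
{
	int rc = rio_add_mport_pw_handler(mport, ctx, demo_pw_handler);

	if (!rc)
		rio_pw_enable(mport, 1);	/* refcounted enable */
	return rc;
}

static void demo_detach(struct rio_mport *mport, void *ctx)
{
	rio_pw_enable(mport, 0);
	rio_del_mport_pw_handler(mport, ctx, demo_pw_handler);
}

Unlike the per-device pwcback, handlers registered this way see every port-write arriving at the mport, including ones whose component tag matches no enumerated device.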
Signed-off-by: Alexandre Bounine Cc: Matt Porter Cc: Aurelien Jacquiot Cc: Andre van Herk Cc: Stephen Rothwell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/powerpc/sysdev/fsl_rio.c | 1 + arch/powerpc/sysdev/fsl_rio.h | 1 + arch/powerpc/sysdev/fsl_rmu.c | 16 +++-- drivers/rapidio/devices/tsi721.c | 24 ++----- drivers/rapidio/rio.c | 142 ++++++++++++++++++++++++++++++++------- include/linux/rio.h | 2 + include/linux/rio_drv.h | 9 ++- 7 files changed, 144 insertions(+), 51 deletions(-) (limited to 'include/linux') diff --git a/arch/powerpc/sysdev/fsl_rio.c b/arch/powerpc/sysdev/fsl_rio.c index 385371acc0d0..f5bf38b94595 100644 --- a/arch/powerpc/sysdev/fsl_rio.c +++ b/arch/powerpc/sysdev/fsl_rio.c @@ -726,6 +726,7 @@ int fsl_rio_setup(struct platform_device *dev) fsl_rio_inbound_mem_init(priv); dbell->mport[i] = port; + pw->mport[i] = port; if (rio_register_mport(port)) { release_resource(&port->iores); diff --git a/arch/powerpc/sysdev/fsl_rio.h b/arch/powerpc/sysdev/fsl_rio.h index d53407a34f32..12dd18fd4795 100644 --- a/arch/powerpc/sysdev/fsl_rio.h +++ b/arch/powerpc/sysdev/fsl_rio.h @@ -97,6 +97,7 @@ struct fsl_rio_dbell { }; struct fsl_rio_pw { + struct rio_mport *mport[MAX_PORT_NUM]; struct device *dev; struct rio_pw_regs __iomem *pw_regs; struct rio_port_write_msg port_write_msg; diff --git a/arch/powerpc/sysdev/fsl_rmu.c b/arch/powerpc/sysdev/fsl_rmu.c index ffe0ee832768..c1826de4e749 100644 --- a/arch/powerpc/sysdev/fsl_rmu.c +++ b/arch/powerpc/sysdev/fsl_rmu.c @@ -481,14 +481,14 @@ pw_done: static void fsl_pw_dpc(struct work_struct *work) { struct fsl_rio_pw *pw = container_of(work, struct fsl_rio_pw, pw_work); - u32 msg_buffer[RIO_PW_MSG_SIZE/sizeof(u32)]; + union rio_pw_msg msg_buffer; + int i; /* * Process port-write messages */ - while (kfifo_out_spinlocked(&pw->pw_fifo, (unsigned char *)msg_buffer, + while (kfifo_out_spinlocked(&pw->pw_fifo, (unsigned char *)&msg_buffer, RIO_PW_MSG_SIZE, &pw->pw_fifo_lock)) { - /* Process one message */ #ifdef DEBUG_PW { u32 i; @@ -496,15 +496,19 @@ static void fsl_pw_dpc(struct work_struct *work) for (i = 0; i < RIO_PW_MSG_SIZE/sizeof(u32); i++) { if ((i%4) == 0) pr_debug("\n0x%02x: 0x%08x", i*4, - msg_buffer[i]); + msg_buffer.raw[i]); else - pr_debug(" 0x%08x", msg_buffer[i]); + pr_debug(" 0x%08x", msg_buffer.raw[i]); } pr_debug("\n"); } #endif /* Pass the port-write message to RIO core for processing */ - rio_inb_pwrite_handler((union rio_pw_msg *)msg_buffer); + for (i = 0; i < MAX_PORT_NUM; i++) { + if (pw->mport[i]) + rio_inb_pwrite_handler(pw->mport[i], + &msg_buffer); + } } } diff --git a/drivers/rapidio/devices/tsi721.c b/drivers/rapidio/devices/tsi721.c index db95d71ba4e9..5e1d52674e17 100644 --- a/drivers/rapidio/devices/tsi721.c +++ b/drivers/rapidio/devices/tsi721.c @@ -36,8 +36,6 @@ #include "tsi721.h" -#define DEBUG_PW /* Inbound Port-Write debugging */ - static void tsi721_omsg_handler(struct tsi721_device *priv, int ch); static void tsi721_imsg_handler(struct tsi721_device *priv, int ch); @@ -282,30 +280,15 @@ static void tsi721_pw_dpc(struct work_struct *work) { struct tsi721_device *priv = container_of(work, struct tsi721_device, pw_work); - u32 msg_buffer[RIO_PW_MSG_SIZE/sizeof(u32)]; /* Use full size PW message - buffer for RIO layer */ + union rio_pw_msg pwmsg; /* * Process port-write messages */ - while (kfifo_out_spinlocked(&priv->pw_fifo, (unsigned char *)msg_buffer, + while (kfifo_out_spinlocked(&priv->pw_fifo, (unsigned char *)&pwmsg, TSI721_RIO_PW_MSG_SIZE, &priv->pw_fifo_lock)) { - /* Process 
one message */ -#ifdef DEBUG_PW - { - u32 i; - pr_debug("%s : Port-Write Message:", __func__); - for (i = 0; i < RIO_PW_MSG_SIZE/sizeof(u32); ) { - pr_debug("0x%02x: %08x %08x %08x %08x", i*4, - msg_buffer[i], msg_buffer[i + 1], - msg_buffer[i + 2], msg_buffer[i + 3]); - i += 4; - } - pr_debug("\n"); - } -#endif /* Pass the port-write message to RIO core for processing */ - rio_inb_pwrite_handler((union rio_pw_msg *)msg_buffer); + rio_inb_pwrite_handler(&priv->mport, &pwmsg); } } @@ -2702,6 +2685,7 @@ static void tsi721_remove(struct pci_dev *pdev) tsi721_disable_ints(priv); tsi721_free_irq(priv); + flush_scheduled_work(); rio_unregister_mport(&priv->mport); tsi721_unregister_dma(priv); diff --git a/drivers/rapidio/rio.c b/drivers/rapidio/rio.c index 673774bf6dd6..17973d3caa88 100644 --- a/drivers/rapidio/rio.c +++ b/drivers/rapidio/rio.c @@ -30,6 +30,20 @@ #include "rio.h" +/* + * struct rio_pwrite - RIO portwrite event + * @node: Node in list of doorbell events + * @pwcback: Doorbell event callback + * @context: Handler specific context to pass on event + */ +struct rio_pwrite { + struct list_head node; + + int (*pwcback)(struct rio_mport *mport, void *context, + union rio_pw_msg *msg, int step); + void *context; +}; + MODULE_DESCRIPTION("RapidIO Subsystem Core"); MODULE_AUTHOR("Matt Porter "); MODULE_AUTHOR("Alexandre Bounine "); @@ -514,7 +528,71 @@ int rio_release_outb_dbell(struct rio_dev *rdev, struct resource *res) } /** - * rio_request_inb_pwrite - request inbound port-write message service + * rio_add_mport_pw_handler - add port-write message handler into the list + * of mport specific pw handlers + * @mport: RIO master port to bind the portwrite callback + * @context: Handler specific context to pass on event + * @pwcback: Callback to execute when portwrite is received + * + * Returns 0 if the request has been satisfied. + */ +int rio_add_mport_pw_handler(struct rio_mport *mport, void *context, + int (*pwcback)(struct rio_mport *mport, + void *context, union rio_pw_msg *msg, int step)) +{ + int rc = 0; + struct rio_pwrite *pwrite; + + pwrite = kzalloc(sizeof(struct rio_pwrite), GFP_KERNEL); + if (!pwrite) { + rc = -ENOMEM; + goto out; + } + + pwrite->pwcback = pwcback; + pwrite->context = context; + mutex_lock(&mport->lock); + list_add_tail(&pwrite->node, &mport->pwrites); + mutex_unlock(&mport->lock); +out: + return rc; +} +EXPORT_SYMBOL_GPL(rio_add_mport_pw_handler); + +/** + * rio_del_mport_pw_handler - remove port-write message handler from the list + * of mport specific pw handlers + * @mport: RIO master port to bind the portwrite callback + * @context: Registered handler specific context to pass on event + * @pwcback: Registered callback function + * + * Returns 0 if the request has been satisfied. 
+ */ +int rio_del_mport_pw_handler(struct rio_mport *mport, void *context, + int (*pwcback)(struct rio_mport *mport, + void *context, union rio_pw_msg *msg, int step)) +{ + int rc = -EINVAL; + struct rio_pwrite *pwrite; + + mutex_lock(&mport->lock); + list_for_each_entry(pwrite, &mport->pwrites, node) { + if (pwrite->pwcback == pwcback && pwrite->context == context) { + list_del(&pwrite->node); + kfree(pwrite); + rc = 0; + break; + } + } + mutex_unlock(&mport->lock); + + return rc; +} +EXPORT_SYMBOL_GPL(rio_del_mport_pw_handler); + +/** + * rio_request_inb_pwrite - request inbound port-write message service for + * specific RapidIO device * @rdev: RIO device to which register inbound port-write callback routine * @pwcback: Callback routine to execute when port-write is received * @@ -539,6 +617,7 @@ EXPORT_SYMBOL_GPL(rio_request_inb_pwrite); /** * rio_release_inb_pwrite - release inbound port-write message service + * associated with specific RapidIO device * @rdev: RIO device which registered for inbound port-write callback * * Removes callback from the rio_dev structure. Returns 0 if the request @@ -1002,52 +1081,66 @@ rd_err: } /** - * rio_inb_pwrite_handler - process inbound port-write message + * rio_inb_pwrite_handler - inbound port-write message handler + * @mport: mport device associated with port-write * @pw_msg: pointer to inbound port-write message * * Processes an inbound port-write message. Returns 0 if the request * has been satisfied. */ -int rio_inb_pwrite_handler(union rio_pw_msg *pw_msg) +int rio_inb_pwrite_handler(struct rio_mport *mport, union rio_pw_msg *pw_msg) { struct rio_dev *rdev; u32 err_status, em_perrdet, em_ltlerrdet; int rc, portnum; - - rdev = rio_get_comptag((pw_msg->em.comptag & RIO_CTAG_UDEVID), NULL); - if (rdev == NULL) { - /* Device removed or enumeration error */ - pr_debug("RIO: %s No matching device for CTag 0x%08x\n", - __func__, pw_msg->em.comptag); - return -EIO; - } - - pr_debug("RIO: Port-Write message from %s\n", rio_name(rdev)); + struct rio_pwrite *pwrite; #ifdef DEBUG_PW { - u32 i; - for (i = 0; i < RIO_PW_MSG_SIZE/sizeof(u32);) { + u32 i; + + pr_debug("%s: PW to mport_%d:\n", __func__, mport->id); + for (i = 0; i < RIO_PW_MSG_SIZE / sizeof(u32); i = i + 4) { pr_debug("0x%02x: %08x %08x %08x %08x\n", - i*4, pw_msg->raw[i], pw_msg->raw[i + 1], - pw_msg->raw[i + 2], pw_msg->raw[i + 3]); - i += 4; - } + i * 4, pw_msg->raw[i], pw_msg->raw[i + 1], + pw_msg->raw[i + 2], pw_msg->raw[i + 3]); + } } #endif - /* Call an external service function (if such is registered - * for this device). This may be the service for endpoints that send - * device-specific port-write messages. End-point messages expected - * to be handled completely by EP specific device driver. + rdev = rio_get_comptag((pw_msg->em.comptag & RIO_CTAG_UDEVID), NULL); + if (rdev) { + pr_debug("RIO: Port-Write message from %s\n", rio_name(rdev)); + } else { + pr_debug("RIO: %s No matching device for CTag 0x%08x\n", + __func__, pw_msg->em.comptag); + } + + /* Call a device-specific handler (if it is registered for the device). + * This may be the service for endpoints that send device-specific + * port-write messages. End-point messages expected to be handled + * completely by EP specific device driver. * For switches rc==0 signals that no standard processing required. 
*/ - if (rdev->pwcback != NULL) { + if (rdev && rdev->pwcback) { rc = rdev->pwcback(rdev, pw_msg, 0); if (rc == 0) return 0; } + mutex_lock(&mport->lock); + list_for_each_entry(pwrite, &mport->pwrites, node) + pwrite->pwcback(mport, pwrite->context, pw_msg, 0); + mutex_unlock(&mport->lock); + + if (!rdev) + return 0; + + /* + * FIXME: The code below stays as it was before for now until we decide + * how to do default PW handling in combination with per-mport callbacks + */ + portnum = pw_msg->em.is_port & 0xFF; /* Check if device and route to it are functional: @@ -2060,6 +2153,7 @@ int rio_mport_initialize(struct rio_mport *mport) mport->nscan = NULL; mutex_init(&mport->lock); mport->pwe_refcnt = 0; + INIT_LIST_HEAD(&mport->pwrites); return 0; } diff --git a/include/linux/rio.h b/include/linux/rio.h index cb3c47543bb0..44f3da512c15 100644 --- a/include/linux/rio.h +++ b/include/linux/rio.h @@ -245,6 +245,7 @@ enum rio_phy_type { /** * struct rio_mport - RIO master port info * @dbells: List of doorbell events + * @pwrites: List of portwrite events * @node: Node in global list of master ports * @nnode: Node in network list of master ports * @net: RIO net this mport is attached to @@ -270,6 +271,7 @@ enum rio_phy_type { */ struct rio_mport { struct list_head dbells; /* list of doorbell events */ + struct list_head pwrites; /* list of portwrite events */ struct list_head node; /* node in global list of ports */ struct list_head nnode; /* node in net list of ports */ struct rio_net *net; /* RIO net this mport is attached to */ diff --git a/include/linux/rio_drv.h b/include/linux/rio_drv.h index 9fb2bcd0a987..5dff9a4cb675 100644 --- a/include/linux/rio_drv.h +++ b/include/linux/rio_drv.h @@ -374,7 +374,14 @@ extern void rio_unmap_inb_region(struct rio_mport *mport, dma_addr_t lstart); extern int rio_request_inb_pwrite(struct rio_dev *, int (*)(struct rio_dev *, union rio_pw_msg*, int)); extern int rio_release_inb_pwrite(struct rio_dev *); -extern int rio_inb_pwrite_handler(union rio_pw_msg *pw_msg); +extern int rio_add_mport_pw_handler(struct rio_mport *mport, void *dev_id, + int (*pwcback)(struct rio_mport *mport, void *dev_id, + union rio_pw_msg *msg, int step)); +extern int rio_del_mport_pw_handler(struct rio_mport *mport, void *dev_id, + int (*pwcback)(struct rio_mport *mport, void *dev_id, + union rio_pw_msg *msg, int step)); +extern int rio_inb_pwrite_handler(struct rio_mport *mport, + union rio_pw_msg *pw_msg); extern void rio_pw_enable(struct rio_mport *mport, int enable); /* LDM support */ -- cgit v1.2.3 From 93bdaca5018c02ba838f8fe2178fab261e2c1e68 Mon Sep 17 00:00:00 2001 From: Alexandre Bounine Date: Tue, 22 Mar 2016 14:26:50 -0700 Subject: rapidio: add outbound window support Add RapidIO controller (mport) outbound window configuration operations. This patch is a part of the original patch submitted by Li Yang: https://lists.ozlabs.org/pipermail/linuxppc-dev/2009-April/071210.html For some reason the original part was not applied to mainline code tree. The inbound window mapping part has been applied later during tsi721 mport driver submission. Now goes the second part with corresponding HW support. 
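[Editor's note: a sketch of the intended use, assuming an mport driver that implements the new map_outb/unmap_outb callbacks; the addresses and window size below are arbitrary.]

static int demo_obw(struct rio_mport *mport, u16 destid)
{
	dma_addr_t local;
	int rc;

	/*
	 * Map a 1 MB outbound window onto RapidIO address 0x10000000
	 * of the device addressed by destid.
	 */
	rc = rio_map_outb_region(mport, destid, 0x10000000, 0x100000,
				 0, &local);
	if (rc)		/* -ENODEV if ops->map_outb is not provided */
		return rc;

	/*
	 * Accesses to the returned local physical address now generate
	 * read/write requests into the remote device's address space.
	 */

	rio_unmap_outb_region(mport, destid, 0x10000000);
	return 0;
}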
Signed-off-by: Alexandre Bounine Cc: Matt Porter Cc: Li Yang Cc: Aurelien Jacquiot Cc: Andre van Herk Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/rapidio/rio.c | 50 +++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/rio.h | 5 +++++ include/linux/rio_drv.h | 4 ++++ 3 files changed, 59 insertions(+) (limited to 'include/linux') diff --git a/drivers/rapidio/rio.c b/drivers/rapidio/rio.c index 17973d3caa88..0dcaa660cba1 100644 --- a/drivers/rapidio/rio.c +++ b/drivers/rapidio/rio.c @@ -699,6 +699,56 @@ void rio_unmap_inb_region(struct rio_mport *mport, dma_addr_t lstart) } EXPORT_SYMBOL_GPL(rio_unmap_inb_region); +/** + * rio_map_outb_region -- Map outbound memory region. + * @mport: Master port. + * @destid: destination id window points to + * @rbase: RIO base address window translates to + * @size: Size of the memory region + * @rflags: Flags for mapping. + * @local: physical address of memory region mapped + * + * Return: 0 -- Success. + * + * This function will create the mapping from RIO space to local memory. + */ +int rio_map_outb_region(struct rio_mport *mport, u16 destid, u64 rbase, + u32 size, u32 rflags, dma_addr_t *local) +{ + int rc = 0; + unsigned long flags; + + if (!mport->ops->map_outb) + return -ENODEV; + + spin_lock_irqsave(&rio_mmap_lock, flags); + rc = mport->ops->map_outb(mport, destid, rbase, size, + rflags, local); + spin_unlock_irqrestore(&rio_mmap_lock, flags); + + return rc; +} +EXPORT_SYMBOL_GPL(rio_map_outb_region); + +/** + * rio_unmap_inb_region -- Unmap the inbound memory region + * @mport: Master port + * @destid: destination id mapping points to + * @rstart: RIO base address window translates to + */ +void rio_unmap_outb_region(struct rio_mport *mport, u16 destid, u64 rstart) +{ + unsigned long flags; + + if (!mport->ops->unmap_outb) + return; + + spin_lock_irqsave(&rio_mmap_lock, flags); + mport->ops->unmap_outb(mport, destid, rstart); + spin_unlock_irqrestore(&rio_mmap_lock, flags); +} +EXPORT_SYMBOL_GPL(rio_unmap_outb_region); + /** * rio_mport_get_physefb - Helper function that returns register offset * for Physical Layer Extended Features Block. diff --git a/include/linux/rio.h b/include/linux/rio.h index 44f3da512c15..aa2323893e8d 100644 --- a/include/linux/rio.h +++ b/include/linux/rio.h @@ -400,6 +400,8 @@ struct rio_mport_attr { * @map_inb: Callback to map RapidIO address region into local memory space. * @unmap_inb: Callback to unmap RapidIO address region mapped with map_inb(). * @query_mport: Callback to query mport device attributes. + * @map_outb: Callback to map outbound address region into local memory space. + * @unmap_outb: Callback to unmap outbound RapidIO address region. 
*/ struct rio_ops { int (*lcread) (struct rio_mport *mport, int index, u32 offset, int len, @@ -427,6 +429,9 @@ struct rio_ops { void (*unmap_inb)(struct rio_mport *mport, dma_addr_t lstart); int (*query_mport)(struct rio_mport *mport, struct rio_mport_attr *attr); + int (*map_outb)(struct rio_mport *mport, u16 destid, u64 rstart, + u32 size, u32 flags, dma_addr_t *laddr); + void (*unmap_outb)(struct rio_mport *mport, u16 destid, u64 rstart); }; #define RIO_RESOURCE_MEM 0x00000100 diff --git a/include/linux/rio_drv.h b/include/linux/rio_drv.h index 5dff9a4cb675..0834264fb7f2 100644 --- a/include/linux/rio_drv.h +++ b/include/linux/rio_drv.h @@ -369,6 +369,10 @@ void rio_release_region(struct rio_dev *, int); extern int rio_map_inb_region(struct rio_mport *mport, dma_addr_t local, u64 rbase, u32 size, u32 rflags); extern void rio_unmap_inb_region(struct rio_mport *mport, dma_addr_t lstart); +extern int rio_map_outb_region(struct rio_mport *mport, u16 destid, u64 rbase, + u32 size, u32 rflags, dma_addr_t *local); +extern void rio_unmap_outb_region(struct rio_mport *mport, + u16 destid, u64 rstart); /* Port-Write management */ extern int rio_request_inb_pwrite(struct rio_dev *, -- cgit v1.2.3 From e8de370188d098bb49483c287b44925957c3c9b6 Mon Sep 17 00:00:00 2001 From: Alexandre Bounine Date: Tue, 22 Mar 2016 14:27:08 -0700 Subject: rapidio: add mport char device driver Add mport character device driver to provide user space interface to basic RapidIO subsystem operations. See included Documentation/rapidio/mport_cdev.txt for more details. [akpm@linux-foundation.org: fix printk warning on i386] [dan.carpenter@oracle.com: mport_cdev: fix some error codes] Signed-off-by: Alexandre Bounine Signed-off-by: Dan Carpenter Tested-by: Barry Wood Cc: Matt Porter Cc: Aurelien Jacquiot Cc: Andre van Herk Cc: Barry Wood Cc: Randy Dunlap Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/rapidio/mport_cdev.txt | 104 ++ drivers/rapidio/Kconfig | 8 + drivers/rapidio/devices/Makefile | 1 + drivers/rapidio/devices/rio_mport_cdev.c | 2720 ++++++++++++++++++++++++++++++ include/linux/rio_mport_cdev.h | 271 +++ include/uapi/linux/Kbuild | 1 + 6 files changed, 3105 insertions(+) create mode 100644 Documentation/rapidio/mport_cdev.txt create mode 100644 drivers/rapidio/devices/rio_mport_cdev.c create mode 100644 include/linux/rio_mport_cdev.h (limited to 'include/linux') diff --git a/Documentation/rapidio/mport_cdev.txt b/Documentation/rapidio/mport_cdev.txt new file mode 100644 index 000000000000..20c120d4b3b8 --- /dev/null +++ b/Documentation/rapidio/mport_cdev.txt @@ -0,0 +1,104 @@ +RapidIO subsystem mport character device driver (rio_mport_cdev.c) +================================================================== + +Version History: +---------------- + 1.0.0 - Initial driver release. + +================================================================== + +I. Overview + +This device driver is the result of collaboration within the RapidIO.org +Software Task Group (STG) between Texas Instruments, Freescale, +Prodrive Technologies, Nokia Networks, BAE and IDT. Additional input was +received from other members of RapidIO.org. The objective was to create a +character mode driver interface which exposes the capabilities of RapidIO +devices directly to applications, in a manner that allows the numerous and +varied RapidIO implementations to interoperate. + +This driver (MPORT_CDEV) provides access to basic RapidIO subsystem operations +for user-space applications. 
Most RapidIO operations are supported through +'ioctl' system calls. + +When loaded, this device driver creates filesystem nodes named rio_mportX in the /dev +directory for each registered RapidIO mport device. 'X' in the node name matches +the unique port ID assigned to each local mport device. + +Using the available set of ioctl commands, user-space applications can perform the +following RapidIO bus and subsystem operations: + +- Reads and writes from/to configuration registers of mport devices + (RIO_MPORT_MAINT_READ_LOCAL/RIO_MPORT_MAINT_WRITE_LOCAL) +- Reads and writes from/to configuration registers of remote RapidIO devices. + These operations are defined as RapidIO Maintenance reads/writes in the RIO spec. + (RIO_MPORT_MAINT_READ_REMOTE/RIO_MPORT_MAINT_WRITE_REMOTE) +- Set RapidIO Destination ID for mport devices (RIO_MPORT_MAINT_HDID_SET) +- Set RapidIO Component Tag for mport devices (RIO_MPORT_MAINT_COMPTAG_SET) +- Query logical index of mport devices (RIO_MPORT_MAINT_PORT_IDX_GET) +- Query capabilities and RapidIO link configuration of mport devices + (RIO_MPORT_GET_PROPERTIES) +- Enable/Disable reporting of RapidIO doorbell events to user-space applications + (RIO_ENABLE_DOORBELL_RANGE/RIO_DISABLE_DOORBELL_RANGE) +- Enable/Disable reporting of RIO port-write events to user-space applications + (RIO_ENABLE_PORTWRITE_RANGE/RIO_DISABLE_PORTWRITE_RANGE) +- Query/Control the type of events reported through this driver: doorbells, + port-writes or both (RIO_SET_EVENT_MASK/RIO_GET_EVENT_MASK) +- Configure/Map mport's outbound requests window(s) for a specific size, + RapidIO destination ID, hopcount and request type + (RIO_MAP_OUTBOUND/RIO_UNMAP_OUTBOUND) +- Configure/Map mport's inbound requests window(s) for a specific size, + RapidIO base address and local memory base address + (RIO_MAP_INBOUND/RIO_UNMAP_INBOUND) +- Allocate/Free a contiguous DMA coherent memory buffer for DMA data transfers + to/from remote RapidIO devices (RIO_ALLOC_DMA/RIO_FREE_DMA) +- Initiate DMA data transfers to/from remote RapidIO devices (RIO_TRANSFER). + Supports blocking, asynchronous and posted (a.k.a. 'fire-and-forget') data + transfer modes. +- Check/Wait for completion of an asynchronous DMA data transfer + (RIO_WAIT_FOR_ASYNC) +- Manage device objects supported by the RapidIO subsystem (RIO_DEV_ADD/RIO_DEV_DEL). + This allows implementation of various RapidIO fabric enumeration algorithms + as user-space applications while using the remaining functionality provided by the + kernel RapidIO subsystem. + +II. Hardware Compatibility + +This device driver uses standard interfaces defined by the kernel RapidIO subsystem +and can therefore be used with any mport device driver registered with the RapidIO +subsystem, within the limitations set by the available mport implementation. + +At this moment the most common limitation is the availability of a RapidIO-specific +DMA engine framework for a specific mport device. Users should verify the available +functionality of their platform when planning to use this driver: + +- The IDT Tsi721 PCIe-to-RapidIO bridge device and its mport device driver are fully + compatible with this driver. +- The Freescale SoC 'fsl_rio' mport driver does not implement the RapidIO-specific + DMA engine support, and therefore DMA data transfers through the mport_cdev driver + are not available. + +III. Module parameters + +- 'dbg_level' - This parameter controls the amount of debug information + generated by this device driver. It is formed by a set of + bit masks that correspond to specific + functional blocks.
+ For mask definitions see 'drivers/rapidio/devices/rio_mport_cdev.c' + This parameter can be changed dynamically. + Use CONFIG_RAPIDIO_DEBUG=y to enable debug output at the top level. + +IV. Known problems + + None. + +V. User-space Applications and API + +API library and applications that use this device driver are available from +RapidIO.org. + +VI. TODO List + +- Add support for sending/receiving "raw" RapidIO messaging packets. +- Add memory mapped DMA data transfers as an option when RapidIO-specific DMA + is not available. diff --git a/drivers/rapidio/Kconfig b/drivers/rapidio/Kconfig index 3e3be57e9a1a..b5a10d3c92c7 100644 --- a/drivers/rapidio/Kconfig +++ b/drivers/rapidio/Kconfig @@ -67,6 +67,14 @@ config RAPIDIO_ENUM_BASIC endchoice +config RAPIDIO_MPORT_CDEV + tristate "RapidIO /dev mport device driver" + depends on RAPIDIO + help + This option includes generic RapidIO mport device driver which + allows to user space applications to perform RapidIO-specific + operations through selected RapidIO mport. + menu "RapidIO Switch drivers" depends on RAPIDIO diff --git a/drivers/rapidio/devices/Makefile b/drivers/rapidio/devices/Makefile index 9432c494cf57..927dbf89592b 100644 --- a/drivers/rapidio/devices/Makefile +++ b/drivers/rapidio/devices/Makefile @@ -5,3 +5,4 @@ obj-$(CONFIG_RAPIDIO_TSI721) += tsi721_mport.o tsi721_mport-y := tsi721.o tsi721_mport-$(CONFIG_RAPIDIO_DMA_ENGINE) += tsi721_dma.o +obj-$(CONFIG_RAPIDIO_MPORT_CDEV) += rio_mport_cdev.o diff --git a/drivers/rapidio/devices/rio_mport_cdev.c b/drivers/rapidio/devices/rio_mport_cdev.c new file mode 100644 index 000000000000..9607bc826460 --- /dev/null +++ b/drivers/rapidio/devices/rio_mport_cdev.c @@ -0,0 +1,2720 @@ +/* + * RapidIO mport character device + * + * Copyright 2014-2015 Integrated Device Technology, Inc. + * Alexandre Bounine + * Copyright 2014-2015 Prodrive Technologies + * Andre van Herk + * Jerry Jacobs + * Copyright (C) 2014 Texas Instruments Incorporated + * Aurelien Jacquiot + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include +#ifdef CONFIG_RAPIDIO_DMA_ENGINE +#include +#endif + +#include +#include +#include +#include + +#include "../rio.h" + +#define DRV_NAME "rio_mport" +#define DRV_PREFIX DRV_NAME ": " +#define DEV_NAME "rio_mport" +#define DRV_VERSION "1.0.0" + +/* Debug output filtering masks */ +enum { + DBG_NONE = 0, + DBG_INIT = BIT(0), /* driver init */ + DBG_EXIT = BIT(1), /* driver exit */ + DBG_MPORT = BIT(2), /* mport add/remove */ + DBG_RDEV = BIT(3), /* RapidIO device add/remove */ + DBG_DMA = BIT(4), /* DMA transfer messages */ + DBG_MMAP = BIT(5), /* mapping messages */ + DBG_IBW = BIT(6), /* inbound window */ + DBG_EVENT = BIT(7), /* event handling messages */ + DBG_OBW = BIT(8), /* outbound window messages */ + DBG_DBELL = BIT(9), /* doorbell messages */ + DBG_ALL = ~0, +}; + +#ifdef DEBUG +#define rmcd_debug(level, fmt, arg...) \ + do { \ + if (DBG_##level & dbg_level) \ + pr_debug(DRV_PREFIX "%s: " fmt "\n", __func__, ##arg); \ + } while (0) +#else +#define rmcd_debug(level, fmt, arg...) \ + no_printk(KERN_DEBUG pr_fmt(DRV_PREFIX fmt "\n"), ##arg) +#endif + +#define rmcd_warn(fmt, arg...) 
+	pr_warn(DRV_PREFIX "%s WARNING " fmt "\n", __func__, ##arg)
+
+#define rmcd_error(fmt, arg...) \
+	pr_err(DRV_PREFIX "%s ERROR " fmt "\n", __func__, ##arg)
+
+MODULE_AUTHOR("Jerry Jacobs ");
+MODULE_AUTHOR("Aurelien Jacquiot ");
+MODULE_AUTHOR("Alexandre Bounine ");
+MODULE_AUTHOR("Andre van Herk ");
+MODULE_DESCRIPTION("RapidIO mport character device driver");
+MODULE_LICENSE("GPL");
+MODULE_VERSION(DRV_VERSION);
+
+static int dma_timeout = 3000; /* DMA transfer timeout in msec */
+module_param(dma_timeout, int, S_IRUGO);
+MODULE_PARM_DESC(dma_timeout, "DMA Transfer Timeout in msec (default: 3000)");
+
+#ifdef DEBUG
+static u32 dbg_level = DBG_NONE;
+module_param(dbg_level, uint, S_IWUSR | S_IWGRP | S_IRUGO);
+MODULE_PARM_DESC(dbg_level, "Debugging output level (default 0 = none)");
+#endif
+
+/*
+ * An internal DMA coherent buffer
+ */
+struct mport_dma_buf {
+	void		*ib_base;
+	dma_addr_t	ib_phys;
+	u32		ib_size;
+	u64		ib_rio_base;
+	bool		ib_map;
+	struct file	*filp;
+};
+
+/*
+ * Internal memory mapping structure
+ */
+enum rio_mport_map_dir {
+	MAP_INBOUND,
+	MAP_OUTBOUND,
+	MAP_DMA,
+};
+
+struct rio_mport_mapping {
+	struct list_head node;
+	struct mport_dev *md;
+	enum rio_mport_map_dir dir;
+	u32 rioid;
+	u64 rio_addr;
+	dma_addr_t phys_addr; /* for mmap */
+	void *virt_addr; /* kernel address, for dma_free_coherent */
+	u64 size;
+	struct kref ref; /* refcount of vmas sharing the mapping */
+	struct file *filp;
+};
+
+struct rio_mport_dma_map {
+	int valid;
+	uint64_t length;
+	void *vaddr;
+	dma_addr_t paddr;
+};
+
+#define MPORT_MAX_DMA_BUFS	16
+#define MPORT_EVENT_DEPTH	10
+
+/*
+ * mport_dev - driver-specific structure that represents an mport device
+ * @active	mport device status flag
+ * @node	list node to maintain list of registered mports
+ * @cdev	character device
+ * @dev		associated device object
+ * @mport	associated subsystem's master port device object
+ * @buf_mutex	lock for buffer handling
+ * @file_mutex	lock for open files list
+ * @file_list	list of open files on given mport
+ * @properties	properties of this mport
+ * @doorbells	list of inbound doorbell filters
+ * @db_lock	lock for doorbell filter list
+ * @portwrites	queue of inbound portwrites
+ * @pw_lock	lock for port write queue
+ * @mappings	queue for memory mappings
+ * @dma_chan	default DMA channel associated with this device
+ * @dma_ref	reference counter for the default DMA channel
+ * @comp	completion signaled when the default DMA channel is released
+ */
+struct mport_dev {
+	atomic_t		active;
+	struct list_head	node;
+	struct cdev		cdev;
+	struct device		dev;
+	struct rio_mport	*mport;
+	struct mutex		buf_mutex;
+	struct mutex		file_mutex;
+	struct list_head	file_list;
+	struct rio_mport_properties	properties;
+	struct list_head	doorbells;
+	spinlock_t		db_lock;
+	struct list_head	portwrites;
+	spinlock_t		pw_lock;
+	struct list_head	mappings;
+#ifdef CONFIG_RAPIDIO_DMA_ENGINE
+	struct dma_chan		*dma_chan;
+	struct kref		dma_ref;
+	struct completion	comp;
+#endif
+};
+
+/*
+ * mport_cdev_priv - data structure specific to individual file object
+ *                   associated with an open device
+ * @md		master port character device object
+ * @async_queue	asynchronous notification queue
+ * @list	file objects tracking list
+ * @db_filters	inbound doorbell filters for this descriptor
+ * @pw_filters	portwrite filters for this descriptor
+ * @event_fifo	event fifo for this descriptor
+ * @event_rx_wait	wait queue for this descriptor
+ * @fifo_lock	lock for event_fifo
+ * @event_mask	event mask for this descriptor
+ * @dmach	DMA engine channel allocated for specific file object
+ * @async_list	list of pending asynchronous DMA requests
+ * @pend_list	list of DMA requests interrupted while in progress
+ * @req_lock	lock protecting async_list and pend_list
+ * @dma_lock	mutex serializing DMA channel allocation
+ * @dma_ref	reference counter for the allocated DMA channel
+ * @comp	completion signaled when the DMA channel is released
+ */
+struct mport_cdev_priv {
+	struct mport_dev	*md;
+	struct fasync_struct	*async_queue;
+	struct list_head	list;
+	struct list_head
db_filters; + struct list_head pw_filters; + struct kfifo event_fifo; + wait_queue_head_t event_rx_wait; + spinlock_t fifo_lock; + unsigned int event_mask; /* RIO_DOORBELL, RIO_PORTWRITE */ +#ifdef CONFIG_RAPIDIO_DMA_ENGINE + struct dma_chan *dmach; + struct list_head async_list; + struct list_head pend_list; + spinlock_t req_lock; + struct mutex dma_lock; + struct kref dma_ref; + struct completion comp; +#endif +}; + +/* + * rio_mport_pw_filter - structure to describe a portwrite filter + * md_node node in mport device's list + * priv_node node in private file object's list + * priv reference to private data + * filter actual portwrite filter + */ +struct rio_mport_pw_filter { + struct list_head md_node; + struct list_head priv_node; + struct mport_cdev_priv *priv; + struct rio_pw_filter filter; +}; + +/* + * rio_mport_db_filter - structure to describe a doorbell filter + * @data_node reference to device node + * @priv_node node in private data + * @priv reference to private data + * @filter actual doorbell filter + */ +struct rio_mport_db_filter { + struct list_head data_node; + struct list_head priv_node; + struct mport_cdev_priv *priv; + struct rio_doorbell_filter filter; +}; + +static LIST_HEAD(mport_devs); +static DEFINE_MUTEX(mport_devs_lock); + +#if (0) /* used by commented out portion of poll function : FIXME */ +static DECLARE_WAIT_QUEUE_HEAD(mport_cdev_wait); +#endif + +static struct class *dev_class; +static dev_t dev_number; + +static struct workqueue_struct *dma_wq; + +static void mport_release_mapping(struct kref *ref); + +static int rio_mport_maint_rd(struct mport_cdev_priv *priv, void __user *arg, + int local) +{ + struct rio_mport *mport = priv->md->mport; + struct rio_mport_maint_io maint_io; + u32 *buffer; + u32 offset; + size_t length; + int ret, i; + + if (unlikely(copy_from_user(&maint_io, arg, sizeof(maint_io)))) + return -EFAULT; + + if ((maint_io.offset % 4) || + (maint_io.length == 0) || (maint_io.length % 4)) + return -EINVAL; + + buffer = vmalloc(maint_io.length); + if (buffer == NULL) + return -ENOMEM; + length = maint_io.length/sizeof(u32); + offset = maint_io.offset; + + for (i = 0; i < length; i++) { + if (local) + ret = __rio_local_read_config_32(mport, + offset, &buffer[i]); + else + ret = rio_mport_read_config_32(mport, maint_io.rioid, + maint_io.hopcount, offset, &buffer[i]); + if (ret) + goto out; + + offset += 4; + } + + if (unlikely(copy_to_user(maint_io.buffer, buffer, maint_io.length))) + ret = -EFAULT; +out: + vfree(buffer); + return ret; +} + +static int rio_mport_maint_wr(struct mport_cdev_priv *priv, void __user *arg, + int local) +{ + struct rio_mport *mport = priv->md->mport; + struct rio_mport_maint_io maint_io; + u32 *buffer; + u32 offset; + size_t length; + int ret = -EINVAL, i; + + if (unlikely(copy_from_user(&maint_io, arg, sizeof(maint_io)))) + return -EFAULT; + + if ((maint_io.offset % 4) || + (maint_io.length == 0) || (maint_io.length % 4)) + return -EINVAL; + + buffer = vmalloc(maint_io.length); + if (buffer == NULL) + return -ENOMEM; + length = maint_io.length; + + if (unlikely(copy_from_user(buffer, maint_io.buffer, length))) { + ret = -EFAULT; + goto out; + } + + offset = maint_io.offset; + length /= sizeof(u32); + + for (i = 0; i < length; i++) { + if (local) + ret = __rio_local_write_config_32(mport, + offset, buffer[i]); + else + ret = rio_mport_write_config_32(mport, maint_io.rioid, + maint_io.hopcount, + offset, buffer[i]); + if (ret) + goto out; + + offset += 4; + } + +out: + vfree(buffer); + return ret; +} + + +/* + * 
Inbound/outbound memory mapping functions
+ */
+static int
+rio_mport_create_outbound_mapping(struct mport_dev *md, struct file *filp,
+				  u32 rioid, u64 raddr, u32 size,
+				  dma_addr_t *paddr)
+{
+	struct rio_mport *mport = md->mport;
+	struct rio_mport_mapping *map;
+	int ret;
+
+	rmcd_debug(OBW, "did=%d ra=0x%llx sz=0x%x", rioid, raddr, size);
+
+	map = kzalloc(sizeof(struct rio_mport_mapping), GFP_KERNEL);
+	if (map == NULL)
+		return -ENOMEM;
+
+	ret = rio_map_outb_region(mport, rioid, raddr, size, 0, paddr);
+	if (ret < 0)
+		goto err_map_outb;
+
+	map->dir = MAP_OUTBOUND;
+	map->rioid = rioid;
+	map->rio_addr = raddr;
+	map->size = size;
+	map->phys_addr = *paddr;
+	map->filp = filp;
+	map->md = md;
+	kref_init(&map->ref);
+	list_add_tail(&map->node, &md->mappings);
+	return 0;
+err_map_outb:
+	kfree(map);
+	return ret;
+}
+
+static int
+rio_mport_get_outbound_mapping(struct mport_dev *md, struct file *filp,
+			       u32 rioid, u64 raddr, u32 size,
+			       dma_addr_t *paddr)
+{
+	struct rio_mport_mapping *map;
+	int err = -ENOMEM;
+
+	mutex_lock(&md->buf_mutex);
+	list_for_each_entry(map, &md->mappings, node) {
+		if (map->dir != MAP_OUTBOUND)
+			continue;
+		if (rioid == map->rioid &&
+		    raddr == map->rio_addr && size == map->size) {
+			*paddr = map->phys_addr;
+			err = 0;
+			break;
+		} else if (rioid == map->rioid &&
+			   raddr < (map->rio_addr + map->size - 1) &&
+			   (raddr + size) > map->rio_addr) {
+			err = -EBUSY;
+			break;
+		}
+	}
+
+	/* If not found, create new */
+	if (err == -ENOMEM)
+		err = rio_mport_create_outbound_mapping(md, filp, rioid, raddr,
+							size, paddr);
+	mutex_unlock(&md->buf_mutex);
+	return err;
+}
+
+static int rio_mport_obw_map(struct file *filp, void __user *arg)
+{
+	struct mport_cdev_priv *priv = filp->private_data;
+	struct mport_dev *data = priv->md;
+	struct rio_mmap map;
+	dma_addr_t paddr;
+	int ret;
+
+	if (unlikely(copy_from_user(&map, arg, sizeof(struct rio_mmap))))
+		return -EFAULT;
+
+	rmcd_debug(OBW, "did=%d ra=0x%llx sz=0x%llx",
+		   map.rioid, map.rio_addr, map.length);
+
+	ret = rio_mport_get_outbound_mapping(data, filp, map.rioid,
+					     map.rio_addr, map.length, &paddr);
+	if (ret < 0) {
+		rmcd_error("Failed to set OBW err=%d", ret);
+		return ret;
+	}
+
+	map.handle = paddr;
+
+	if (unlikely(copy_to_user(arg, &map, sizeof(struct rio_mmap))))
+		return -EFAULT;
+	return 0;
+}
+
+/*
+ * rio_mport_obw_free() - unmap an OutBound Window from RapidIO address space
+ *
+ * @filp: associated file pointer
+ * @arg: window handle returned by the RIO_MAP_OUTBOUND request
+ */
+static int rio_mport_obw_free(struct file *filp, void __user *arg)
+{
+	struct mport_cdev_priv *priv = filp->private_data;
+	struct mport_dev *md = priv->md;
+	u64 handle;
+	struct rio_mport_mapping *map, *_map;
+
+	if (!md->mport->ops->unmap_outb)
+		return -EPROTONOSUPPORT;
+
+	if (copy_from_user(&handle, arg, sizeof(u64)))
+		return -EFAULT;
+
+	rmcd_debug(OBW, "h=0x%llx", handle);
+
+	mutex_lock(&md->buf_mutex);
+	list_for_each_entry_safe(map, _map, &md->mappings, node) {
+		if (map->dir == MAP_OUTBOUND && map->phys_addr == handle) {
+			if (map->filp == filp) {
+				rmcd_debug(OBW, "kref_put h=0x%llx", handle);
+				map->filp = NULL;
+				kref_put(&map->ref, mport_release_mapping);
+			}
+			break;
+		}
+	}
+	mutex_unlock(&md->buf_mutex);
+
+	return 0;
+}
+
+/*
+ * maint_hdid_set() - Set the host Device ID
+ * @priv: driver private data
+ * @arg: Device ID
+ */
+static int maint_hdid_set(struct mport_cdev_priv *priv, void __user *arg)
+{
+	struct mport_dev *md = priv->md;
+	uint16_t hdid;
+
+	if (copy_from_user(&hdid, arg, sizeof(uint16_t)))
return -EFAULT; + + md->mport->host_deviceid = hdid; + md->properties.hdid = hdid; + rio_local_set_device_id(md->mport, hdid); + + rmcd_debug(MPORT, "Set host device Id to %d", hdid); + + return 0; +} + +/* + * maint_comptag_set() - Set the host Component Tag + * @priv: driver private data + * @arg: Component Tag + */ +static int maint_comptag_set(struct mport_cdev_priv *priv, void __user *arg) +{ + struct mport_dev *md = priv->md; + uint32_t comptag; + + if (copy_from_user(&comptag, arg, sizeof(uint32_t))) + return -EFAULT; + + rio_local_write_config_32(md->mport, RIO_COMPONENT_TAG_CSR, comptag); + + rmcd_debug(MPORT, "Set host Component Tag to %d", comptag); + + return 0; +} + +#ifdef CONFIG_RAPIDIO_DMA_ENGINE + +struct mport_dma_req { + struct list_head node; + struct file *filp; + struct mport_cdev_priv *priv; + enum rio_transfer_sync sync; + struct sg_table sgt; + struct page **page_list; + unsigned int nr_pages; + struct rio_mport_mapping *map; + struct dma_chan *dmach; + enum dma_data_direction dir; + dma_cookie_t cookie; + enum dma_status status; + struct completion req_comp; +}; + +struct mport_faf_work { + struct work_struct work; + struct mport_dma_req *req; +}; + +static void mport_release_def_dma(struct kref *dma_ref) +{ + struct mport_dev *md = + container_of(dma_ref, struct mport_dev, dma_ref); + + rmcd_debug(EXIT, "DMA_%d", md->dma_chan->chan_id); + rio_release_dma(md->dma_chan); + md->dma_chan = NULL; +} + +static void mport_release_dma(struct kref *dma_ref) +{ + struct mport_cdev_priv *priv = + container_of(dma_ref, struct mport_cdev_priv, dma_ref); + + rmcd_debug(EXIT, "DMA_%d", priv->dmach->chan_id); + complete(&priv->comp); +} + +static void dma_req_free(struct mport_dma_req *req) +{ + struct mport_cdev_priv *priv = req->priv; + unsigned int i; + + dma_unmap_sg(req->dmach->device->dev, + req->sgt.sgl, req->sgt.nents, req->dir); + sg_free_table(&req->sgt); + if (req->page_list) { + for (i = 0; i < req->nr_pages; i++) + put_page(req->page_list[i]); + kfree(req->page_list); + } + + if (req->map) { + mutex_lock(&req->map->md->buf_mutex); + kref_put(&req->map->ref, mport_release_mapping); + mutex_unlock(&req->map->md->buf_mutex); + } + + kref_put(&priv->dma_ref, mport_release_dma); + + kfree(req); +} + +static void dma_xfer_callback(void *param) +{ + struct mport_dma_req *req = (struct mport_dma_req *)param; + struct mport_cdev_priv *priv = req->priv; + + req->status = dma_async_is_tx_complete(priv->dmach, req->cookie, + NULL, NULL); + complete(&req->req_comp); +} + +static void dma_faf_cleanup(struct work_struct *_work) +{ + struct mport_faf_work *work = container_of(_work, + struct mport_faf_work, work); + struct mport_dma_req *req = work->req; + + dma_req_free(req); + kfree(work); +} + +static void dma_faf_callback(void *param) +{ + struct mport_dma_req *req = (struct mport_dma_req *)param; + struct mport_faf_work *work; + + work = kmalloc(sizeof(*work), GFP_ATOMIC); + if (!work) + return; + + INIT_WORK(&work->work, dma_faf_cleanup); + work->req = req; + queue_work(dma_wq, &work->work); +} + +/* + * prep_dma_xfer() - Configure and send request to DMAengine to prepare DMA + * transfer object. + * Returns pointer to DMA transaction descriptor allocated by DMA driver on + * success or ERR_PTR (and/or NULL) if failed. Caller must check returned + * non-NULL pointer using IS_ERR macro. 
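+ * (The underlying dmaengine prep callback may fail by returning either NULL
+ * or an ERR_PTR() value, which is why the caller checks for both cases.)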
+ */ +static struct dma_async_tx_descriptor +*prep_dma_xfer(struct dma_chan *chan, struct rio_transfer_io *transfer, + struct sg_table *sgt, int nents, enum dma_transfer_direction dir, + enum dma_ctrl_flags flags) +{ + struct rio_dma_data tx_data; + + tx_data.sg = sgt->sgl; + tx_data.sg_len = nents; + tx_data.rio_addr_u = 0; + tx_data.rio_addr = transfer->rio_addr; + if (dir == DMA_MEM_TO_DEV) { + switch (transfer->method) { + case RIO_EXCHANGE_NWRITE: + tx_data.wr_type = RDW_ALL_NWRITE; + break; + case RIO_EXCHANGE_NWRITE_R_ALL: + tx_data.wr_type = RDW_ALL_NWRITE_R; + break; + case RIO_EXCHANGE_NWRITE_R: + tx_data.wr_type = RDW_LAST_NWRITE_R; + break; + case RIO_EXCHANGE_DEFAULT: + tx_data.wr_type = RDW_DEFAULT; + break; + default: + return ERR_PTR(-EINVAL); + } + } + + return rio_dma_prep_xfer(chan, transfer->rioid, &tx_data, dir, flags); +} + +/* Request DMA channel associated with this mport device. + * Try to request DMA channel for every new process that opened given + * mport. If a new DMA channel is not available use default channel + * which is the first DMA channel opened on mport device. + */ +static int get_dma_channel(struct mport_cdev_priv *priv) +{ + mutex_lock(&priv->dma_lock); + if (!priv->dmach) { + priv->dmach = rio_request_mport_dma(priv->md->mport); + if (!priv->dmach) { + /* Use default DMA channel if available */ + if (priv->md->dma_chan) { + priv->dmach = priv->md->dma_chan; + kref_get(&priv->md->dma_ref); + } else { + rmcd_error("Failed to get DMA channel"); + mutex_unlock(&priv->dma_lock); + return -ENODEV; + } + } else if (!priv->md->dma_chan) { + /* Register default DMA channel if we do not have one */ + priv->md->dma_chan = priv->dmach; + kref_init(&priv->md->dma_ref); + rmcd_debug(DMA, "Register DMA_chan %d as default", + priv->dmach->chan_id); + } + + kref_init(&priv->dma_ref); + init_completion(&priv->comp); + } + + kref_get(&priv->dma_ref); + mutex_unlock(&priv->dma_lock); + return 0; +} + +static void put_dma_channel(struct mport_cdev_priv *priv) +{ + kref_put(&priv->dma_ref, mport_release_dma); +} + +/* + * DMA transfer functions + */ +static int do_dma_request(struct mport_dma_req *req, + struct rio_transfer_io *xfer, + enum rio_transfer_sync sync, int nents) +{ + struct mport_cdev_priv *priv; + struct sg_table *sgt; + struct dma_chan *chan; + struct dma_async_tx_descriptor *tx; + dma_cookie_t cookie; + unsigned long tmo = msecs_to_jiffies(dma_timeout); + enum dma_transfer_direction dir; + long wret; + int ret = 0; + + priv = req->priv; + sgt = &req->sgt; + + chan = priv->dmach; + dir = (req->dir == DMA_FROM_DEVICE) ? 
DMA_DEV_TO_MEM : DMA_MEM_TO_DEV; + + rmcd_debug(DMA, "%s(%d) uses %s for DMA_%s", + current->comm, task_pid_nr(current), + dev_name(&chan->dev->device), + (dir == DMA_DEV_TO_MEM)?"READ":"WRITE"); + + /* Initialize DMA transaction request */ + tx = prep_dma_xfer(chan, xfer, sgt, nents, dir, + DMA_CTRL_ACK | DMA_PREP_INTERRUPT); + + if (!tx) { + rmcd_debug(DMA, "prep error for %s A:0x%llx L:0x%llx", + (dir == DMA_DEV_TO_MEM)?"READ":"WRITE", + xfer->rio_addr, xfer->length); + ret = -EIO; + goto err_out; + } else if (IS_ERR(tx)) { + ret = PTR_ERR(tx); + rmcd_debug(DMA, "prep error %d for %s A:0x%llx L:0x%llx", ret, + (dir == DMA_DEV_TO_MEM)?"READ":"WRITE", + xfer->rio_addr, xfer->length); + goto err_out; + } + + if (sync == RIO_TRANSFER_FAF) + tx->callback = dma_faf_callback; + else + tx->callback = dma_xfer_callback; + tx->callback_param = req; + + req->dmach = chan; + req->sync = sync; + req->status = DMA_IN_PROGRESS; + init_completion(&req->req_comp); + + cookie = dmaengine_submit(tx); + req->cookie = cookie; + + rmcd_debug(DMA, "pid=%d DMA_%s tx_cookie = %d", task_pid_nr(current), + (dir == DMA_DEV_TO_MEM)?"READ":"WRITE", cookie); + + if (dma_submit_error(cookie)) { + rmcd_error("submit err=%d (addr:0x%llx len:0x%llx)", + cookie, xfer->rio_addr, xfer->length); + ret = -EIO; + goto err_out; + } + + dma_async_issue_pending(chan); + + if (sync == RIO_TRANSFER_ASYNC) { + spin_lock(&priv->req_lock); + list_add_tail(&req->node, &priv->async_list); + spin_unlock(&priv->req_lock); + return cookie; + } else if (sync == RIO_TRANSFER_FAF) + return 0; + + wret = wait_for_completion_interruptible_timeout(&req->req_comp, tmo); + + if (wret == 0) { + /* Timeout on wait occurred */ + rmcd_error("%s(%d) timed out waiting for DMA_%s %d", + current->comm, task_pid_nr(current), + (dir == DMA_DEV_TO_MEM)?"READ":"WRITE", cookie); + return -ETIMEDOUT; + } else if (wret == -ERESTARTSYS) { + /* Wait_for_completion was interrupted by a signal but DMA may + * be in progress + */ + rmcd_error("%s(%d) wait for DMA_%s %d was interrupted", + current->comm, task_pid_nr(current), + (dir == DMA_DEV_TO_MEM)?"READ":"WRITE", cookie); + return -EINTR; + } + + if (req->status != DMA_COMPLETE) { + /* DMA transaction completion was signaled with error */ + rmcd_error("%s(%d) DMA_%s %d completed with status %d (ret=%d)", + current->comm, task_pid_nr(current), + (dir == DMA_DEV_TO_MEM)?"READ":"WRITE", + cookie, req->status, ret); + ret = -EIO; + } + +err_out: + return ret; +} + +/* + * rio_dma_transfer() - Perform RapidIO DMA data transfer to/from + * the remote RapidIO device + * @filp: file pointer associated with the call + * @transfer_mode: DMA transfer mode + * @sync: synchronization mode + * @dir: DMA transfer direction (DMA_MEM_TO_DEV = write OR + * DMA_DEV_TO_MEM = read) + * @xfer: data transfer descriptor structure + */ +static int +rio_dma_transfer(struct file *filp, uint32_t transfer_mode, + enum rio_transfer_sync sync, enum dma_data_direction dir, + struct rio_transfer_io *xfer) +{ + struct mport_cdev_priv *priv = filp->private_data; + unsigned long nr_pages = 0; + struct page **page_list = NULL; + struct mport_dma_req *req; + struct mport_dev *md = priv->md; + struct dma_chan *chan; + int i, ret; + int nents; + + if (xfer->length == 0) + return -EINVAL; + req = kzalloc(sizeof(*req), GFP_KERNEL); + if (!req) + return -ENOMEM; + + ret = get_dma_channel(priv); + if (ret) { + kfree(req); + return ret; + } + + /* + * If parameter loc_addr != NULL, we are transferring data from/to + * data buffer allocated in user-space: 
pin the user-space
+ * buffer pages in memory and build an SG table for the DMA transfer request
+ *
+ * Otherwise (loc_addr == NULL) a contiguous kernel-space buffer is
+ * used for DMA data transfers: build a single-entry SG table using the
+ * offset within the internal buffer specified by the handle parameter.
+ */
+	if (xfer->loc_addr) {
+		unsigned long offset;
+		long pinned;
+
+		offset = (unsigned long)xfer->loc_addr & ~PAGE_MASK;
+		nr_pages = PAGE_ALIGN(xfer->length + offset) >> PAGE_SHIFT;
+
+		page_list = kmalloc_array(nr_pages,
+					  sizeof(*page_list), GFP_KERNEL);
+		if (page_list == NULL) {
+			ret = -ENOMEM;
+			goto err_req;
+		}
+
+		down_read(&current->mm->mmap_sem);
+		pinned = get_user_pages(current, current->mm,
+				(unsigned long)xfer->loc_addr & PAGE_MASK,
+				nr_pages, dir == DMA_FROM_DEVICE, 0,
+				page_list, NULL);
+		up_read(&current->mm->mmap_sem);
+
+		if (pinned != nr_pages) {
+			if (pinned < 0) {
+				rmcd_error("get_user_pages err=%ld", pinned);
+				nr_pages = 0;
+			} else
+				rmcd_error("pinned %ld out of %ld pages",
+					   pinned, nr_pages);
+			ret = -EFAULT;
+			goto err_pg;
+		}
+
+		ret = sg_alloc_table_from_pages(&req->sgt, page_list, nr_pages,
+				offset, xfer->length, GFP_KERNEL);
+		if (ret) {
+			rmcd_error("sg_alloc_table failed with err=%d", ret);
+			goto err_pg;
+		}
+
+		req->page_list = page_list;
+		req->nr_pages = nr_pages;
+	} else {
+		dma_addr_t baddr;
+		struct rio_mport_mapping *map;
+
+		baddr = (dma_addr_t)xfer->handle;
+
+		mutex_lock(&md->buf_mutex);
+		list_for_each_entry(map, &md->mappings, node) {
+			if (baddr >= map->phys_addr &&
+			    baddr < (map->phys_addr + map->size)) {
+				kref_get(&map->ref);
+				req->map = map;
+				break;
+			}
+		}
+		mutex_unlock(&md->buf_mutex);
+
+		if (req->map == NULL) {
+			ret = -ENOMEM;
+			goto err_req;
+		}
+
+		if (xfer->length + xfer->offset > map->size) {
+			ret = -EINVAL;
+			goto err_req;
+		}
+
+		ret = sg_alloc_table(&req->sgt, 1, GFP_KERNEL);
+		if (unlikely(ret)) {
+			rmcd_error("sg_alloc_table failed for internal buf");
+			goto err_req;
+		}
+
+		sg_set_buf(req->sgt.sgl,
+			   map->virt_addr + (baddr - map->phys_addr) +
+				xfer->offset, xfer->length);
+	}
+
+	req->dir = dir;
+	req->filp = filp;
+	req->priv = priv;
+	chan = priv->dmach;
+
+	nents = dma_map_sg(chan->device->dev,
+			   req->sgt.sgl, req->sgt.nents, dir);
+	if (nents == 0) {
+		/* dma_map_sg() reports failure by returning 0 mapped entries */
+		rmcd_error("Failed to map SG list");
+		sg_free_table(&req->sgt);
+		ret = -EFAULT;
+		goto err_pg;
+	}
+
+	ret = do_dma_request(req, xfer, sync, nents);
+
+	if (ret >= 0) {
+		if (sync == RIO_TRANSFER_SYNC)
+			goto sync_out;
+		return ret; /* return ASYNC cookie */
+	}
+
+	if (ret == -ETIMEDOUT || ret == -EINTR) {
+		/*
+		 * This can happen only in case of SYNC transfer.
+		 * Do not free unfinished request structure immediately.
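+		 * (the DMA engine may still be accessing its pages and SG table).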
+ * Place it into pending list and deal with it later + */ + spin_lock(&priv->req_lock); + list_add_tail(&req->node, &priv->pend_list); + spin_unlock(&priv->req_lock); + return ret; + } + + + rmcd_debug(DMA, "do_dma_request failed with err=%d", ret); +sync_out: + dma_unmap_sg(chan->device->dev, req->sgt.sgl, req->sgt.nents, dir); + sg_free_table(&req->sgt); +err_pg: + if (page_list) { + for (i = 0; i < nr_pages; i++) + put_page(page_list[i]); + kfree(page_list); + } +err_req: + if (req->map) { + mutex_lock(&md->buf_mutex); + kref_put(&req->map->ref, mport_release_mapping); + mutex_unlock(&md->buf_mutex); + } + put_dma_channel(priv); + kfree(req); + return ret; +} + +static int rio_mport_transfer_ioctl(struct file *filp, void __user *arg) +{ + struct mport_cdev_priv *priv = filp->private_data; + struct rio_transaction transaction; + struct rio_transfer_io *transfer; + enum dma_data_direction dir; + int i, ret = 0; + + if (unlikely(copy_from_user(&transaction, arg, sizeof(transaction)))) + return -EFAULT; + + if (transaction.count != 1) + return -EINVAL; + + if ((transaction.transfer_mode & + priv->md->properties.transfer_mode) == 0) + return -ENODEV; + + transfer = vmalloc(transaction.count * sizeof(struct rio_transfer_io)); + if (!transfer) + return -ENOMEM; + + if (unlikely(copy_from_user(transfer, transaction.block, + transaction.count * sizeof(struct rio_transfer_io)))) { + ret = -EFAULT; + goto out_free; + } + + dir = (transaction.dir == RIO_TRANSFER_DIR_READ) ? + DMA_FROM_DEVICE : DMA_TO_DEVICE; + for (i = 0; i < transaction.count && ret == 0; i++) + ret = rio_dma_transfer(filp, transaction.transfer_mode, + transaction.sync, dir, &transfer[i]); + + if (unlikely(copy_to_user(transaction.block, transfer, + transaction.count * sizeof(struct rio_transfer_io)))) + ret = -EFAULT; + +out_free: + vfree(transfer); + + return ret; +} + +static int rio_mport_wait_for_async_dma(struct file *filp, void __user *arg) +{ + struct mport_cdev_priv *priv; + struct mport_dev *md; + struct rio_async_tx_wait w_param; + struct mport_dma_req *req; + dma_cookie_t cookie; + unsigned long tmo; + long wret; + int found = 0; + int ret; + + priv = (struct mport_cdev_priv *)filp->private_data; + md = priv->md; + + if (unlikely(copy_from_user(&w_param, arg, sizeof(w_param)))) + return -EFAULT; + + cookie = w_param.token; + if (w_param.timeout) + tmo = msecs_to_jiffies(w_param.timeout); + else /* Use default DMA timeout */ + tmo = msecs_to_jiffies(dma_timeout); + + spin_lock(&priv->req_lock); + list_for_each_entry(req, &priv->async_list, node) { + if (req->cookie == cookie) { + list_del(&req->node); + found = 1; + break; + } + } + spin_unlock(&priv->req_lock); + + if (!found) + return -EAGAIN; + + wret = wait_for_completion_interruptible_timeout(&req->req_comp, tmo); + + if (wret == 0) { + /* Timeout on wait occurred */ + rmcd_error("%s(%d) timed out waiting for ASYNC DMA_%s", + current->comm, task_pid_nr(current), + (req->dir == DMA_FROM_DEVICE)?"READ":"WRITE"); + ret = -ETIMEDOUT; + goto err_tmo; + } else if (wret == -ERESTARTSYS) { + /* Wait_for_completion was interrupted by a signal but DMA may + * be still in progress + */ + rmcd_error("%s(%d) wait for ASYNC DMA_%s was interrupted", + current->comm, task_pid_nr(current), + (req->dir == DMA_FROM_DEVICE)?"READ":"WRITE"); + ret = -EINTR; + goto err_tmo; + } + + if (req->status != DMA_COMPLETE) { + /* DMA transaction completion signaled with transfer error */ + rmcd_error("%s(%d) ASYNC DMA_%s completion with status %d", + current->comm, task_pid_nr(current), + 
(req->dir == DMA_FROM_DEVICE)?"READ":"WRITE", + req->status); + ret = -EIO; + } else + ret = 0; + + if (req->status != DMA_IN_PROGRESS && req->status != DMA_PAUSED) + dma_req_free(req); + + return ret; + +err_tmo: + /* Return request back into async queue */ + spin_lock(&priv->req_lock); + list_add_tail(&req->node, &priv->async_list); + spin_unlock(&priv->req_lock); + return ret; +} + +static int rio_mport_create_dma_mapping(struct mport_dev *md, struct file *filp, + uint64_t size, struct rio_mport_mapping **mapping) +{ + struct rio_mport_mapping *map; + + map = kzalloc(sizeof(struct rio_mport_mapping), GFP_KERNEL); + if (map == NULL) + return -ENOMEM; + + map->virt_addr = dma_alloc_coherent(md->mport->dev.parent, size, + &map->phys_addr, GFP_KERNEL); + if (map->virt_addr == NULL) { + kfree(map); + return -ENOMEM; + } + + map->dir = MAP_DMA; + map->size = size; + map->filp = filp; + map->md = md; + kref_init(&map->ref); + mutex_lock(&md->buf_mutex); + list_add_tail(&map->node, &md->mappings); + mutex_unlock(&md->buf_mutex); + *mapping = map; + + return 0; +} + +static int rio_mport_alloc_dma(struct file *filp, void __user *arg) +{ + struct mport_cdev_priv *priv = filp->private_data; + struct mport_dev *md = priv->md; + struct rio_dma_mem map; + struct rio_mport_mapping *mapping = NULL; + int ret; + + if (unlikely(copy_from_user(&map, arg, sizeof(struct rio_dma_mem)))) + return -EFAULT; + + ret = rio_mport_create_dma_mapping(md, filp, map.length, &mapping); + if (ret) + return ret; + + map.dma_handle = mapping->phys_addr; + + if (unlikely(copy_to_user(arg, &map, sizeof(struct rio_dma_mem)))) { + mutex_lock(&md->buf_mutex); + kref_put(&mapping->ref, mport_release_mapping); + mutex_unlock(&md->buf_mutex); + return -EFAULT; + } + + return 0; +} + +static int rio_mport_free_dma(struct file *filp, void __user *arg) +{ + struct mport_cdev_priv *priv = filp->private_data; + struct mport_dev *md = priv->md; + u64 handle; + int ret = -EFAULT; + struct rio_mport_mapping *map, *_map; + + if (copy_from_user(&handle, arg, sizeof(u64))) + return -EFAULT; + rmcd_debug(EXIT, "filp=%p", filp); + + mutex_lock(&md->buf_mutex); + list_for_each_entry_safe(map, _map, &md->mappings, node) { + if (map->dir == MAP_DMA && map->phys_addr == handle && + map->filp == filp) { + kref_put(&map->ref, mport_release_mapping); + ret = 0; + break; + } + } + mutex_unlock(&md->buf_mutex); + + if (ret == -EFAULT) { + rmcd_debug(DMA, "ERR no matching mapping"); + return ret; + } + + return 0; +} +#else +static int rio_mport_transfer_ioctl(struct file *filp, void *arg) +{ + return -ENODEV; +} + +static int rio_mport_wait_for_async_dma(struct file *filp, void __user *arg) +{ + return -ENODEV; +} + +static int rio_mport_alloc_dma(struct file *filp, void __user *arg) +{ + return -ENODEV; +} + +static int rio_mport_free_dma(struct file *filp, void __user *arg) +{ + return -ENODEV; +} +#endif /* CONFIG_RAPIDIO_DMA_ENGINE */ + +/* + * Inbound/outbound memory mapping functions + */ + +static int +rio_mport_create_inbound_mapping(struct mport_dev *md, struct file *filp, + u64 raddr, u32 size, + struct rio_mport_mapping **mapping) +{ + struct rio_mport *mport = md->mport; + struct rio_mport_mapping *map; + int ret; + + map = kzalloc(sizeof(struct rio_mport_mapping), GFP_KERNEL); + if (map == NULL) + return -ENOMEM; + + map->virt_addr = dma_alloc_coherent(mport->dev.parent, size, + &map->phys_addr, GFP_KERNEL); + if (map->virt_addr == NULL) { + ret = -ENOMEM; + goto err_dma_alloc; + } + + if (raddr == RIO_MAP_ANY_ADDR) + raddr = 
map->phys_addr; + ret = rio_map_inb_region(mport, map->phys_addr, raddr, size, 0); + if (ret < 0) + goto err_map_inb; + + map->dir = MAP_INBOUND; + map->rio_addr = raddr; + map->size = size; + map->filp = filp; + map->md = md; + kref_init(&map->ref); + mutex_lock(&md->buf_mutex); + list_add_tail(&map->node, &md->mappings); + mutex_unlock(&md->buf_mutex); + *mapping = map; + return 0; + +err_map_inb: + dma_free_coherent(mport->dev.parent, size, + map->virt_addr, map->phys_addr); +err_dma_alloc: + kfree(map); + return ret; +} + +static int +rio_mport_get_inbound_mapping(struct mport_dev *md, struct file *filp, + u64 raddr, u32 size, + struct rio_mport_mapping **mapping) +{ + struct rio_mport_mapping *map; + int err = -ENOMEM; + + if (raddr == RIO_MAP_ANY_ADDR) + goto get_new; + + mutex_lock(&md->buf_mutex); + list_for_each_entry(map, &md->mappings, node) { + if (map->dir != MAP_INBOUND) + continue; + if (raddr == map->rio_addr && size == map->size) { + /* allow exact match only */ + *mapping = map; + err = 0; + break; + } else if (raddr < (map->rio_addr + map->size - 1) && + (raddr + size) > map->rio_addr) { + err = -EBUSY; + break; + } + } + mutex_unlock(&md->buf_mutex); + + if (err != -ENOMEM) + return err; +get_new: + /* not found, create new */ + return rio_mport_create_inbound_mapping(md, filp, raddr, size, mapping); +} + +static int rio_mport_map_inbound(struct file *filp, void __user *arg) +{ + struct mport_cdev_priv *priv = filp->private_data; + struct mport_dev *md = priv->md; + struct rio_mmap map; + struct rio_mport_mapping *mapping = NULL; + int ret; + + if (!md->mport->ops->map_inb) + return -EPROTONOSUPPORT; + if (unlikely(copy_from_user(&map, arg, sizeof(struct rio_mmap)))) + return -EFAULT; + + rmcd_debug(IBW, "%s filp=%p", dev_name(&priv->md->dev), filp); + + ret = rio_mport_get_inbound_mapping(md, filp, map.rio_addr, + map.length, &mapping); + if (ret) + return ret; + + map.handle = mapping->phys_addr; + map.rio_addr = mapping->rio_addr; + + if (unlikely(copy_to_user(arg, &map, sizeof(struct rio_mmap)))) { + /* Delete mapping if it was created by this request */ + if (ret == 0 && mapping->filp == filp) { + mutex_lock(&md->buf_mutex); + kref_put(&mapping->ref, mport_release_mapping); + mutex_unlock(&md->buf_mutex); + } + return -EFAULT; + } + + return 0; +} + +/* + * rio_mport_inbound_free() - unmap from RapidIO address space and free + * previously allocated inbound DMA coherent buffer + * @priv: driver private data + * @arg: buffer handle returned by allocation routine + */ +static int rio_mport_inbound_free(struct file *filp, void __user *arg) +{ + struct mport_cdev_priv *priv = filp->private_data; + struct mport_dev *md = priv->md; + u64 handle; + struct rio_mport_mapping *map, *_map; + + rmcd_debug(IBW, "%s filp=%p", dev_name(&priv->md->dev), filp); + + if (!md->mport->ops->unmap_inb) + return -EPROTONOSUPPORT; + + if (copy_from_user(&handle, arg, sizeof(u64))) + return -EFAULT; + + mutex_lock(&md->buf_mutex); + list_for_each_entry_safe(map, _map, &md->mappings, node) { + if (map->dir == MAP_INBOUND && map->phys_addr == handle) { + if (map->filp == filp) { + map->filp = NULL; + kref_put(&map->ref, mport_release_mapping); + } + break; + } + } + mutex_unlock(&md->buf_mutex); + + return 0; +} + +/* + * maint_port_idx_get() - Get the port index of the mport instance + * @priv: driver private data + * @arg: port index + */ +static int maint_port_idx_get(struct mport_cdev_priv *priv, void __user *arg) +{ + struct mport_dev *md = priv->md; + uint32_t port_idx = 
md->mport->index; + + rmcd_debug(MPORT, "port_index=%d", port_idx); + + if (copy_to_user(arg, &port_idx, sizeof(port_idx))) + return -EFAULT; + + return 0; +} + +static int rio_mport_add_event(struct mport_cdev_priv *priv, + struct rio_event *event) +{ + int overflow; + + if (!(priv->event_mask & event->header)) + return -EACCES; + + spin_lock(&priv->fifo_lock); + overflow = kfifo_avail(&priv->event_fifo) < sizeof(*event) + || kfifo_in(&priv->event_fifo, (unsigned char *)event, + sizeof(*event)) != sizeof(*event); + spin_unlock(&priv->fifo_lock); + + wake_up_interruptible(&priv->event_rx_wait); + + if (overflow) { + dev_warn(&priv->md->dev, DRV_NAME ": event fifo overflow\n"); + return -EBUSY; + } + + return 0; +} + +static void rio_mport_doorbell_handler(struct rio_mport *mport, void *dev_id, + u16 src, u16 dst, u16 info) +{ + struct mport_dev *data = dev_id; + struct mport_cdev_priv *priv; + struct rio_mport_db_filter *db_filter; + struct rio_event event; + int handled; + + event.header = RIO_DOORBELL; + event.u.doorbell.rioid = src; + event.u.doorbell.payload = info; + + handled = 0; + spin_lock(&data->db_lock); + list_for_each_entry(db_filter, &data->doorbells, data_node) { + if (((db_filter->filter.rioid == 0xffffffff || + db_filter->filter.rioid == src)) && + info >= db_filter->filter.low && + info <= db_filter->filter.high) { + priv = db_filter->priv; + rio_mport_add_event(priv, &event); + handled = 1; + } + } + spin_unlock(&data->db_lock); + + if (!handled) + dev_warn(&data->dev, + "%s: spurious DB received from 0x%x, info=0x%04x\n", + __func__, src, info); +} + +static int rio_mport_add_db_filter(struct mport_cdev_priv *priv, + void __user *arg) +{ + struct mport_dev *md = priv->md; + struct rio_mport_db_filter *db_filter; + struct rio_doorbell_filter filter; + unsigned long flags; + int ret; + + if (copy_from_user(&filter, arg, sizeof(filter))) + return -EFAULT; + + if (filter.low > filter.high) + return -EINVAL; + + ret = rio_request_inb_dbell(md->mport, md, filter.low, filter.high, + rio_mport_doorbell_handler); + if (ret) { + rmcd_error("%s failed to register IBDB, err=%d", + dev_name(&md->dev), ret); + return ret; + } + + db_filter = kzalloc(sizeof(*db_filter), GFP_KERNEL); + if (db_filter == NULL) { + rio_release_inb_dbell(md->mport, filter.low, filter.high); + return -ENOMEM; + } + + db_filter->filter = filter; + db_filter->priv = priv; + spin_lock_irqsave(&md->db_lock, flags); + list_add_tail(&db_filter->priv_node, &priv->db_filters); + list_add_tail(&db_filter->data_node, &md->doorbells); + spin_unlock_irqrestore(&md->db_lock, flags); + + return 0; +} + +static void rio_mport_delete_db_filter(struct rio_mport_db_filter *db_filter) +{ + list_del(&db_filter->data_node); + list_del(&db_filter->priv_node); + kfree(db_filter); +} + +static int rio_mport_remove_db_filter(struct mport_cdev_priv *priv, + void __user *arg) +{ + struct rio_mport_db_filter *db_filter; + struct rio_doorbell_filter filter; + unsigned long flags; + int ret = -EINVAL; + + if (copy_from_user(&filter, arg, sizeof(filter))) + return -EFAULT; + + spin_lock_irqsave(&priv->md->db_lock, flags); + list_for_each_entry(db_filter, &priv->db_filters, priv_node) { + if (db_filter->filter.rioid == filter.rioid && + db_filter->filter.low == filter.low && + db_filter->filter.high == filter.high) { + rio_mport_delete_db_filter(db_filter); + ret = 0; + break; + } + } + spin_unlock_irqrestore(&priv->md->db_lock, flags); + + if (!ret) + rio_release_inb_dbell(priv->md->mport, filter.low, filter.high); + + return ret; +} + 
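+/*
+ * rio_mport_match_pw() - check if a port-write message passes a filter
+ * @msg: inbound port-write message
+ * @filter: filter to match against
+ *
+ * Returns 1 if the masked component tag of @msg falls within the filter's
+ * [low, high] range, 0 otherwise.
+ */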
+static int rio_mport_match_pw(union rio_pw_msg *msg,
+			      struct rio_pw_filter *filter)
+{
+	if ((msg->em.comptag & filter->mask) < filter->low ||
+		(msg->em.comptag & filter->mask) > filter->high)
+		return 0;
+	return 1;
+}
+
+static int rio_mport_pw_handler(struct rio_mport *mport, void *context,
+				union rio_pw_msg *msg, int step)
+{
+	struct mport_dev *md = context;
+	struct mport_cdev_priv *priv;
+	struct rio_mport_pw_filter *pw_filter;
+	struct rio_event event;
+	int handled;
+
+	event.header = RIO_PORTWRITE;
+	memcpy(event.u.portwrite.payload, msg->raw, RIO_PW_MSG_SIZE);
+
+	handled = 0;
+	spin_lock(&md->pw_lock);
+	list_for_each_entry(pw_filter, &md->portwrites, md_node) {
+		if (rio_mport_match_pw(msg, &pw_filter->filter)) {
+			priv = pw_filter->priv;
+			rio_mport_add_event(priv, &event);
+			handled = 1;
+		}
+	}
+	spin_unlock(&md->pw_lock);
+
+	if (!handled) {
+		printk_ratelimited(KERN_WARNING DRV_NAME
+			": mport%d received spurious PW from 0x%08x\n",
+			mport->id, msg->em.comptag);
+	}
+
+	return 0;
+}
+
+static int rio_mport_add_pw_filter(struct mport_cdev_priv *priv,
+				   void __user *arg)
+{
+	struct mport_dev *md = priv->md;
+	struct rio_mport_pw_filter *pw_filter;
+	struct rio_pw_filter filter;
+	unsigned long flags;
+	int hadd = 0;
+
+	if (copy_from_user(&filter, arg, sizeof(filter)))
+		return -EFAULT;
+
+	pw_filter = kzalloc(sizeof(*pw_filter), GFP_KERNEL);
+	if (pw_filter == NULL)
+		return -ENOMEM;
+
+	pw_filter->filter = filter;
+	pw_filter->priv = priv;
+	spin_lock_irqsave(&md->pw_lock, flags);
+	if (list_empty(&md->portwrites))
+		hadd = 1;
+	list_add_tail(&pw_filter->priv_node, &priv->pw_filters);
+	list_add_tail(&pw_filter->md_node, &md->portwrites);
+	spin_unlock_irqrestore(&md->pw_lock, flags);
+
+	if (hadd) {
+		int ret;
+
+		ret = rio_add_mport_pw_handler(md->mport, md,
+					       rio_mport_pw_handler);
+		if (ret) {
+			dev_err(&md->dev,
+				"%s: failed to add IB_PW handler, err=%d\n",
+				__func__, ret);
+			return ret;
+		}
+		rio_pw_enable(md->mport, 1);
+	}
+
+	return 0;
+}
+
+static void rio_mport_delete_pw_filter(struct rio_mport_pw_filter *pw_filter)
+{
+	list_del(&pw_filter->md_node);
+	list_del(&pw_filter->priv_node);
+	kfree(pw_filter);
+}
+
+static int rio_mport_match_pw_filter(struct rio_pw_filter *a,
+				     struct rio_pw_filter *b)
+{
+	if ((a->mask == b->mask) && (a->low == b->low) && (a->high == b->high))
+		return 1;
+	return 0;
+}
+
+static int rio_mport_remove_pw_filter(struct mport_cdev_priv *priv,
+				      void __user *arg)
+{
+	struct mport_dev *md = priv->md;
+	struct rio_mport_pw_filter *pw_filter;
+	struct rio_pw_filter filter;
+	unsigned long flags;
+	int ret = -EINVAL;
+	int hdel = 0;
+
+	if (copy_from_user(&filter, arg, sizeof(filter)))
+		return -EFAULT;
+
+	spin_lock_irqsave(&md->pw_lock, flags);
+	list_for_each_entry(pw_filter, &priv->pw_filters, priv_node) {
+		if (rio_mport_match_pw_filter(&pw_filter->filter, &filter)) {
+			rio_mport_delete_pw_filter(pw_filter);
+			ret = 0;
+			break;
+		}
+	}
+
+	if (list_empty(&md->portwrites))
+		hdel = 1;
+	spin_unlock_irqrestore(&md->pw_lock, flags);
+
+	if (hdel) {
+		rio_del_mport_pw_handler(md->mport, priv->md,
+					 rio_mport_pw_handler);
+		rio_pw_enable(md->mport, 0);
+	}
+
+	return ret;
+}
+
+/*
+ * rio_release_dev - release routine for kernel RIO device object
+ * @dev: kernel device object associated with a RIO device structure
+ *
+ * Frees the RIO device structure associated with the given kernel device
+ * object.
+ */ +static void rio_release_dev(struct device *dev) +{ + struct rio_dev *rdev; + + rdev = to_rio_dev(dev); + pr_info(DRV_PREFIX "%s: %s\n", __func__, rio_name(rdev)); + kfree(rdev); +} + + +static void rio_release_net(struct device *dev) +{ + struct rio_net *net; + + net = to_rio_net(dev); + rmcd_debug(RDEV, "net_%d", net->id); + kfree(net); +} + + +/* + * rio_mport_add_riodev - creates a kernel RIO device object + * + * Allocates a RIO device data structure and initializes required fields based + * on device's configuration space contents. + * If the device has switch capabilities, then a switch specific portion is + * allocated and configured. + */ +static int rio_mport_add_riodev(struct mport_cdev_priv *priv, + void __user *arg) +{ + struct mport_dev *md = priv->md; + struct rio_rdev_info dev_info; + struct rio_dev *rdev; + struct rio_switch *rswitch = NULL; + struct rio_mport *mport; + size_t size; + u32 rval; + u32 swpinfo = 0; + u16 destid; + u8 hopcount; + int err; + + if (copy_from_user(&dev_info, arg, sizeof(dev_info))) + return -EFAULT; + + rmcd_debug(RDEV, "name:%s ct:0x%x did:0x%x hc:0x%x", dev_info.name, + dev_info.comptag, dev_info.destid, dev_info.hopcount); + + if (bus_find_device_by_name(&rio_bus_type, NULL, dev_info.name)) { + rmcd_debug(RDEV, "device %s already exists", dev_info.name); + return -EEXIST; + } + + size = sizeof(struct rio_dev); + mport = md->mport; + destid = (u16)dev_info.destid; + hopcount = (u8)dev_info.hopcount; + + if (rio_mport_read_config_32(mport, destid, hopcount, + RIO_PEF_CAR, &rval)) + return -EIO; + + if (rval & RIO_PEF_SWITCH) { + rio_mport_read_config_32(mport, destid, hopcount, + RIO_SWP_INFO_CAR, &swpinfo); + size += (RIO_GET_TOTAL_PORTS(swpinfo) * + sizeof(rswitch->nextdev[0])) + sizeof(*rswitch); + } + + rdev = kzalloc(size, GFP_KERNEL); + if (rdev == NULL) + return -ENOMEM; + + if (mport->net == NULL) { + struct rio_net *net; + + net = rio_alloc_net(mport); + if (!net) { + err = -ENOMEM; + rmcd_debug(RDEV, "failed to allocate net object"); + goto cleanup; + } + + net->id = mport->id; + net->hport = mport; + dev_set_name(&net->dev, "rnet_%d", net->id); + net->dev.parent = &mport->dev; + net->dev.release = rio_release_net; + err = rio_add_net(net); + if (err) { + rmcd_debug(RDEV, "failed to register net, err=%d", err); + kfree(net); + goto cleanup; + } + } + + rdev->net = mport->net; + rdev->pef = rval; + rdev->swpinfo = swpinfo; + rio_mport_read_config_32(mport, destid, hopcount, + RIO_DEV_ID_CAR, &rval); + rdev->did = rval >> 16; + rdev->vid = rval & 0xffff; + rio_mport_read_config_32(mport, destid, hopcount, RIO_DEV_INFO_CAR, + &rdev->device_rev); + rio_mport_read_config_32(mport, destid, hopcount, RIO_ASM_ID_CAR, + &rval); + rdev->asm_did = rval >> 16; + rdev->asm_vid = rval & 0xffff; + rio_mport_read_config_32(mport, destid, hopcount, RIO_ASM_INFO_CAR, + &rval); + rdev->asm_rev = rval >> 16; + + if (rdev->pef & RIO_PEF_EXT_FEATURES) { + rdev->efptr = rval & 0xffff; + rdev->phys_efptr = rio_mport_get_physefb(mport, 0, destid, + hopcount); + + rdev->em_efptr = rio_mport_get_feature(mport, 0, destid, + hopcount, RIO_EFB_ERR_MGMNT); + } + + rio_mport_read_config_32(mport, destid, hopcount, RIO_SRC_OPS_CAR, + &rdev->src_ops); + rio_mport_read_config_32(mport, destid, hopcount, RIO_DST_OPS_CAR, + &rdev->dst_ops); + + rdev->comp_tag = dev_info.comptag; + rdev->destid = destid; + /* hopcount is stored as specified by a caller, regardles of EP or SW */ + rdev->hopcount = hopcount; + + if (rdev->pef & RIO_PEF_SWITCH) { + rswitch = 
rdev->rswitch; + rswitch->route_table = NULL; + } + + if (strlen(dev_info.name)) + dev_set_name(&rdev->dev, "%s", dev_info.name); + else if (rdev->pef & RIO_PEF_SWITCH) + dev_set_name(&rdev->dev, "%02x:s:%04x", mport->id, + rdev->comp_tag & RIO_CTAG_UDEVID); + else + dev_set_name(&rdev->dev, "%02x:e:%04x", mport->id, + rdev->comp_tag & RIO_CTAG_UDEVID); + + INIT_LIST_HEAD(&rdev->net_list); + rdev->dev.parent = &mport->net->dev; + rio_attach_device(rdev); + rdev->dev.release = rio_release_dev; + + if (rdev->dst_ops & RIO_DST_OPS_DOORBELL) + rio_init_dbell_res(&rdev->riores[RIO_DOORBELL_RESOURCE], + 0, 0xffff); + err = rio_add_device(rdev); + if (err) + goto cleanup; + rio_dev_get(rdev); + + return 0; +cleanup: + kfree(rdev); + return err; +} + +static int rio_mport_del_riodev(struct mport_cdev_priv *priv, void __user *arg) +{ + struct rio_rdev_info dev_info; + struct rio_dev *rdev = NULL; + struct device *dev; + struct rio_mport *mport; + struct rio_net *net; + + if (copy_from_user(&dev_info, arg, sizeof(dev_info))) + return -EFAULT; + + mport = priv->md->mport; + + /* If device name is specified, removal by name has priority */ + if (strlen(dev_info.name)) { + dev = bus_find_device_by_name(&rio_bus_type, NULL, + dev_info.name); + if (dev) + rdev = to_rio_dev(dev); + } else { + do { + rdev = rio_get_comptag(dev_info.comptag, rdev); + if (rdev && rdev->dev.parent == &mport->net->dev && + rdev->destid == (u16)dev_info.destid && + rdev->hopcount == (u8)dev_info.hopcount) + break; + } while (rdev); + } + + if (!rdev) { + rmcd_debug(RDEV, + "device name:%s ct:0x%x did:0x%x hc:0x%x not found", + dev_info.name, dev_info.comptag, dev_info.destid, + dev_info.hopcount); + return -ENODEV; + } + + net = rdev->net; + rio_dev_put(rdev); + rio_del_device(rdev, RIO_DEVICE_SHUTDOWN); + + if (list_empty(&net->devices)) { + rio_free_net(net); + mport->net = NULL; + } + + return 0; +} + +/* + * Mport cdev management + */ + +/* + * mport_cdev_open() - Open character device (mport) + */ +static int mport_cdev_open(struct inode *inode, struct file *filp) +{ + int ret; + int minor = iminor(inode); + struct mport_dev *chdev; + struct mport_cdev_priv *priv; + + /* Test for valid device */ + if (minor >= RIO_MAX_MPORTS) { + rmcd_error("Invalid minor device number"); + return -EINVAL; + } + + chdev = container_of(inode->i_cdev, struct mport_dev, cdev); + + rmcd_debug(INIT, "%s filp=%p", dev_name(&chdev->dev), filp); + + if (atomic_read(&chdev->active) == 0) + return -ENODEV; + + get_device(&chdev->dev); + + priv = kzalloc(sizeof(*priv), GFP_KERNEL); + if (!priv) { + put_device(&chdev->dev); + return -ENOMEM; + } + + priv->md = chdev; + + mutex_lock(&chdev->file_mutex); + list_add_tail(&priv->list, &chdev->file_list); + mutex_unlock(&chdev->file_mutex); + + INIT_LIST_HEAD(&priv->db_filters); + INIT_LIST_HEAD(&priv->pw_filters); + spin_lock_init(&priv->fifo_lock); + init_waitqueue_head(&priv->event_rx_wait); + ret = kfifo_alloc(&priv->event_fifo, + sizeof(struct rio_event) * MPORT_EVENT_DEPTH, + GFP_KERNEL); + if (ret < 0) { + dev_err(&chdev->dev, DRV_NAME ": kfifo_alloc failed\n"); + ret = -ENOMEM; + goto err_fifo; + } + +#ifdef CONFIG_RAPIDIO_DMA_ENGINE + INIT_LIST_HEAD(&priv->async_list); + INIT_LIST_HEAD(&priv->pend_list); + spin_lock_init(&priv->req_lock); + mutex_init(&priv->dma_lock); +#endif + + filp->private_data = priv; + goto out; +err_fifo: + kfree(priv); +out: + return ret; +} + +static int mport_cdev_fasync(int fd, struct file *filp, int mode) +{ + struct mport_cdev_priv *priv = filp->private_data; + + 
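+	/* Register or remove this file from the SIGIO async notification queue */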
return fasync_helper(fd, filp, mode, &priv->async_queue); +} + +#ifdef CONFIG_RAPIDIO_DMA_ENGINE +static void mport_cdev_release_dma(struct file *filp) +{ + struct mport_cdev_priv *priv = filp->private_data; + struct mport_dev *md; + struct mport_dma_req *req, *req_next; + unsigned long tmo = msecs_to_jiffies(dma_timeout); + long wret; + LIST_HEAD(list); + + rmcd_debug(EXIT, "from filp=%p %s(%d)", + filp, current->comm, task_pid_nr(current)); + + if (!priv->dmach) { + rmcd_debug(EXIT, "No DMA channel for filp=%p", filp); + return; + } + + md = priv->md; + + flush_workqueue(dma_wq); + + spin_lock(&priv->req_lock); + if (!list_empty(&priv->async_list)) { + rmcd_debug(EXIT, "async list not empty filp=%p %s(%d)", + filp, current->comm, task_pid_nr(current)); + list_splice_init(&priv->async_list, &list); + } + spin_unlock(&priv->req_lock); + + if (!list_empty(&list)) { + rmcd_debug(EXIT, "temp list not empty"); + list_for_each_entry_safe(req, req_next, &list, node) { + rmcd_debug(EXIT, "free req->filp=%p cookie=%d compl=%s", + req->filp, req->cookie, + completion_done(&req->req_comp)?"yes":"no"); + list_del(&req->node); + dma_req_free(req); + } + } + + if (!list_empty(&priv->pend_list)) { + rmcd_debug(EXIT, "Free pending DMA requests for filp=%p %s(%d)", + filp, current->comm, task_pid_nr(current)); + list_for_each_entry_safe(req, + req_next, &priv->pend_list, node) { + rmcd_debug(EXIT, "free req->filp=%p cookie=%d compl=%s", + req->filp, req->cookie, + completion_done(&req->req_comp)?"yes":"no"); + list_del(&req->node); + dma_req_free(req); + } + } + + put_dma_channel(priv); + wret = wait_for_completion_interruptible_timeout(&priv->comp, tmo); + + if (wret <= 0) { + rmcd_error("%s(%d) failed waiting for DMA release err=%ld", + current->comm, task_pid_nr(current), wret); + } + + spin_lock(&priv->req_lock); + + if (!list_empty(&priv->pend_list)) { + rmcd_debug(EXIT, "ATTN: pending DMA requests, filp=%p %s(%d)", + filp, current->comm, task_pid_nr(current)); + } + + spin_unlock(&priv->req_lock); + + if (priv->dmach != priv->md->dma_chan) { + rmcd_debug(EXIT, "Release DMA channel for filp=%p %s(%d)", + filp, current->comm, task_pid_nr(current)); + rio_release_dma(priv->dmach); + } else { + rmcd_debug(EXIT, "Adjust default DMA channel refcount"); + kref_put(&md->dma_ref, mport_release_def_dma); + } + + priv->dmach = NULL; +} +#else +#define mport_cdev_release_dma(priv) do {} while (0) +#endif + +/* + * mport_cdev_release() - Release character device + */ +static int mport_cdev_release(struct inode *inode, struct file *filp) +{ + struct mport_cdev_priv *priv = filp->private_data; + struct mport_dev *chdev; + struct rio_mport_pw_filter *pw_filter, *pw_filter_next; + struct rio_mport_db_filter *db_filter, *db_filter_next; + struct rio_mport_mapping *map, *_map; + unsigned long flags; + + rmcd_debug(EXIT, "%s filp=%p", dev_name(&priv->md->dev), filp); + + chdev = priv->md; + mport_cdev_release_dma(filp); + + priv->event_mask = 0; + + spin_lock_irqsave(&chdev->pw_lock, flags); + if (!list_empty(&priv->pw_filters)) { + list_for_each_entry_safe(pw_filter, pw_filter_next, + &priv->pw_filters, priv_node) + rio_mport_delete_pw_filter(pw_filter); + } + spin_unlock_irqrestore(&chdev->pw_lock, flags); + + spin_lock_irqsave(&chdev->db_lock, flags); + list_for_each_entry_safe(db_filter, db_filter_next, + &priv->db_filters, priv_node) { + rio_mport_delete_db_filter(db_filter); + } + spin_unlock_irqrestore(&chdev->db_lock, flags); + + kfifo_free(&priv->event_fifo); + + mutex_lock(&chdev->buf_mutex); + 
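+	/* Drop references to all memory mappings still owned by this file */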
list_for_each_entry_safe(map, _map, &chdev->mappings, node) { + if (map->filp == filp) { + rmcd_debug(EXIT, "release mapping %p filp=%p", + map->virt_addr, filp); + kref_put(&map->ref, mport_release_mapping); + } + } + mutex_unlock(&chdev->buf_mutex); + + mport_cdev_fasync(-1, filp, 0); + filp->private_data = NULL; + mutex_lock(&chdev->file_mutex); + list_del(&priv->list); + mutex_unlock(&chdev->file_mutex); + put_device(&chdev->dev); + kfree(priv); + return 0; +} + +/* + * mport_cdev_ioctl() - IOCTLs for character device + */ +static long mport_cdev_ioctl(struct file *filp, + unsigned int cmd, unsigned long arg) +{ + int err = -EINVAL; + struct mport_cdev_priv *data = filp->private_data; + struct mport_dev *md = data->md; + + if (atomic_read(&md->active) == 0) + return -ENODEV; + + switch (cmd) { + case RIO_MPORT_MAINT_READ_LOCAL: + return rio_mport_maint_rd(data, (void __user *)arg, 1); + case RIO_MPORT_MAINT_WRITE_LOCAL: + return rio_mport_maint_wr(data, (void __user *)arg, 1); + case RIO_MPORT_MAINT_READ_REMOTE: + return rio_mport_maint_rd(data, (void __user *)arg, 0); + case RIO_MPORT_MAINT_WRITE_REMOTE: + return rio_mport_maint_wr(data, (void __user *)arg, 0); + case RIO_MPORT_MAINT_HDID_SET: + return maint_hdid_set(data, (void __user *)arg); + case RIO_MPORT_MAINT_COMPTAG_SET: + return maint_comptag_set(data, (void __user *)arg); + case RIO_MPORT_MAINT_PORT_IDX_GET: + return maint_port_idx_get(data, (void __user *)arg); + case RIO_MPORT_GET_PROPERTIES: + md->properties.hdid = md->mport->host_deviceid; + if (copy_to_user((void __user *)arg, &(data->md->properties), + sizeof(data->md->properties))) + return -EFAULT; + return 0; + case RIO_ENABLE_DOORBELL_RANGE: + return rio_mport_add_db_filter(data, (void __user *)arg); + case RIO_DISABLE_DOORBELL_RANGE: + return rio_mport_remove_db_filter(data, (void __user *)arg); + case RIO_ENABLE_PORTWRITE_RANGE: + return rio_mport_add_pw_filter(data, (void __user *)arg); + case RIO_DISABLE_PORTWRITE_RANGE: + return rio_mport_remove_pw_filter(data, (void __user *)arg); + case RIO_SET_EVENT_MASK: + data->event_mask = arg; + return 0; + case RIO_GET_EVENT_MASK: + if (copy_to_user((void __user *)arg, &data->event_mask, + sizeof(data->event_mask))) + return -EFAULT; + return 0; + case RIO_MAP_OUTBOUND: + return rio_mport_obw_map(filp, (void __user *)arg); + case RIO_MAP_INBOUND: + return rio_mport_map_inbound(filp, (void __user *)arg); + case RIO_UNMAP_OUTBOUND: + return rio_mport_obw_free(filp, (void __user *)arg); + case RIO_UNMAP_INBOUND: + return rio_mport_inbound_free(filp, (void __user *)arg); + case RIO_ALLOC_DMA: + return rio_mport_alloc_dma(filp, (void __user *)arg); + case RIO_FREE_DMA: + return rio_mport_free_dma(filp, (void __user *)arg); + case RIO_WAIT_FOR_ASYNC: + return rio_mport_wait_for_async_dma(filp, (void __user *)arg); + case RIO_TRANSFER: + return rio_mport_transfer_ioctl(filp, (void __user *)arg); + case RIO_DEV_ADD: + return rio_mport_add_riodev(data, (void __user *)arg); + case RIO_DEV_DEL: + return rio_mport_del_riodev(data, (void __user *)arg); + default: + break; + } + + return err; +} + +/* + * mport_release_mapping - free mapping resources and info structure + * @ref: a pointer to the kref within struct rio_mport_mapping + * + * NOTE: Shall be called while holding buf_mutex. 
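+ * (The mapping is also removed from the mport device's mappings list here,
+ * which is what buf_mutex protects.)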
+ */
+static void mport_release_mapping(struct kref *ref)
+{
+	struct rio_mport_mapping *map =
+			container_of(ref, struct rio_mport_mapping, ref);
+	struct rio_mport *mport = map->md->mport;
+
+	rmcd_debug(MMAP, "type %d mapping @ %p (phys = %pad) for %s",
+		   map->dir, map->virt_addr,
+		   &map->phys_addr, mport->name);
+
+	list_del(&map->node);
+
+	switch (map->dir) {
+	case MAP_INBOUND:
+		rio_unmap_inb_region(mport, map->phys_addr);
+		/* fall through - inbound mappings also use a coherent buffer */
+	case MAP_DMA:
+		dma_free_coherent(mport->dev.parent, map->size,
+				  map->virt_addr, map->phys_addr);
+		break;
+	case MAP_OUTBOUND:
+		rio_unmap_outb_region(mport, map->rioid, map->rio_addr);
+		break;
+	}
+	kfree(map);
+}
+
+static void mport_mm_open(struct vm_area_struct *vma)
+{
+	struct rio_mport_mapping *map = vma->vm_private_data;
+
+	rmcd_debug(MMAP, "0x%pad", &map->phys_addr);
+	kref_get(&map->ref);
+}
+
+static void mport_mm_close(struct vm_area_struct *vma)
+{
+	struct rio_mport_mapping *map = vma->vm_private_data;
+
+	rmcd_debug(MMAP, "0x%pad", &map->phys_addr);
+	mutex_lock(&map->md->buf_mutex);
+	kref_put(&map->ref, mport_release_mapping);
+	mutex_unlock(&map->md->buf_mutex);
+}
+
+static const struct vm_operations_struct vm_ops = {
+	.open =	mport_mm_open,
+	.close = mport_mm_close,
+};
+
+static int mport_cdev_mmap(struct file *filp, struct vm_area_struct *vma)
+{
+	struct mport_cdev_priv *priv = filp->private_data;
+	struct mport_dev *md;
+	size_t size = vma->vm_end - vma->vm_start;
+	dma_addr_t baddr;
+	unsigned long offset;
+	int found = 0, ret;
+	struct rio_mport_mapping *map;
+
+	rmcd_debug(MMAP, "0x%x bytes at offset 0x%lx",
+		   (unsigned int)size, vma->vm_pgoff);
+
+	md = priv->md;
+	baddr = ((dma_addr_t)vma->vm_pgoff << PAGE_SHIFT);
+
+	mutex_lock(&md->buf_mutex);
+	list_for_each_entry(map, &md->mappings, node) {
+		if (baddr >= map->phys_addr &&
+		    baddr < (map->phys_addr + map->size)) {
+			found = 1;
+			break;
+		}
+	}
+	mutex_unlock(&md->buf_mutex);
+
+	if (!found)
+		return -ENOMEM;
+
+	offset = baddr - map->phys_addr;
+
+	if (size + offset > map->size)
+		return -EINVAL;
+
+	vma->vm_pgoff = offset >> PAGE_SHIFT;
+	rmcd_debug(MMAP, "MMAP adjusted offset = 0x%lx", vma->vm_pgoff);
+
+	if (map->dir == MAP_INBOUND || map->dir == MAP_DMA)
+		ret = dma_mmap_coherent(md->mport->dev.parent, vma,
+					map->virt_addr, map->phys_addr, map->size);
+	else if (map->dir == MAP_OUTBOUND) {
+		vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+		ret = vm_iomap_memory(vma, map->phys_addr, map->size);
+	} else {
+		rmcd_error("Attempt to mmap unsupported mapping type");
+		ret = -EIO;
+	}
+
+	if (!ret) {
+		vma->vm_private_data = map;
+		vma->vm_ops = &vm_ops;
+		mport_mm_open(vma);
+	} else {
+		rmcd_error("MMAP exit with err=%d", ret);
+	}
+
+	return ret;
+}
+
+static unsigned int mport_cdev_poll(struct file *filp, poll_table *wait)
+{
+	struct mport_cdev_priv *priv = filp->private_data;
+
+	poll_wait(filp, &priv->event_rx_wait, wait);
+	if (kfifo_len(&priv->event_fifo))
+		return POLLIN | POLLRDNORM;
+
+	return 0;
+}
+
+static ssize_t mport_read(struct file *filp, char __user *buf, size_t count,
+			  loff_t *ppos)
+{
+	struct mport_cdev_priv *priv = filp->private_data;
+	int copied;
+	ssize_t ret;
+
+	if (!count)
+		return 0;
+
+	if (kfifo_is_empty(&priv->event_fifo) &&
+	    (filp->f_flags & O_NONBLOCK))
+		return -EAGAIN;
+
+	if (count % sizeof(struct rio_event))
+		return -EINVAL;
+
+	ret = wait_event_interruptible(priv->event_rx_wait,
+				       kfifo_len(&priv->event_fifo) != 0);
+	if (ret)
+		return ret;
+
+	while (ret < count) {
+		if (kfifo_to_user(&priv->event_fifo, buf,
+				  sizeof(struct rio_event), &copied))
+			return -EFAULT;
+		ret += copied;
+		buf += copied;
+	}
+
+	return ret;
+}
+
+static ssize_t mport_write(struct file *filp, const char __user *buf,
+			   size_t count, loff_t *ppos)
+{
+	struct mport_cdev_priv *priv = filp->private_data;
+	struct rio_mport *mport = priv->md->mport;
+	struct rio_event event;
+	int len, ret;
+
+	if (!count)
+		return 0;
+
+	if (count % sizeof(event))
+		return -EINVAL;
+
+	len = 0;
+	while ((count - len) >= (int)sizeof(event)) {
+		if (copy_from_user(&event, buf, sizeof(event)))
+			return -EFAULT;
+
+		if (event.header != RIO_DOORBELL)
+			return -EINVAL;
+
+		ret = rio_mport_send_doorbell(mport,
+					      (u16)event.u.doorbell.rioid,
+					      event.u.doorbell.payload);
+		if (ret < 0)
+			return ret;
+
+		len += sizeof(event);
+		buf += sizeof(event);
+	}
+
+	return len;
+}
+
+static const struct file_operations mport_fops = {
+	.owner		= THIS_MODULE,
+	.open		= mport_cdev_open,
+	.release	= mport_cdev_release,
+	.poll		= mport_cdev_poll,
+	.read		= mport_read,
+	.write		= mport_write,
+	.mmap		= mport_cdev_mmap,
+	.fasync		= mport_cdev_fasync,
+	.unlocked_ioctl = mport_cdev_ioctl
+};
+
+/*
+ * Character device management
+ */
+
+static void mport_device_release(struct device *dev)
+{
+	struct mport_dev *md;
+
+	rmcd_debug(EXIT, "%s", dev_name(dev));
+	md = container_of(dev, struct mport_dev, dev);
+	kfree(md);
+}
+
+/*
+ * mport_cdev_add() - Create mport_dev from rio_mport
+ * @mport: RapidIO master port
+ */
+static struct mport_dev *mport_cdev_add(struct rio_mport *mport)
+{
+	int ret = 0;
+	struct mport_dev *md;
+	struct rio_mport_attr attr;
+
+	md = kzalloc(sizeof(struct mport_dev), GFP_KERNEL);
+	if (!md) {
+		rmcd_error("Unable to allocate a device object");
+		return NULL;
+	}
+
+	md->mport = mport;
+	mutex_init(&md->buf_mutex);
+	mutex_init(&md->file_mutex);
+	INIT_LIST_HEAD(&md->file_list);
+	cdev_init(&md->cdev, &mport_fops);
+	md->cdev.owner = THIS_MODULE;
+	ret = cdev_add(&md->cdev, MKDEV(MAJOR(dev_number), mport->id), 1);
+	if (ret < 0) {
+		kfree(md);
+		rmcd_error("Unable to register a device, err=%d", ret);
+		return NULL;
+	}
+
+	md->dev.devt = md->cdev.dev;
+	md->dev.class = dev_class;
+	md->dev.parent = &mport->dev;
+	md->dev.release = mport_device_release;
+	dev_set_name(&md->dev, DEV_NAME "%d", mport->id);
+	atomic_set(&md->active, 1);
+
+	ret = device_register(&md->dev);
+	if (ret) {
+		rmcd_error("Failed to register mport %d (err=%d)",
+			   mport->id, ret);
+		goto err_cdev;
+	}
+
+	get_device(&md->dev);
+
+	INIT_LIST_HEAD(&md->doorbells);
+	spin_lock_init(&md->db_lock);
+	INIT_LIST_HEAD(&md->portwrites);
+	spin_lock_init(&md->pw_lock);
+	INIT_LIST_HEAD(&md->mappings);
+
+	md->properties.id = mport->id;
+	md->properties.sys_size = mport->sys_size;
+	md->properties.hdid = mport->host_deviceid;
+	md->properties.index = mport->index;
+
+	/* The transfer_mode property will be returned through the mport
+	 * query interface
+	 */
+#ifdef CONFIG_PPC /* for now: only on Freescale's SoCs */
+	md->properties.transfer_mode |= RIO_TRANSFER_MODE_MAPPED;
+#else
+	md->properties.transfer_mode |= RIO_TRANSFER_MODE_TRANSFER;
+#endif
+	ret = rio_query_mport(mport, &attr);
+	if (!ret) {
+		md->properties.flags = attr.flags;
+		md->properties.link_speed = attr.link_speed;
+		md->properties.link_width = attr.link_width;
+		md->properties.dma_max_sge = attr.dma_max_sge;
+		md->properties.dma_max_size = attr.dma_max_size;
+		md->properties.dma_align = attr.dma_align;
+		md->properties.cap_sys_size = 0;
+		md->properties.cap_transfer_mode = 0;
+		md->properties.cap_addr_size = 0;
+	}
else + pr_info(DRV_PREFIX "Failed to obtain info for %s cdev(%d:%d)\n", + mport->name, MAJOR(dev_number), mport->id); + + mutex_lock(&mport_devs_lock); + list_add_tail(&md->node, &mport_devs); + mutex_unlock(&mport_devs_lock); + + pr_info(DRV_PREFIX "Added %s cdev(%d:%d)\n", + mport->name, MAJOR(dev_number), mport->id); + + return md; + +err_cdev: + cdev_del(&md->cdev); + kfree(md); + return NULL; +} + +/* + * mport_cdev_terminate_dma() - Stop all active DMA data transfers and release + * associated DMA channels. + */ +static void mport_cdev_terminate_dma(struct mport_dev *md) +{ +#ifdef CONFIG_RAPIDIO_DMA_ENGINE + struct mport_cdev_priv *client; + + rmcd_debug(DMA, "%s", dev_name(&md->dev)); + + mutex_lock(&md->file_mutex); + list_for_each_entry(client, &md->file_list, list) { + if (client->dmach) { + dmaengine_terminate_all(client->dmach); + rio_release_dma(client->dmach); + } + } + mutex_unlock(&md->file_mutex); + + if (md->dma_chan) { + dmaengine_terminate_all(md->dma_chan); + rio_release_dma(md->dma_chan); + md->dma_chan = NULL; + } +#endif +} + + +/* + * mport_cdev_kill_fasync() - Send SIGIO signal to all processes with open + * mport_cdev files. + */ +static int mport_cdev_kill_fasync(struct mport_dev *md) +{ + unsigned int files = 0; + struct mport_cdev_priv *client; + + mutex_lock(&md->file_mutex); + list_for_each_entry(client, &md->file_list, list) { + if (client->async_queue) + kill_fasync(&client->async_queue, SIGIO, POLL_HUP); + files++; + } + mutex_unlock(&md->file_mutex); + return files; +} + +/* + * mport_cdev_remove() - Remove mport character device + * @dev: Mport device to remove + */ +static void mport_cdev_remove(struct mport_dev *md) +{ + struct rio_mport_mapping *map, *_map; + + rmcd_debug(EXIT, "Remove %s cdev", md->mport->name); + atomic_set(&md->active, 0); + mport_cdev_terminate_dma(md); + rio_del_mport_pw_handler(md->mport, md, rio_mport_pw_handler); + cdev_del(&(md->cdev)); + mport_cdev_kill_fasync(md); + + flush_workqueue(dma_wq); + + /* TODO: do we need to give clients some time to close file + * descriptors? Simple wait for XX, or kref? + */ + + /* + * Release DMA buffers allocated for the mport device. + * Disable associated inbound Rapidio requests mapping if applicable. 
+ */
+	mutex_lock(&md->buf_mutex);
+	list_for_each_entry_safe(map, _map, &md->mappings, node) {
+		kref_put(&map->ref, mport_release_mapping);
+	}
+	mutex_unlock(&md->buf_mutex);
+
+	if (!list_empty(&md->mappings))
+		rmcd_warn("WARNING: %s pending mappings on removal",
+			  md->mport->name);
+
+	rio_release_inb_dbell(md->mport, 0, 0x0fff);
+
+	device_unregister(&md->dev);
+	put_device(&md->dev);
+}
+
+/*
+ * RIO rio_mport_interface driver
+ */
+
+/*
+ * mport_add_mport() - Add rio_mport from LDM device struct
+ * @dev: Linux device model struct
+ * @class_intf: Linux class_interface
+ */
+static int mport_add_mport(struct device *dev,
+			   struct class_interface *class_intf)
+{
+	struct rio_mport *mport = NULL;
+	struct mport_dev *chdev = NULL;
+
+	mport = to_rio_mport(dev);
+	if (!mport)
+		return -ENODEV;
+
+	chdev = mport_cdev_add(mport);
+	if (!chdev)
+		return -ENODEV;
+
+	return 0;
+}
+
+/*
+ * mport_remove_mport() - Remove rio_mport from global list
+ * TODO remove device from global mport_dev list
+ */
+static void mport_remove_mport(struct device *dev,
+			       struct class_interface *class_intf)
+{
+	struct rio_mport *mport = NULL;
+	struct mport_dev *chdev;
+	int found = 0;
+
+	mport = to_rio_mport(dev);
+	rmcd_debug(EXIT, "Remove %s", mport->name);
+
+	mutex_lock(&mport_devs_lock);
+	list_for_each_entry(chdev, &mport_devs, node) {
+		if (chdev->mport->id == mport->id) {
+			atomic_set(&chdev->active, 0);
+			list_del(&chdev->node);
+			found = 1;
+			break;
+		}
+	}
+	mutex_unlock(&mport_devs_lock);
+
+	if (found)
+		mport_cdev_remove(chdev);
+}
+
+/* the rio_mport_interface is used to handle local mport devices */
+static struct class_interface rio_mport_interface __refdata = {
+	.class = &rio_mport_class,
+	.add_dev = mport_add_mport,
+	.remove_dev = mport_remove_mport,
+};
+
+/*
+ * Linux kernel module
+ */
+
+/*
+ * mport_init - Driver module loading
+ */
+static int __init mport_init(void)
+{
+	int ret;
+
+	/* Create device class needed by udev */
+	dev_class = class_create(THIS_MODULE, DRV_NAME);
+	if (IS_ERR(dev_class)) {
+		rmcd_error("Unable to create " DRV_NAME " class");
+		return PTR_ERR(dev_class);
+	}
+
+	ret = alloc_chrdev_region(&dev_number, 0, RIO_MAX_MPORTS, DRV_NAME);
+	if (ret < 0)
+		goto err_chr;
+
+	rmcd_debug(INIT, "Registered class with major=%d", MAJOR(dev_number));
+
+	/* Register to rio_mport_interface */
+	ret = class_interface_register(&rio_mport_interface);
+	if (ret) {
+		rmcd_error("class_interface_register() failed, err=%d", ret);
+		goto err_cli;
+	}
+
+	dma_wq = create_singlethread_workqueue("dma_wq");
+	if (!dma_wq) {
+		rmcd_error("failed to create DMA work queue");
+		ret = -ENOMEM;
+		goto err_wq;
+	}
+
+	return 0;
+
+err_wq:
+	class_interface_unregister(&rio_mport_interface);
+err_cli:
+	unregister_chrdev_region(dev_number, RIO_MAX_MPORTS);
+err_chr:
+	class_destroy(dev_class);
+	return ret;
+}
+
+/**
+ * mport_exit - Driver module unloading
+ */
+static void __exit mport_exit(void)
+{
+	class_interface_unregister(&rio_mport_interface);
+	class_destroy(dev_class);
+	unregister_chrdev_region(dev_number, RIO_MAX_MPORTS);
+	destroy_workqueue(dma_wq);
+}
+
+module_init(mport_init);
+module_exit(mport_exit);
diff --git a/include/linux/rio_mport_cdev.h b/include/linux/rio_mport_cdev.h
new file mode 100644
index 000000000000..b65d19df76d2
--- /dev/null
+++ b/include/linux/rio_mport_cdev.h
@@ -0,0 +1,271 @@
+/*
+ * Copyright (c) 2015-2016, Integrated Device Technology Inc.
+ * Copyright (c) 2015, Prodrive Technologies + * Copyright (c) 2015, Texas Instruments Incorporated + * Copyright (c) 2015, RapidIO Trade Association + * All rights reserved. + * + * This software is available to you under a choice of one of two licenses. + * You may choose to be licensed under the terms of the GNU General Public + * License(GPL) Version 2, or the BSD-3 Clause license below: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its contributors + * may be used to endorse or promote products derived from this software without + * specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */
+
+#ifndef _RIO_MPORT_CDEV_H_
+#define _RIO_MPORT_CDEV_H_
+
+#ifndef __user
+#define __user
+#endif
+
+struct rio_mport_maint_io {
+	uint32_t rioid;		/* destID of remote device */
+	uint32_t hopcount;	/* hopcount to remote device */
+	uint32_t offset;	/* offset in register space */
+	size_t length;		/* length in bytes */
+	void __user *buffer;	/* data buffer */
+};
+
+/*
+ * Definitions for RapidIO data transfers:
+ * - memory mapped (MAPPED)
+ * - packet generation from memory (TRANSFER)
+ */
+#define RIO_TRANSFER_MODE_MAPPED	(1 << 0)
+#define RIO_TRANSFER_MODE_TRANSFER	(1 << 1)
+#define RIO_CAP_DBL_SEND		(1 << 2)
+#define RIO_CAP_DBL_RECV		(1 << 3)
+#define RIO_CAP_PW_SEND			(1 << 4)
+#define RIO_CAP_PW_RECV			(1 << 5)
+#define RIO_CAP_MAP_OUTB		(1 << 6)
+#define RIO_CAP_MAP_INB			(1 << 7)
+
+struct rio_mport_properties {
+	uint16_t hdid;
+	uint8_t id;			/* Physical port ID */
+	uint8_t index;
+	uint32_t flags;
+	uint32_t sys_size;		/* Default addressing size */
+	uint8_t port_ok;
+	uint8_t link_speed;
+	uint8_t link_width;
+	uint32_t dma_max_sge;
+	uint32_t dma_max_size;
+	uint32_t dma_align;
+	uint32_t transfer_mode;		/* Default transfer mode */
+	uint32_t cap_sys_size;		/* Capable system sizes */
+	uint32_t cap_addr_size;		/* Capable addressing sizes */
+	uint32_t cap_transfer_mode;	/* Capable transfer modes */
+	uint32_t cap_mport;		/* Mport capabilities */
+};
+
+/*
+ * Definitions for RapidIO events:
+ * - incoming port-writes
+ * - incoming doorbells
+ */
+#define RIO_DOORBELL	(1 << 0)
+#define RIO_PORTWRITE	(1 << 1)
+
+struct rio_doorbell {
+	uint32_t rioid;
+	uint16_t payload;
+};
+
+struct rio_doorbell_filter {
+	uint32_t rioid;	/* 0xffffffff to match all ids */
+	uint16_t low;
+	uint16_t high;
+};
+
+
+struct rio_portwrite {
+	uint32_t payload[16];
+};
+
+struct rio_pw_filter {
+	uint32_t mask;
+	uint32_t low;
+	uint32_t high;
+};
+
+/* A RapidIO base address for inbound requests set to the value defined below
+ * indicates that no specific RIO-to-local address translation is requested
+ * and the driver should use direct (one-to-one) address mapping.
+*/
+#define RIO_MAP_ANY_ADDR	(uint64_t)(~((uint64_t) 0))
+
+struct rio_mmap {
+	uint32_t rioid;
+	uint64_t rio_addr;
+	uint64_t length;
+	uint64_t handle;
+	void *address;
+};
+
+struct rio_dma_mem {
+	uint64_t length;	/* length of DMA memory */
+	uint64_t dma_handle;	/* handle associated with this memory */
+	void *buffer;		/* pointer to this memory */
+};
+
+
+struct rio_event {
+	unsigned int header;	/* event type RIO_DOORBELL or RIO_PORTWRITE */
+	union {
+		struct rio_doorbell doorbell;	/* header for RIO_DOORBELL */
+		struct rio_portwrite portwrite; /* header for RIO_PORTWRITE */
+	} u;
+};
+
+enum rio_transfer_sync {
+	RIO_TRANSFER_SYNC,	/* synchronous transfer */
+	RIO_TRANSFER_ASYNC,	/* asynchronous transfer */
+	RIO_TRANSFER_FAF,	/* fire-and-forget transfer */
+};
+
+enum rio_transfer_dir {
+	RIO_TRANSFER_DIR_READ,	/* Read operation */
+	RIO_TRANSFER_DIR_WRITE,	/* Write operation */
+};
+
+/*
+ * RapidIO data exchange transactions are lists of individual transfers. Each
+ * transfer exchanges data between two RapidIO devices by remote direct memory
+ * access and has its own completion code.
+ *
+ * The RapidIO specification defines four types of data exchange requests:
+ * NREAD, NWRITE, SWRITE and NWRITE_R. The RapidIO DMA channel interface allows
+ * specifying the required type of write operation, or a combination of them
+ * where only the last data packet requires a response.
+ * + * NREAD: read up to 256 bytes from remote device memory into local memory + * NWRITE: write up to 256 bytes from local memory to remote device memory + * without confirmation + * SWRITE: as NWRITE, but all addresses and payloads must be 64-bit aligned + * NWRITE_R: as NWRITE, but expect acknowledgment from remote device. + * + * The default exchange is chosen from NREAD and any of the WRITE modes as the + * driver sees fit. For write requests the user can explicitly choose between + * any of the write modes for each transaction. + */ +enum rio_exchange { + RIO_EXCHANGE_DEFAULT, /* Default method */ + RIO_EXCHANGE_NWRITE, /* All packets using NWRITE */ + RIO_EXCHANGE_SWRITE, /* All packets using SWRITE */ + RIO_EXCHANGE_NWRITE_R, /* Last packet NWRITE_R, others NWRITE */ + RIO_EXCHANGE_SWRITE_R, /* Last packet NWRITE_R, others SWRITE */ + RIO_EXCHANGE_NWRITE_R_ALL, /* All packets using NWRITE_R */ +}; + +struct rio_transfer_io { + uint32_t rioid; /* Target destID */ + uint64_t rio_addr; /* Address in target's RIO mem space */ + enum rio_exchange method; /* Data exchange method */ + void __user *loc_addr; + uint64_t handle; + uint64_t offset; /* Offset in buffer */ + uint64_t length; /* Length in bytes */ + uint32_t completion_code; /* Completion code for this transfer */ +}; + +struct rio_transaction { + uint32_t transfer_mode; /* Data transfer mode */ + enum rio_transfer_sync sync; /* Synchronization method */ + enum rio_transfer_dir dir; /* Transfer direction */ + size_t count; /* Number of transfers */ + struct rio_transfer_io __user *block; /* Array of transfers */ +}; + +struct rio_async_tx_wait { + uint32_t token; /* DMA transaction ID token */ + uint32_t timeout; /* Wait timeout in msec, if 0 use default TO */ +}; + +#define RIO_MAX_DEVNAME_SZ 20 + +struct rio_rdev_info { + uint32_t destid; + uint8_t hopcount; + uint32_t comptag; + char name[RIO_MAX_DEVNAME_SZ + 1]; +}; + +/* Driver IOCTL codes */ +#define RIO_MPORT_DRV_MAGIC 'm' + +#define RIO_MPORT_MAINT_HDID_SET \ + _IOW(RIO_MPORT_DRV_MAGIC, 1, uint16_t) +#define RIO_MPORT_MAINT_COMPTAG_SET \ + _IOW(RIO_MPORT_DRV_MAGIC, 2, uint32_t) +#define RIO_MPORT_MAINT_PORT_IDX_GET \ + _IOR(RIO_MPORT_DRV_MAGIC, 3, uint32_t) +#define RIO_MPORT_GET_PROPERTIES \ + _IOR(RIO_MPORT_DRV_MAGIC, 4, struct rio_mport_properties) +#define RIO_MPORT_MAINT_READ_LOCAL \ + _IOR(RIO_MPORT_DRV_MAGIC, 5, struct rio_mport_maint_io) +#define RIO_MPORT_MAINT_WRITE_LOCAL \ + _IOW(RIO_MPORT_DRV_MAGIC, 6, struct rio_mport_maint_io) +#define RIO_MPORT_MAINT_READ_REMOTE \ + _IOR(RIO_MPORT_DRV_MAGIC, 7, struct rio_mport_maint_io) +#define RIO_MPORT_MAINT_WRITE_REMOTE \ + _IOW(RIO_MPORT_DRV_MAGIC, 8, struct rio_mport_maint_io) +#define RIO_ENABLE_DOORBELL_RANGE \ + _IOW(RIO_MPORT_DRV_MAGIC, 9, struct rio_doorbell_filter) +#define RIO_DISABLE_DOORBELL_RANGE \ + _IOW(RIO_MPORT_DRV_MAGIC, 10, struct rio_doorbell_filter) +#define RIO_ENABLE_PORTWRITE_RANGE \ + _IOW(RIO_MPORT_DRV_MAGIC, 11, struct rio_pw_filter) +#define RIO_DISABLE_PORTWRITE_RANGE \ + _IOW(RIO_MPORT_DRV_MAGIC, 12, struct rio_pw_filter) +#define RIO_SET_EVENT_MASK \ + _IOW(RIO_MPORT_DRV_MAGIC, 13, unsigned int) +#define RIO_GET_EVENT_MASK \ + _IOR(RIO_MPORT_DRV_MAGIC, 14, unsigned int) +#define RIO_MAP_OUTBOUND \ + _IOWR(RIO_MPORT_DRV_MAGIC, 15, struct rio_mmap) +#define RIO_UNMAP_OUTBOUND \ + _IOW(RIO_MPORT_DRV_MAGIC, 16, struct rio_mmap) +#define RIO_MAP_INBOUND \ + _IOWR(RIO_MPORT_DRV_MAGIC, 17, struct rio_mmap) +#define RIO_UNMAP_INBOUND \ + _IOW(RIO_MPORT_DRV_MAGIC, 18, uint64_t) +#define 
RIO_ALLOC_DMA \
+	_IOWR(RIO_MPORT_DRV_MAGIC, 19, struct rio_dma_mem)
+#define RIO_FREE_DMA \
+	_IOW(RIO_MPORT_DRV_MAGIC, 20, uint64_t)
+#define RIO_TRANSFER \
+	_IOWR(RIO_MPORT_DRV_MAGIC, 21, struct rio_transaction)
+#define RIO_WAIT_FOR_ASYNC \
+	_IOW(RIO_MPORT_DRV_MAGIC, 22, struct rio_async_tx_wait)
+#define RIO_DEV_ADD \
+	_IOW(RIO_MPORT_DRV_MAGIC, 23, struct rio_rdev_info)
+#define RIO_DEV_DEL \
+	_IOW(RIO_MPORT_DRV_MAGIC, 24, struct rio_rdev_info)
+
+#endif /* _RIO_MPORT_CDEV_H_ */
diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild
index 0495884defc1..b71fd0b5cbad 100644
--- a/include/uapi/linux/Kbuild
+++ b/include/uapi/linux/Kbuild
@@ -354,6 +354,7 @@ header-y += reiserfs_fs.h
 header-y += reiserfs_xattr.h
 header-y += resource.h
 header-y += rfkill.h
+header-y += rio_mport_cdev.h
 header-y += romfs_fs.h
 header-y += rose.h
 header-y += route.h
--
cgit v1.2.3


From 0335695dfa4df01edff5bb102b9a82a0668ee51e Mon Sep 17 00:00:00 2001
From: Arnd Bergmann
Date: Tue, 22 Mar 2016 14:27:11 -0700
Subject: cred/userns: define current_user_ns() as a function

The current_user_ns() macro currently returns &init_user_ns when user
namespaces are disabled, and that causes several warnings when building
with gcc-6.0 in code that compares the result of the macro to
&init_user_ns itself:

  fs/xfs/xfs_ioctl.c: In function 'xfs_ioctl_setattr_check_projid':
  fs/xfs/xfs_ioctl.c:1249:22: error: self-comparison always evaluates to true [-Werror=tautological-compare]
    if (current_user_ns() == &init_user_ns)

This is a legitimate warning in principle, but here it isn't really
helpful, so I'm rephrasing the definition in a way that shuts up the
warning.

Apparently gcc only warns when comparing identical literals, but it can
figure out that the result of an inline function can be identical to a
constant expression in order to optimize a condition, yet not warn about
the fact that the condition is known at compile time. This is exactly
what we want here, and it looks reasonable because we generally prefer
inline functions over macros anyway.

Signed-off-by: Arnd Bergmann
Acked-by: Serge Hallyn
Cc: David Howells
Cc: Yaowei Bai
Cc: James Morris
Cc: "Paul E. McKenney"
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 include/linux/capability.h | 2 --
 include/linux/cred.h       | 5 ++++-
 2 files changed, 4 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/capability.h b/include/linux/capability.h
index f314275d4e3f..00690ff92edf 100644
--- a/include/linux/capability.h
+++ b/include/linux/capability.h
@@ -40,8 +40,6 @@ struct inode;
 struct dentry;
 struct user_namespace;

-struct user_namespace *current_user_ns(void);
-
 extern const kernel_cap_t __cap_empty_set;
 extern const kernel_cap_t __cap_init_eff_set;

diff --git a/include/linux/cred.h b/include/linux/cred.h
index 8d70e1361ecd..257db64562e5 100644
--- a/include/linux/cred.h
+++ b/include/linux/cred.h
@@ -377,7 +377,10 @@ extern struct user_namespace init_user_ns;
 #ifdef CONFIG_USER_NS
 #define current_user_ns()	(current_cred_xxx(user_ns))
 #else
-#define current_user_ns()	(&init_user_ns)
+static inline struct user_namespace *current_user_ns(void)
+{
+	return &init_user_ns;
+}
 #endif
--
cgit v1.2.3


From ebc41f20d77f6ad91f1f2d2af5147dc9bb6b5eea Mon Sep 17 00:00:00 2001
From: Hidehiro Kawai
Date: Tue, 22 Mar 2016 14:27:17 -0700
Subject: panic: change nmi_panic from macro to function

Commit 1717f2096b54 ("panic, x86: Fix re-entrance problem due to panic
on NMI") and commit 58c5661f2144 ("panic, x86: Allow CPUs to save
registers even if looping in NMI context") introduced nmi_panic(),
which prevents concurrent/recursive execution of panic(). It also
saves registers for the crash dump on x86.

However, there are some cases where NMI handlers still use panic().
This patch set partially replaces them with nmi_panic() in those cases.

Even after this patch set is applied, some NMI or similar handlers
(e.g. the MCE handler) continue to use panic(). This is because I
can't test them well, and actual problems are unlikely to happen. For
example, the probability that a normal panic and a panic on MCE happen
simultaneously is very low.

This patch (of 3):

Convert nmi_panic() to a proper function and export it instead of
exporting internal implementation details to modules, for obvious
reasons.

Signed-off-by: Hidehiro Kawai
Acked-by: Borislav Petkov
Acked-by: Michal Nazarewicz
Cc: Michal Hocko
Cc: Rasmus Villemoes
Cc: Nicolas Iooss
Cc: Javi Merino
Cc: Gobinda Charan Maji
Cc: "Steven Rostedt (Red Hat)"
Cc: Thomas Gleixner
Cc: Vitaly Kuznetsov
Cc: HATAYAMA Daisuke
Cc: Tejun Heo
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 include/linux/kernel.h | 21 +--------------------
 kernel/panic.c         | 20 ++++++++++++++++++++
 2 files changed, 21 insertions(+), 20 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index b82646ee70eb..a13c52ccd8ac 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -255,7 +255,7 @@ extern long (*panic_blink)(int state);
 __printf(1, 2)
 void panic(const char *fmt, ...)
 	__noreturn __cold;
-void nmi_panic_self_stop(struct pt_regs *);
+void nmi_panic(struct pt_regs *regs, const char *msg);
 extern void oops_enter(void);
 extern void oops_exit(void);
 void print_oops_end_marker(void);
@@ -456,25 +456,6 @@ extern bool crash_kexec_post_notifiers;
 extern atomic_t panic_cpu;
 #define PANIC_CPU_INVALID	-1

-/*
- * A variant of panic() called from NMI context. We return if we've already
- * panicked on this CPU. If another CPU already panicked, loop in
- * nmi_panic_self_stop() which can provide architecture dependent code such
- * as saving register state for crash dump.
- */
-#define nmi_panic(regs, fmt, ...)				\
-do {								\
-	int old_cpu, cpu;					\
-								\
-	cpu = raw_smp_processor_id();				\
-	old_cpu = atomic_cmpxchg(&panic_cpu, PANIC_CPU_INVALID, cpu); \
-								\
-	if (old_cpu == PANIC_CPU_INVALID)			\
-		panic(fmt, ##__VA_ARGS__);			\
-	else if (old_cpu != cpu)				\
-		nmi_panic_self_stop(regs);			\
-} while (0)
-
 /*
  * Only to be used by arch init code. If the user over-wrote the default
  * CONFIG_PANIC_TIMEOUT, honor it.
diff --git a/kernel/panic.c b/kernel/panic.c
index fa400852bf6c..535c96510a44 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -73,6 +73,26 @@ void __weak nmi_panic_self_stop(struct pt_regs *regs)

 atomic_t panic_cpu = ATOMIC_INIT(PANIC_CPU_INVALID);

+/*
+ * A variant of panic() called from NMI context. We return if we've already
+ * panicked on this CPU. If another CPU already panicked, loop in
+ * nmi_panic_self_stop() which can provide architecture dependent code such
+ * as saving register state for crash dump.
+ */
+void nmi_panic(struct pt_regs *regs, const char *msg)
+{
+	int old_cpu, cpu;
+
+	cpu = raw_smp_processor_id();
+	old_cpu = atomic_cmpxchg(&panic_cpu, PANIC_CPU_INVALID, cpu);
+
+	if (old_cpu == PANIC_CPU_INVALID)
+		panic("%s", msg);
+	else if (old_cpu != cpu)
+		nmi_panic_self_stop(regs);
+}
+EXPORT_SYMBOL(nmi_panic);
+
 /**
  * panic - halt the system
  * @fmt: The text string to print
--
cgit v1.2.3


From 5c9a8750a6409c63a0f01d51a9024861022f6593 Mon Sep 17 00:00:00 2001
From: Dmitry Vyukov
Date: Tue, 22 Mar 2016 14:27:30 -0700
Subject: kernel: add kcov code coverage

kcov provides code coverage collection for coverage-guided fuzzing
(randomized testing). Coverage-guided fuzzing is a testing technique
that uses coverage feedback to determine new interesting inputs to a
system. A notable user-space example is AFL
(http://lcamtuf.coredump.cx/afl/). However, this technique is not
widely used for kernel testing due to missing compiler and kernel
support.

kcov does not aim to collect as much coverage as possible. It aims to
collect more or less stable coverage that is a function of syscall
inputs. To achieve this goal it does not collect coverage in soft/hard
interrupts, and instrumentation of some inherently non-deterministic
or non-interesting parts of the kernel is disabled (e.g. scheduler,
locking).

Currently there is a single coverage collection mode (tracing), but the
API anticipates additional collection modes. Initially I also
implemented a second mode which exposes coverage in a fixed-size hash
table of counters (what Quentin used in his original patch). I've
dropped the second mode for simplicity.

This patch adds the necessary support on the kernel side. The
complementary compiler support was added in gcc revision 231296.

We've used this support to build the syzkaller system call fuzzer,
which has found 90 kernel bugs in just 2 months:
https://github.com/google/syzkaller/wiki/Found-Bugs
We've also found 30+ bugs in our internal systems with syzkaller.
Another (yet unexplored) direction where kcov coverage would greatly
help is more traditional "blob mutation". For example, mounting a
random blob as a filesystem, or receiving a random blob over wire.

Why not gcov? A typical fuzzing loop looks as follows: (1) reset
coverage, (2) execute a bit of code, (3) collect coverage, repeat.
Typical coverage can be just a dozen basic blocks (e.g. for an invalid
input). In such a context gcov becomes prohibitively expensive, as the
reset/collect coverage steps depend on the total number of basic
blocks/edges in the program (about 2M for the kernel). The cost of
kcov depends only on the number of executed basic blocks/edges.
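To make the loop concrete, here is a minimal user-space sketch of it
(not part of the patch itself) against the debugfs interface added
here; it assumes the same headers and KCOV_*/COVER_SIZE definitions as
the example program in Documentation/kcov.txt below, error handling is
omitted, and execute_one_input()/process_pc() are hypothetical fuzzer
hooks:

	int fd = open("/sys/kernel/debug/kcov", O_RDWR);
	unsigned long n, i, *cover;

	/* Trace mode; the buffer size is given in unsigned longs. */
	ioctl(fd, KCOV_INIT_TRACE, COVER_SIZE);
	cover = mmap(NULL, COVER_SIZE * sizeof(unsigned long),
		     PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	ioctl(fd, KCOV_ENABLE, 0);

	for (;;) {
		/* (1) reset coverage: word 0 holds the number of collected PCs */
		__atomic_store_n(&cover[0], 0, __ATOMIC_RELAXED);
		/* (2) execute a bit of code with the next fuzzer input */
		execute_one_input();
		/* (3) collect coverage: PCs are stored in cover[1..n] */
		n = __atomic_load_n(&cover[0], __ATOMIC_RELAXED);
		for (i = 0; i < n; i++)
			process_pc(cover[i + 1]);
	}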
On top of that, the kernel requires per-thread coverage because there
are always background threads and unrelated processes that also
produce coverage. With inlined gcov instrumentation per-thread
coverage is not possible.

kcov exposes kernel PCs and control flow to user space, which is
insecure. But debugfs should not be mapped as user accessible.

Based on a patch by Quentin Casasnovas.

[akpm@linux-foundation.org: make task_struct.kcov_mode have type `enum kcov_mode']
[akpm@linux-foundation.org: unbreak allmodconfig]
[akpm@linux-foundation.org: follow x86 Makefile layout standards]
Signed-off-by: Dmitry Vyukov
Reviewed-by: Kees Cook
Cc: syzkaller
Cc: Vegard Nossum
Cc: Catalin Marinas
Cc: Tavis Ormandy
Cc: Will Deacon
Cc: Quentin Casasnovas
Cc: Kostya Serebryany
Cc: Eric Dumazet
Cc: Alexander Potapenko
Cc: Kees Cook
Cc: Bjorn Helgaas
Cc: Sasha Levin
Cc: David Drysdale
Cc: Ard Biesheuvel
Cc: Andrey Ryabinin
Cc: Kirill A. Shutemov
Cc: Jiri Slaby
Cc: Ingo Molnar
Cc: Thomas Gleixner
Cc: "H. Peter Anvin"
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 Documentation/kcov.txt                | 111 ++++++++++++++
 Makefile                              |  11 +-
 arch/x86/Kconfig                      |   1 +
 arch/x86/boot/Makefile                |   7 +
 arch/x86/boot/compressed/Makefile     |   3 +
 arch/x86/entry/vdso/Makefile          |   3 +
 arch/x86/kernel/Makefile              |   6 +
 arch/x86/kernel/apic/Makefile         |   4 +
 arch/x86/kernel/cpu/Makefile          |   4 +
 arch/x86/lib/Makefile                 |   3 +
 arch/x86/mm/Makefile                  |   3 +
 arch/x86/realmode/rm/Makefile         |   3 +
 drivers/firmware/efi/libstub/Makefile |   3 +
 include/linux/kcov.h                  |  29 ++++
 include/linux/sched.h                 |  11 ++
 include/uapi/linux/kcov.h             |  10 ++
 kernel/Makefile                       |  12 ++
 kernel/exit.c                         |   2 +
 kernel/fork.c                         |   3 +
 kernel/kcov.c                         | 273 ++++++++++++++++++++++++++++++++++
 kernel/locking/Makefile               |   3 +
 kernel/rcu/Makefile                   |   4 +
 kernel/sched/Makefile                 |   4 +
 lib/Kconfig.debug                     |  21 +++
 lib/Makefile                          |  12 ++
 mm/Makefile                           |  15 ++
 mm/kasan/Makefile                     |   1 +
 scripts/Makefile.lib                  |   6 +
 28 files changed, 567 insertions(+), 1 deletion(-)
 create mode 100644 Documentation/kcov.txt
 create mode 100644 include/linux/kcov.h
 create mode 100644 include/uapi/linux/kcov.h
 create mode 100644 kernel/kcov.c

(limited to 'include/linux')

diff --git a/Documentation/kcov.txt b/Documentation/kcov.txt
new file mode 100644
index 000000000000..779ff4ab1c1d
--- /dev/null
+++ b/Documentation/kcov.txt
@@ -0,0 +1,111 @@
+kcov: code coverage for fuzzing
+===============================
+
+kcov exposes kernel code coverage information in a form suitable for coverage-
+guided fuzzing (randomized testing). Coverage data of a running kernel is
+exported via the "kcov" debugfs file. Coverage collection is enabled on a task
+basis, and thus it can capture precise coverage of a single system call.
+
+Note that kcov does not aim to collect as much coverage as possible. It aims
+to collect more or less stable coverage that is a function of syscall inputs.
+To achieve this goal it does not collect coverage in soft/hard interrupts,
+and instrumentation of some inherently non-deterministic parts of the kernel
+is disabled (e.g. scheduler, locking).
+
+Usage:
+======
+
+Configure the kernel with:
+
+        CONFIG_KCOV=y
+
+CONFIG_KCOV requires gcc built on revision 231296 or later.
+Profiling data will only become accessible once debugfs has been mounted:
+
+        mount -t debugfs none /sys/kernel/debug
+
+The following program demonstrates kcov usage from within a test program:
+
+#include <stdio.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <unistd.h>
+#include <fcntl.h>
+
+#define KCOV_INIT_TRACE			_IOR('c', 1, unsigned long)
+#define KCOV_ENABLE			_IO('c', 100)
+#define KCOV_DISABLE			_IO('c', 101)
+#define COVER_SIZE			(64<<10)
+
+int main(int argc, char **argv)
+{
+	int fd;
+	unsigned long *cover, n, i;
+
+	/* A single file descriptor allows coverage collection on a single
+	 * thread.
+	 */
+	fd = open("/sys/kernel/debug/kcov", O_RDWR);
+	if (fd == -1)
+		perror("open"), exit(1);
+	/* Set up trace mode and trace size. */
+	if (ioctl(fd, KCOV_INIT_TRACE, COVER_SIZE))
+		perror("ioctl"), exit(1);
+	/* Mmap the buffer shared between kernel and user space. */
+	cover = (unsigned long*)mmap(NULL, COVER_SIZE * sizeof(unsigned long),
+				     PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+	if ((void*)cover == MAP_FAILED)
+		perror("mmap"), exit(1);
+	/* Enable coverage collection on the current thread. */
+	if (ioctl(fd, KCOV_ENABLE, 0))
+		perror("ioctl"), exit(1);
+	/* Reset coverage from the tail of the ioctl() call. */
+	__atomic_store_n(&cover[0], 0, __ATOMIC_RELAXED);
+	/* That is the target syscall. */
+	read(-1, NULL, 0);
+	/* Read the number of PCs collected. */
+	n = __atomic_load_n(&cover[0], __ATOMIC_RELAXED);
+	for (i = 0; i < n; i++)
+		printf("0x%lx\n", cover[i + 1]);
+	/* Disable coverage collection for the current thread. After this call
+	 * coverage can be enabled for a different thread.
+	 */
+	if (ioctl(fd, KCOV_DISABLE, 0))
+		perror("ioctl"), exit(1);
+	/* Free resources. */
+	if (munmap(cover, COVER_SIZE * sizeof(unsigned long)))
+		perror("munmap"), exit(1);
+	if (close(fd))
+		perror("close"), exit(1);
+	return 0;
+}
+
+After piping through addr2line, the output of the program looks as follows:
+
+SyS_read
+fs/read_write.c:562
+__fdget_pos
+fs/file.c:774
+__fget_light
+fs/file.c:746
+__fget_light
+fs/file.c:750
+__fget_light
+fs/file.c:760
+__fdget_pos
+fs/file.c:784
+SyS_read
+fs/read_write.c:562
+
+If a program needs to collect coverage from several threads (independently),
+it needs to open /sys/kernel/debug/kcov in each thread separately.
+
+The interface is fine-grained to allow efficient forking of test processes.
+That is, a parent process opens /sys/kernel/debug/kcov, enables trace mode,
+mmaps the coverage buffer, and then forks child processes in a loop. Child
+processes only need to enable coverage (disable happens automatically on
+thread end).
diff --git a/Makefile b/Makefile
index e055b969c325..b98a4f70d1b5 100644
--- a/Makefile
+++ b/Makefile
@@ -365,6 +365,7 @@ LDFLAGS_MODULE  =
 CFLAGS_KERNEL	=
 AFLAGS_KERNEL	=
 CFLAGS_GCOV	= -fprofile-arcs -ftest-coverage
+CFLAGS_KCOV	= -fsanitize-coverage=trace-pc

 # Use USERINCLUDE when you must reference the UAPI directories only.
@@ -411,7 +412,7 @@ export MAKE AWK GENKSYMS INSTALLKERNEL PERL PYTHON UTS_MACHINE export HOSTCXX HOSTCXXFLAGS LDFLAGS_MODULE CHECK CHECKFLAGS export KBUILD_CPPFLAGS NOSTDINC_FLAGS LINUXINCLUDE OBJCOPYFLAGS LDFLAGS -export KBUILD_CFLAGS CFLAGS_KERNEL CFLAGS_MODULE CFLAGS_GCOV CFLAGS_KASAN CFLAGS_UBSAN +export KBUILD_CFLAGS CFLAGS_KERNEL CFLAGS_MODULE CFLAGS_GCOV CFLAGS_KCOV CFLAGS_KASAN CFLAGS_UBSAN export KBUILD_AFLAGS AFLAGS_KERNEL AFLAGS_MODULE export KBUILD_AFLAGS_MODULE KBUILD_CFLAGS_MODULE KBUILD_LDFLAGS_MODULE export KBUILD_AFLAGS_KERNEL KBUILD_CFLAGS_KERNEL @@ -673,6 +674,14 @@ endif endif KBUILD_CFLAGS += $(stackp-flag) +ifdef CONFIG_KCOV + ifeq ($(call cc-option, $(CFLAGS_KCOV)),) + $(warning Cannot use CONFIG_KCOV: \ + -fsanitize-coverage=trace-pc is not supported by compiler) + CFLAGS_KCOV = + endif +endif + ifeq ($(cc-name),clang) KBUILD_CPPFLAGS += $(call cc-option,-Qunused-arguments,) KBUILD_CPPFLAGS += $(call cc-option,-Wno-unknown-warning-option,) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 8b680a5cb25b..54478b7635de 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -28,6 +28,7 @@ config X86 select ARCH_HAS_ELF_RANDOMIZE select ARCH_HAS_FAST_MULTIPLIER select ARCH_HAS_GCOV_PROFILE_ALL + select ARCH_HAS_KCOV if X86_64 select ARCH_HAS_PMEM_API if X86_64 select ARCH_HAS_MMIO_FLUSH select ARCH_HAS_SG_CHAIN diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile index 0bf6749522d9..b1ef9e489084 100644 --- a/arch/x86/boot/Makefile +++ b/arch/x86/boot/Makefile @@ -12,6 +12,13 @@ KASAN_SANITIZE := n OBJECT_FILES_NON_STANDARD := y +# Kernel does not boot with kcov instrumentation here. +# One of the problems observed was insertion of __sanitizer_cov_trace_pc() +# callback into middle of per-cpu data enabling code. Thus the callback observed +# inconsistent state and crashed. We are interested mostly in syscall coverage, +# so boot code is not interesting anyway. +KCOV_INSTRUMENT := n + # If you want to preset the SVGA mode, uncomment the next line and # set SVGA_MODE to whatever number you want. # Set it to -DSVGA_MODE=NORMAL_VGA if you just want the EGA/VGA mode. diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile index 5e1d26e09407..6915ff2bd996 100644 --- a/arch/x86/boot/compressed/Makefile +++ b/arch/x86/boot/compressed/Makefile @@ -19,6 +19,9 @@ KASAN_SANITIZE := n OBJECT_FILES_NON_STANDARD := y +# Prevents link failures: __sanitizer_cov_trace_pc() is not linked in. +KCOV_INSTRUMENT := n + targets := vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 vmlinux.bin.lzma \ vmlinux.bin.xz vmlinux.bin.lzo vmlinux.bin.lz4 diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile index f9fb859c98b9..6874da5f67fc 100644 --- a/arch/x86/entry/vdso/Makefile +++ b/arch/x86/entry/vdso/Makefile @@ -7,6 +7,9 @@ KASAN_SANITIZE := n UBSAN_SANITIZE := n OBJECT_FILES_NON_STANDARD := y +# Prevents link failures: __sanitizer_cov_trace_pc() is not linked in. +KCOV_INSTRUMENT := n + VDSO64-$(CONFIG_X86_64) := y VDSOX32-$(CONFIG_X86_X32_ABI) := y VDSO32-$(CONFIG_X86_32) := y diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index d5fb0871aba3..adaae2c781c1 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -25,6 +25,12 @@ OBJECT_FILES_NON_STANDARD_relocate_kernel_$(BITS).o := y OBJECT_FILES_NON_STANDARD_mcount_$(BITS).o := y OBJECT_FILES_NON_STANDARD_test_nx.o := y +# If instrumentation of this dir is enabled, boot hangs during first second. 
+# Probably could be more selective here, but note that files related to irqs,
+# boot, dumpstack/stacktrace, etc. are either non-interesting or can lead to
+# non-deterministic coverage.
+KCOV_INSTRUMENT		:= n
+
 CFLAGS_irq.o := -I$(src)/../include/asm/trace

 obj-y			:= process_$(BITS).o signal.o
diff --git a/arch/x86/kernel/apic/Makefile b/arch/x86/kernel/apic/Makefile
index 8bb12ddc5db8..8e63ebdcbd0b 100644
--- a/arch/x86/kernel/apic/Makefile
+++ b/arch/x86/kernel/apic/Makefile
@@ -2,6 +2,10 @@
 # Makefile for local APIC drivers and for the IO-APIC code
 #

+# Leads to non-deterministic coverage that is not a function of syscall inputs.
+# In particular, smp_apic_timer_interrupt() is called in random places.
+KCOV_INSTRUMENT		:= n
+
 obj-$(CONFIG_X86_LOCAL_APIC)	+= apic.o apic_noop.o ipi.o vector.o
 obj-y				+= hw_nmi.o
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index 0d373d7affc8..4a8697f7d4ef 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -8,6 +8,10 @@ CFLAGS_REMOVE_common.o = -pg
 CFLAGS_REMOVE_perf_event.o = -pg
 endif

+# If these files are instrumented, boot hangs during the first second.
+KCOV_INSTRUMENT_common.o := n
+KCOV_INSTRUMENT_perf_event.o := n
+
 # Make sure load_percpu_segment has no stackprotector
 nostackp := $(call cc-option, -fno-stack-protector)
 CFLAGS_common.o		:= $(nostackp)
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile
index a501fa25da41..72a576752a7e 100644
--- a/arch/x86/lib/Makefile
+++ b/arch/x86/lib/Makefile
@@ -2,6 +2,9 @@
 # Makefile for x86 specific library files.
 #

+# Produces uninteresting flaky coverage.
+KCOV_INSTRUMENT_delay.o	:= n
+
 inat_tables_script = $(srctree)/arch/x86/tools/gen-insn-attr-x86.awk
 inat_tables_maps = $(srctree)/arch/x86/lib/x86-opcode-map.txt
 quiet_cmd_inat_tables = GEN     $@
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index 67cf2e1e557b..f98913258c63 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -1,3 +1,6 @@
+# Kernel does not boot with instrumentation of tlb.c.
+KCOV_INSTRUMENT_tlb.o	:= n
+
 obj-y	:= init.o init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \
 	    pat.o pgtable.o physaddr.o gup.o setup_nx.o
diff --git a/arch/x86/realmode/rm/Makefile b/arch/x86/realmode/rm/Makefile
index 053abe7b0ef7..b95964610ea7 100644
--- a/arch/x86/realmode/rm/Makefile
+++ b/arch/x86/realmode/rm/Makefile
@@ -9,6 +9,9 @@ KASAN_SANITIZE			:= n
 OBJECT_FILES_NON_STANDARD	:= y

+# Prevents link failures: __sanitizer_cov_trace_pc() is not linked in.
+KCOV_INSTRUMENT		:= n
+
 always := realmode.bin realmode.relocs

 wakeup-objs	:= wakeup_asm.o wakemain.o video-mode.o
diff --git a/drivers/firmware/efi/libstub/Makefile b/drivers/firmware/efi/libstub/Makefile
index a15841eced4e..da99bbb74aeb 100644
--- a/drivers/firmware/efi/libstub/Makefile
+++ b/drivers/firmware/efi/libstub/Makefile
@@ -25,6 +25,9 @@ KASAN_SANITIZE			:= n
 UBSAN_SANITIZE			:= n
 OBJECT_FILES_NON_STANDARD	:= y

+# Prevents link failures: __sanitizer_cov_trace_pc() is not linked in.
+KCOV_INSTRUMENT			:= n
+
 lib-y				:= efi-stub-helper.o

 # include the stub's generic dependencies from lib/ when building for ARM/arm64
diff --git a/include/linux/kcov.h b/include/linux/kcov.h
new file mode 100644
index 000000000000..2883ac98c280
--- /dev/null
+++ b/include/linux/kcov.h
@@ -0,0 +1,29 @@
+#ifndef _LINUX_KCOV_H
+#define _LINUX_KCOV_H
+
+#include <uapi/linux/kcov.h>
+
+struct task_struct;
+
+#ifdef CONFIG_KCOV
+
+void kcov_task_init(struct task_struct *t);
+void kcov_task_exit(struct task_struct *t);
+
+enum kcov_mode {
+	/* Coverage collection is not enabled yet. */
+	KCOV_MODE_DISABLED = 0,
+	/*
+	 * Tracing coverage collection mode.
+	 * Covered PCs are collected in a per-task buffer.
+	 */
+	KCOV_MODE_TRACE = 1,
+};
+
+#else
+
+static inline void kcov_task_init(struct task_struct *t) {}
+static inline void kcov_task_exit(struct task_struct *t) {}
+
+#endif /* CONFIG_KCOV */
+#endif /* _LINUX_KCOV_H */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 084ed9fba620..34495d2d2d7b 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -51,6 +51,7 @@ struct sched_param {
 #include
 #include
 #include
+#include <linux/kcov.h>
 #include
 #include
 #include
@@ -1818,6 +1819,16 @@ struct task_struct {
 	/* bitmask and counter of trace recursion */
 	unsigned long trace_recursion;
 #endif /* CONFIG_TRACING */
+#ifdef CONFIG_KCOV
+	/* Coverage collection mode enabled for this task (0 if disabled). */
+	enum kcov_mode kcov_mode;
+	/* Size of the kcov_area. */
+	unsigned kcov_size;
+	/* Buffer for coverage collection. */
+	void *kcov_area;
+	/* kcov descriptor wired with this task or NULL. */
+	struct kcov *kcov;
+#endif
 #ifdef CONFIG_MEMCG
 	struct mem_cgroup *memcg_in_oom;
 	gfp_t memcg_oom_gfp_mask;
diff --git a/include/uapi/linux/kcov.h b/include/uapi/linux/kcov.h
new file mode 100644
index 000000000000..574e22ec640d
--- /dev/null
+++ b/include/uapi/linux/kcov.h
@@ -0,0 +1,10 @@
+#ifndef _LINUX_KCOV_IOCTLS_H
+#define _LINUX_KCOV_IOCTLS_H
+
+#include <linux/types.h>
+
+#define KCOV_INIT_TRACE			_IOR('c', 1, unsigned long)
+#define KCOV_ENABLE			_IO('c', 100)
+#define KCOV_DISABLE			_IO('c', 101)
+
+#endif /* _LINUX_KCOV_IOCTLS_H */
diff --git a/kernel/Makefile b/kernel/Makefile
index baa55e50a315..f0c40bf49d9f 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -18,6 +18,17 @@ ifdef CONFIG_FUNCTION_TRACER
 CFLAGS_REMOVE_irq_work.o = $(CC_FLAGS_FTRACE)
 endif

+# Prevents flicker of uninteresting __do_softirq()/__local_bh_disable_ip()
+# in coverage traces.
+KCOV_INSTRUMENT_softirq.o := n
+# These are called from save_stack_trace() on the slub debug path,
+# and produce insane amounts of uninteresting coverage.
+KCOV_INSTRUMENT_module.o := n
+KCOV_INSTRUMENT_extable.o := n
+# Don't self-instrument.
+KCOV_INSTRUMENT_kcov.o := n
+KASAN_SANITIZE_kcov.o := n
+
 # cond_syscall is currently not LTO compatible
 CFLAGS_sys_ni.o = $(DISABLE_LTO)

@@ -68,6 +79,7 @@ obj-$(CONFIG_AUDITSYSCALL) += auditsc.o
 obj-$(CONFIG_AUDIT_WATCH) += audit_watch.o audit_fsnotify.o
 obj-$(CONFIG_AUDIT_TREE) += audit_tree.o
 obj-$(CONFIG_GCOV_KERNEL) += gcov/
+obj-$(CONFIG_KCOV) += kcov.o
 obj-$(CONFIG_KPROBES) += kprobes.o
 obj-$(CONFIG_KGDB) += debug/
 obj-$(CONFIG_DETECT_HUNG_TASK) += hung_task.o
diff --git a/kernel/exit.c b/kernel/exit.c
index 10e088237fed..953d1a1c0387 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -53,6 +53,7 @@
 #include
 #include
 #include
+#include <linux/kcov.h>
 #include
 #include
@@ -655,6 +656,7 @@ void do_exit(long code)
 	TASKS_RCU(int tasks_rcu_i);

 	profile_task_exit(tsk);
+	kcov_task_exit(tsk);

 	WARN_ON(blk_needs_flush_plug(tsk));
diff --git a/kernel/fork.c b/kernel/fork.c
index 5b8d1e7ceeea..d277e83ed3e0 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -75,6 +75,7 @@
 #include
 #include
 #include
+#include <linux/kcov.h>
 #include
 #include
@@ -392,6 +393,8 @@ static struct task_struct *dup_task_struct(struct task_struct *orig)

 	account_kernel_stack(ti, 1);

+	kcov_task_init(tsk);
+
 	return tsk;

 free_ti:
diff --git a/kernel/kcov.c b/kernel/kcov.c
new file mode 100644
index 000000000000..3efbee0834a8
--- /dev/null
+++ b/kernel/kcov.c
@@ -0,0 +1,273 @@
+#define pr_fmt(fmt) "kcov: " fmt
+
+#include <linux/compiler.h>
+#include <linux/types.h>
+#include <linux/file.h>
+#include <linux/fs.h>
+#include <linux/mm.h>
+#include <linux/printk.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/vmalloc.h>
+#include <linux/debugfs.h>
+#include <linux/uaccess.h>
+#include <linux/kcov.h>
+
+/*
+ * kcov descriptor (one per opened debugfs file).
+ * State transitions of the descriptor:
+ *  - initial state after open()
+ *  - then there must be a single ioctl(KCOV_INIT_TRACE) call
+ *  - then, mmap() call (several calls are allowed but not useful)
+ *  - then, repeated enable/disable for a task (only one task at a time
+ *    is allowed)
+ */
+struct kcov {
+	/*
+	 * Reference counter. We keep one for:
+	 *  - opened file descriptor
+	 *  - task with enabled coverage (we can't unwire it from another task)
+	 */
+	atomic_t		refcount;
+	/* The lock protects mode, size, area and t. */
+	spinlock_t		lock;
+	enum kcov_mode		mode;
+	/* Size of the arena (in longs for KCOV_MODE_TRACE). */
+	unsigned		size;
+	/* Coverage buffer shared with user space. */
+	void			*area;
+	/* Task for which we collect coverage, or NULL. */
+	struct task_struct	*t;
+};
+
+/*
+ * Entry point from instrumented code.
+ * This is called once per basic-block/edge.
+ */
+void __sanitizer_cov_trace_pc(void)
+{
+	struct task_struct *t;
+	enum kcov_mode mode;
+
+	t = current;
+	/*
+	 * We are interested in code coverage as a function of syscall inputs,
+	 * so we ignore code executed in interrupts.
+	 */
+	if (!t || in_interrupt())
+		return;
+	mode = READ_ONCE(t->kcov_mode);
+	if (mode == KCOV_MODE_TRACE) {
+		unsigned long *area;
+		unsigned long pos;
+
+		/*
+		 * There is some code that runs in interrupts but for which
+		 * in_interrupt() returns false (e.g. preempt_schedule_irq()).
+		 * READ_ONCE()/barrier() effectively provides load-acquire wrt
+		 * interrupts, there are paired barrier()/WRITE_ONCE() in
+		 * kcov_ioctl_locked().
+		 */
+		barrier();
+		area = t->kcov_area;
+		/* The first word is the number of subsequent PCs. */
+		pos = READ_ONCE(area[0]) + 1;
+		if (likely(pos < t->kcov_size)) {
+			area[pos] = _RET_IP_;
+			WRITE_ONCE(area[0], pos);
+		}
+	}
+}
+EXPORT_SYMBOL(__sanitizer_cov_trace_pc);
+
+static void kcov_get(struct kcov *kcov)
+{
+	atomic_inc(&kcov->refcount);
+}
+
+static void kcov_put(struct kcov *kcov)
+{
+	if (atomic_dec_and_test(&kcov->refcount)) {
+		vfree(kcov->area);
+		kfree(kcov);
+	}
+}
+
+void kcov_task_init(struct task_struct *t)
+{
+	t->kcov_mode = KCOV_MODE_DISABLED;
+	t->kcov_size = 0;
+	t->kcov_area = NULL;
+	t->kcov = NULL;
+}
+
+void kcov_task_exit(struct task_struct *t)
+{
+	struct kcov *kcov;
+
+	kcov = t->kcov;
+	if (kcov == NULL)
+		return;
+	spin_lock(&kcov->lock);
+	if (WARN_ON(kcov->t != t)) {
+		spin_unlock(&kcov->lock);
+		return;
+	}
+	/* Just to not leave dangling references behind. */
+	kcov_task_init(t);
+	kcov->t = NULL;
+	spin_unlock(&kcov->lock);
+	kcov_put(kcov);
+}
+
+static int kcov_mmap(struct file *filep, struct vm_area_struct *vma)
+{
+	int res = 0;
+	void *area;
+	struct kcov *kcov = vma->vm_file->private_data;
+	unsigned long size, off;
+	struct page *page;
+
+	area = vmalloc_user(vma->vm_end - vma->vm_start);
+	if (!area)
+		return -ENOMEM;
+
+	spin_lock(&kcov->lock);
+	size = kcov->size * sizeof(unsigned long);
+	if (kcov->mode == KCOV_MODE_DISABLED || vma->vm_pgoff != 0 ||
+	    vma->vm_end - vma->vm_start != size) {
+		res = -EINVAL;
+		goto exit;
+	}
+	if (!kcov->area) {
+		kcov->area = area;
+		vma->vm_flags |= VM_DONTEXPAND;
+		spin_unlock(&kcov->lock);
+		for (off = 0; off < size; off += PAGE_SIZE) {
+			page = vmalloc_to_page(kcov->area + off);
+			if (vm_insert_page(vma, vma->vm_start + off, page))
+				WARN_ONCE(1, "vm_insert_page() failed");
+		}
+		return 0;
+	}
+exit:
+	spin_unlock(&kcov->lock);
+	vfree(area);
+	return res;
+}
+
+static int kcov_open(struct inode *inode, struct file *filep)
+{
+	struct kcov *kcov;
+
+	kcov = kzalloc(sizeof(*kcov), GFP_KERNEL);
+	if (!kcov)
+		return -ENOMEM;
+	atomic_set(&kcov->refcount, 1);
+	spin_lock_init(&kcov->lock);
+	filep->private_data = kcov;
+	return nonseekable_open(inode, filep);
+}
+
+static int kcov_close(struct inode *inode, struct file *filep)
+{
+	kcov_put(filep->private_data);
+	return 0;
+}
+
+static int kcov_ioctl_locked(struct kcov *kcov, unsigned int cmd,
+			     unsigned long arg)
+{
+	struct task_struct *t;
+	unsigned long size, unused;
+
+	switch (cmd) {
+	case KCOV_INIT_TRACE:
+		/*
+		 * Enable kcov in trace mode and set up the buffer size.
+		 * Must happen before anything else.
+		 */
+		if (kcov->mode != KCOV_MODE_DISABLED)
+			return -EBUSY;
+		/*
+		 * Size must be at least 2 to hold the current position and
+		 * one PC. Later we allocate size * sizeof(unsigned long)
+		 * bytes of memory, which must not overflow.
+		 */
+		size = arg;
+		if (size < 2 || size > INT_MAX / sizeof(unsigned long))
+			return -EINVAL;
+		kcov->size = size;
+		kcov->mode = KCOV_MODE_TRACE;
+		return 0;
+	case KCOV_ENABLE:
+		/*
+		 * Enable coverage for the current task.
+		 * At this point the user must have enabled trace mode and
+		 * mmapped the file. Coverage collection is disabled only
+		 * at task exit or voluntarily via KCOV_DISABLE. After that
+		 * it can be enabled for another task.
+		 */
+		unused = arg;
+		if (unused != 0 || kcov->mode == KCOV_MODE_DISABLED ||
+		    kcov->area == NULL)
+			return -EINVAL;
+		if (kcov->t != NULL)
+			return -EBUSY;
+		t = current;
+		/* Cache in task struct for performance. */
+		t->kcov_size = kcov->size;
+		t->kcov_area = kcov->area;
+		/* See comment in __sanitizer_cov_trace_pc().
*/ + barrier(); + WRITE_ONCE(t->kcov_mode, kcov->mode); + t->kcov = kcov; + kcov->t = t; + /* This is put either in kcov_task_exit() or in KCOV_DISABLE. */ + kcov_get(kcov); + return 0; + case KCOV_DISABLE: + /* Disable coverage for the current task. */ + unused = arg; + if (unused != 0 || current->kcov != kcov) + return -EINVAL; + t = current; + if (WARN_ON(kcov->t != t)) + return -EINVAL; + kcov_task_init(t); + kcov->t = NULL; + kcov_put(kcov); + return 0; + default: + return -ENOTTY; + } +} + +static long kcov_ioctl(struct file *filep, unsigned int cmd, unsigned long arg) +{ + struct kcov *kcov; + int res; + + kcov = filep->private_data; + spin_lock(&kcov->lock); + res = kcov_ioctl_locked(kcov, cmd, arg); + spin_unlock(&kcov->lock); + return res; +} + +static const struct file_operations kcov_fops = { + .open = kcov_open, + .unlocked_ioctl = kcov_ioctl, + .mmap = kcov_mmap, + .release = kcov_close, +}; + +static int __init kcov_init(void) +{ + if (!debugfs_create_file("kcov", 0600, NULL, NULL, &kcov_fops)) { + pr_err("failed to create kcov in debugfs\n"); + return -ENOMEM; + } + return 0; +} + +device_initcall(kcov_init); diff --git a/kernel/locking/Makefile b/kernel/locking/Makefile index 8e96f6cc2a4a..31322a4275cd 100644 --- a/kernel/locking/Makefile +++ b/kernel/locking/Makefile @@ -1,3 +1,6 @@ +# Any varying coverage in these files is non-deterministic +# and is generally not a function of system call inputs. +KCOV_INSTRUMENT := n obj-y += mutex.o semaphore.o rwsem.o percpu-rwsem.o diff --git a/kernel/rcu/Makefile b/kernel/rcu/Makefile index 61a16569ffbf..032b2c015beb 100644 --- a/kernel/rcu/Makefile +++ b/kernel/rcu/Makefile @@ -1,3 +1,7 @@ +# Any varying coverage in these files is non-deterministic +# and is generally not a function of system call inputs. +KCOV_INSTRUMENT := n + obj-y += update.o sync.o obj-$(CONFIG_SRCU) += srcu.o obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile index 302d6ebd64f7..414d9c16da42 100644 --- a/kernel/sched/Makefile +++ b/kernel/sched/Makefile @@ -2,6 +2,10 @@ ifdef CONFIG_FUNCTION_TRACER CFLAGS_REMOVE_clock.o = $(CC_FLAGS_FTRACE) endif +# These files are disabled because they produce non-interesting flaky coverage +# that is not a function of syscall inputs. E.g. involuntary context switches. +KCOV_INSTRUMENT := n + ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y) # According to Alan Modra , the -fno-omit-frame-pointer is # needed for x86 only. Why this used to be enabled for all architectures is beyond diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 5a60f45cd9bb..532d4d52d1df 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -696,6 +696,27 @@ source "lib/Kconfig.kasan" endmenu # "Memory Debugging" +config ARCH_HAS_KCOV + bool + help + KCOV does not have any arch-specific code, but currently it is enabled + only for x86_64. KCOV requires testing on other archs, and most likely + disabling of instrumentation for some early boot code. + +config KCOV + bool "Code coverage for fuzzing" + depends on ARCH_HAS_KCOV + select DEBUG_FS + help + KCOV exposes kernel code coverage information in a form suitable + for coverage-guided fuzzing (randomized testing). + + If RANDOMIZE_BASE is enabled, PC values will not be stable across + different machines and across reboots. If you need stable PC values, + disable RANDOMIZE_BASE. + + For more details, see Documentation/kcov.txt. 
+
 config DEBUG_SHIRQ
 	bool "Debug shared IRQ handlers"
 	depends on DEBUG_KERNEL
diff --git a/lib/Makefile b/lib/Makefile
index 4962d14c450f..a1de5b61ff40 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -7,6 +7,18 @@ ORIG_CFLAGS := $(KBUILD_CFLAGS)
 KBUILD_CFLAGS = $(subst $(CC_FLAGS_FTRACE),,$(ORIG_CFLAGS))
 endif

+# These files are disabled because they produce lots of non-interesting and/or
+# flaky coverage that is not a function of syscall inputs. For example,
+# rbtree can be global and individual rotations don't correlate with inputs.
+KCOV_INSTRUMENT_string.o := n
+KCOV_INSTRUMENT_rbtree.o := n
+KCOV_INSTRUMENT_list_debug.o := n
+KCOV_INSTRUMENT_debugobjects.o := n
+KCOV_INSTRUMENT_dynamic_debug.o := n
+# Kernel does not boot if we instrument this file, as it uses a custom calling
+# convention (see CONFIG_ARCH_HWEIGHT_CFLAGS).
+KCOV_INSTRUMENT_hweight.o := n
+
 lib-y := ctype.o string.o vsprintf.o cmdline.o \
 	 rbtree.o radix-tree.o dump_stack.o timerqueue.o\
 	 idr.o int_sqrt.o extable.o \
diff --git a/mm/Makefile b/mm/Makefile
index 6da300a1414b..f5e797cbd128 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -5,6 +5,21 @@
 KASAN_SANITIZE_slab_common.o := n
 KASAN_SANITIZE_slub.o := n

+# These files are disabled because they produce non-interesting and/or
+# flaky coverage that is not a function of syscall inputs. E.g. slab is out of
+# free pages, or a task is migrated between nodes.
+KCOV_INSTRUMENT_slab_common.o := n
+KCOV_INSTRUMENT_slob.o := n
+KCOV_INSTRUMENT_slab.o := n
+KCOV_INSTRUMENT_slub.o := n
+KCOV_INSTRUMENT_page_alloc.o := n
+KCOV_INSTRUMENT_debug-pagealloc.o := n
+KCOV_INSTRUMENT_kmemleak.o := n
+KCOV_INSTRUMENT_kmemcheck.o := n
+KCOV_INSTRUMENT_memcontrol.o := n
+KCOV_INSTRUMENT_mmzone.o := n
+KCOV_INSTRUMENT_vmstat.o := n
+
 mmu-y			:= nommu.o
 mmu-$(CONFIG_MMU)	:= gup.o highmem.o memory.o mincore.o \
 			   mlock.o mmap.o mprotect.o mremap.o msync.o rmap.o \
diff --git a/mm/kasan/Makefile b/mm/kasan/Makefile
index a61460d9f5b0..131daadf40e4 100644
--- a/mm/kasan/Makefile
+++ b/mm/kasan/Makefile
@@ -1,5 +1,6 @@
 KASAN_SANITIZE := n
 UBSAN_SANITIZE_kasan.o := n
+KCOV_INSTRUMENT := n

 CFLAGS_REMOVE_kasan.o = -pg
 # Function splitter causes unnecessary splits in __asan_load1/__asan_store1
diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib
index ad50d5859ac4..ddf83d0181e7 100644
--- a/scripts/Makefile.lib
+++ b/scripts/Makefile.lib
@@ -136,6 +136,12 @@ _c_flags += $(if $(patsubst n%,, \
 		$(CFLAGS_UBSAN))
 endif

+ifeq ($(CONFIG_KCOV),y)
+_c_flags += $(if $(patsubst n%,, \
+	$(KCOV_INSTRUMENT_$(basetarget).o)$(KCOV_INSTRUMENT)y), \
+	$(CFLAGS_KCOV))
+endif
+
 # If building the kernel in a separate objtree expand all occurrences
 # of -Idir to -I$(srctree)/dir except for absolute paths (starting with '/').
--
cgit v1.2.3


From 21b2f44315d735345f02356239a819debdd89462 Mon Sep 17 00:00:00 2001
From: Stefani Seibold
Date: Tue, 22 Mar 2016 14:27:42 -0700
Subject: kfifo: fix sparse complaints

This patch fixes complaints by the sparse tool when using kfifo_put()
with non-scalar types such as structures (e.g. in
drivers/iio/industrialio-event.c).

Casting a pointer to the value and reading through this pointer,
instead of casting the value directly, fixes this. The generated code
is identical.
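As a reduced illustration (not taken from the kernel; struct s and the
surrounding variables are made up for the example), this is what the
change amounts to for a struct element type:

	struct s { int a, b; };

	void put_one(struct s *buf, unsigned int in, unsigned int mask,
		     struct s val)
	{
		/* The old macro body expanded to a value cast to the
		 * element type,
		 *	buf[in & mask] = (struct s)val;
		 * which sparse flags when the element type is non-scalar.
		 *
		 * The new macro body casts the address and dereferences it
		 * instead; the compiler emits the same stores, and sparse
		 * is satisfied.
		 */
		buf[in & mask] = *(struct s *)&val;
	}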
Signed-off-by: Stefani Seibold Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kfifo.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kfifo.h b/include/linux/kfifo.h index 473b43678ad1..41eb6fdf87a8 100644 --- a/include/linux/kfifo.h +++ b/include/linux/kfifo.h @@ -401,7 +401,7 @@ __kfifo_int_must_check_helper( \ ((typeof(__tmp->type))__kfifo->data) : \ (__tmp->buf) \ )[__kfifo->in & __tmp->kfifo.mask] = \ - (typeof(*__tmp->type))__val; \ + *(typeof(__tmp->type))&__val; \ smp_wmb(); \ __kfifo->in++; \ } \ -- cgit v1.2.3 From c907e0eb43a522de60fb651c011c553f87273222 Mon Sep 17 00:00:00 2001 From: Brian Starkey Date: Tue, 22 Mar 2016 14:28:00 -0700 Subject: memremap: add MEMREMAP_WC flag Add a flag to memremap() for writecombine mappings. Mappings satisfied by this flag will not be cached, however writes may be delayed or combined into more efficient bursts. This is most suitable for buffers written sequentially by the CPU for use by other DMA devices. Signed-off-by: Brian Starkey Reviewed-by: Catalin Marinas Cc: Dan Williams Cc: Greg Kroah-Hartman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/io.h | 1 + kernel/memremap.c | 13 +++++++++++-- 2 files changed, 12 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/io.h b/include/linux/io.h index 32403b5716e5..e2c8419278c1 100644 --- a/include/linux/io.h +++ b/include/linux/io.h @@ -135,6 +135,7 @@ enum { /* See memremap() kernel-doc for usage description... */ MEMREMAP_WB = 1 << 0, MEMREMAP_WT = 1 << 1, + MEMREMAP_WC = 1 << 2, }; void *memremap(resource_size_t offset, size_t size, unsigned long flags); diff --git a/kernel/memremap.c b/kernel/memremap.c index e5e685e6ff2a..a6d382312e6f 100644 --- a/kernel/memremap.c +++ b/kernel/memremap.c @@ -41,11 +41,13 @@ static void *try_ram_remap(resource_size_t offset, size_t size) * memremap() - remap an iomem_resource as cacheable memory * @offset: iomem resource start address * @size: size of remap - * @flags: either MEMREMAP_WB or MEMREMAP_WT + * @flags: any of MEMREMAP_WB, MEMREMAP_WT and MEMREMAP_WC * * memremap() is "ioremap" for cases where it is known that the resource * being mapped does not have i/o side effects and the __iomem - * annotation is not applicable. + * annotation is not applicable. In the case of multiple flags, the different + * mapping types will be attempted in the order listed below until one of + * them succeeds. * * MEMREMAP_WB - matches the default mapping for System RAM on * the architecture. This is usually a read-allocate write-back cache. @@ -57,6 +59,10 @@ static void *try_ram_remap(resource_size_t offset, size_t size) * cache or are written through to memory and never exist in a * cache-dirty state with respect to program visibility. Attempts to * map System RAM with this mapping type will fail. + * + * MEMREMAP_WC - establish a writecombine mapping, whereby writes may + * be coalesced together (e.g. in the CPU's write buffers), but is otherwise + * uncached. Attempts to map System RAM with this mapping type will fail. 
*/ void *memremap(resource_size_t offset, size_t size, unsigned long flags) { @@ -102,6 +108,9 @@ void *memremap(resource_size_t offset, size_t size, unsigned long flags) if (!addr && (flags & MEMREMAP_WT)) addr = ioremap_wt(offset, size); + if (!addr && (flags & MEMREMAP_WC)) + addr = ioremap_wc(offset, size); + return addr; } EXPORT_SYMBOL(memremap); -- cgit v1.2.3 From f09e3f4fe42caad0c0f59c1489d2b3f512c070b5 Mon Sep 17 00:00:00 2001 From: Yaowei Bai Date: Wed, 23 Mar 2016 09:40:09 +0800 Subject: x86/apic: Remove declaration of unused hw_nmi_is_cpu_stuck Commit 10f9014912 ("x86: Cleanup hw_nmi.c cruft") removed unused code in the hw_nmi.c file because of the redesign of the hardlockup watchdog but left the declaration of hw_nmi_is_cpu_stuck in linux/nmi.h, so remove it. Signed-off-by: Yaowei Bai Link: http://lkml.kernel.org/r/1458697210-3027-1-git-send-email-baiyaowei@cmss.chinamobile.com Signed-off-by: Thomas Gleixner --- include/linux/nmi.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/nmi.h b/include/linux/nmi.h index 7ec5b86735f3..4630eeae18e0 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -65,7 +65,6 @@ static inline bool trigger_allbutself_cpu_backtrace(void) #endif #ifdef CONFIG_LOCKUP_DETECTOR -int hw_nmi_is_cpu_stuck(struct pt_regs *); u64 hw_nmi_get_sample_period(int watchdog_thresh); extern int nmi_watchdog_enabled; extern int soft_watchdog_enabled; -- cgit v1.2.3 From 6c31da3464b4d28825d1827ee41a3a217b2dcf0e Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Sat, 19 Mar 2016 17:54:10 +0100 Subject: parisc,metag: Implement CONFIG_DEBUG_STACK_USAGE option On parisc and metag the stack grows upwards, so for those we need to scan the stack downwards in order to calculate how much stack a process has used. Tested on a 64-bit parisc kernel. Signed-off-by: Helge Deller --- include/linux/sched.h | 8 ++++++++ lib/Kconfig.debug | 2 +- 2 files changed, 9 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 34495d2d2d7b..589c4780b077 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2870,10 +2870,18 @@ static inline unsigned long stack_not_used(struct task_struct *p) unsigned long *n = end_of_stack(p); do { /* Skip over canary */ +# ifdef CONFIG_STACK_GROWSUP + n--; +# else n++; +# endif } while (!*n); +# ifdef CONFIG_STACK_GROWSUP + return (unsigned long)end_of_stack(p) - (unsigned long)n; +# else return (unsigned long)n - (unsigned long)end_of_stack(p); +# endif } #endif extern void set_task_stack_end_magic(struct task_struct *tsk); diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 532d4d52d1df..1e9a607534ca 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -558,7 +558,7 @@ config DEBUG_KMEMLEAK_DEFAULT_OFF config DEBUG_STACK_USAGE bool "Stack utilization instrumentation" - depends on DEBUG_KERNEL && !IA64 && !PARISC && !METAG + depends on DEBUG_KERNEL && !IA64 help Enables the display of the minimum amount of free stack which each task has ever had available in the sysrq-T and sysrq-P debug output. -- cgit v1.2.3 From 5e82b4b2a0015ed0a659b22ef2ee3409a3c39e54 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Wed, 23 Mar 2016 13:47:23 -0500 Subject: net: Fix typos and whitespace. Fix typos. Capitalize CPU, NAPI, RCU consistently. Align structure indentation. No functional change intended; only comment and whitespace changes. Signed-off-by: Bjorn Helgaas Signed-off-by: David S.
Miller --- include/linux/netdevice.h | 215 +++++++++++++++++++++++----------------------- 1 file changed, 106 insertions(+), 109 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 3f6385d27b81..a675205df0f1 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -81,8 +81,8 @@ void netdev_set_default_ethtool_ops(struct net_device *dev, * function. Real network devices commonly used with qdiscs should only return * the driver transmit return codes though - when qdiscs are used, the actual * transmission happens asynchronously, so the value is not propagated to - * higher layers. Virtual network devices transmit synchronously, in this case - * the driver transmit return codes are consumed by dev_queue_xmit(), all + * higher layers. Virtual network devices transmit synchronously; in this case + * the driver transmit return codes are consumed by dev_queue_xmit(), and all * others are propagated to higher layers. */ @@ -129,7 +129,7 @@ static inline bool dev_xmit_complete(int rc) } /* - * Compute the worst case header length according to the protocols + * Compute the worst-case header length according to the protocols * used. */ @@ -246,7 +246,7 @@ struct hh_cache { unsigned long hh_data[HH_DATA_ALIGN(LL_MAX_HEADER) / sizeof(long)]; }; -/* Reserve HH_DATA_MOD byte aligned hard_header_len, but at least that much. +/* Reserve HH_DATA_MOD byte-aligned hard_header_len, but at least that much. * Alternative is: * dev->hard_header_len ? (dev->hard_header_len + * (HH_DATA_MOD - 1)) & ~(HH_DATA_MOD - 1) : 0 @@ -272,7 +272,7 @@ struct header_ops { }; /* These flag bits are private to the generic network queueing - * layer, they may not be explicitly referenced by any other + * layer; they may not be explicitly referenced by any other * code. */ @@ -286,7 +286,7 @@ enum netdev_state_t { /* - * This structure holds at boot time configured netdevice settings. They + * This structure holds boot-time configured netdevice settings. They * are then used in the device probing. */ struct netdev_boot_setup { @@ -304,7 +304,7 @@ struct napi_struct { /* The poll_list must only be managed by the entity which * changes the state of the NAPI_STATE_SCHED bit. This means * whoever atomically sets that bit can add this napi_struct - * to the per-cpu poll_list, and whoever clears that bit + * to the per-CPU poll_list, and whoever clears that bit * can remove from the list right before clearing the bit. */ struct list_head poll_list; @@ -350,7 +350,7 @@ typedef enum gro_result gro_result_t; * @RX_HANDLER_ANOTHER: Do another round in receive path. This is indicated in * case skb->dev was changed by rx_handler. * @RX_HANDLER_EXACT: Force exact delivery, no wildcard. - * @RX_HANDLER_PASS: Do nothing, passe the skb as if no rx_handler was called. + * @RX_HANDLER_PASS: Do nothing, pass the skb as if no rx_handler was called. * * rx_handlers are functions called from inside __netif_receive_skb(), to do * special processing of the skb, prior to delivery to protocol handlers. @@ -365,19 +365,19 @@ typedef enum gro_result gro_result_t; * Upon return, rx_handler is expected to tell __netif_receive_skb() what to * do with the skb. * - * If the rx_handler consumed to skb in some way, it should return + * If the rx_handler consumed the skb in some way, it should return * RX_HANDLER_CONSUMED. This is appropriate when the rx_handler arranged for - * the skb to be delivered in some other ways. + * the skb to be delivered in some other way. 
* * If the rx_handler changed skb->dev, to divert the skb to another * net_device, it should return RX_HANDLER_ANOTHER. The rx_handler for the * new device will be called if it exists. * - * If the rx_handler consider the skb should be ignored, it should return + * If the rx_handler decides the skb should be ignored, it should return * RX_HANDLER_EXACT. The skb will only be delivered to protocol handlers that * are registered on exact device (ptype->dev == skb->dev). * - * If the rx_handler didn't changed skb->dev, but want the skb to be normally + * If the rx_handler didn't change skb->dev, but wants the skb to be normally * delivered, it should return RX_HANDLER_PASS. * * A device without a registered rx_handler will behave as if rx_handler @@ -402,11 +402,11 @@ static inline bool napi_disable_pending(struct napi_struct *n) } /** - * napi_schedule_prep - check if napi can be scheduled - * @n: napi context + * napi_schedule_prep - check if NAPI can be scheduled + * @n: NAPI context * * Test if NAPI routine is already running, and if not mark - * it as running. This is used as a condition variable + * it as running. This is used as a condition variable to * insure only one NAPI poll instance runs. We also make * sure there is no pending NAPI disable. */ @@ -418,7 +418,7 @@ static inline bool napi_schedule_prep(struct napi_struct *n) /** * napi_schedule - schedule NAPI poll - * @n: napi context + * @n: NAPI context * * Schedule NAPI poll routine to be called if it is not already * running. @@ -431,7 +431,7 @@ static inline void napi_schedule(struct napi_struct *n) /** * napi_schedule_irqoff - schedule NAPI poll - * @n: napi context + * @n: NAPI context * * Variant of napi_schedule(), assuming hard irqs are masked. */ @@ -455,7 +455,7 @@ void __napi_complete(struct napi_struct *n); void napi_complete_done(struct napi_struct *n, int work_done); /** * napi_complete - NAPI processing complete - * @n: napi context + * @n: NAPI context * * Mark NAPI processing as complete. * Consider using napi_complete_done() instead. @@ -467,32 +467,32 @@ static inline void napi_complete(struct napi_struct *n) /** * napi_hash_add - add a NAPI to global hashtable - * @napi: napi context + * @napi: NAPI context * - * generate a new napi_id and store a @napi under it in napi_hash - * Used for busy polling (CONFIG_NET_RX_BUSY_POLL) + * Generate a new napi_id and store a @napi under it in napi_hash. + * Used for busy polling (CONFIG_NET_RX_BUSY_POLL). * Note: This is normally automatically done from netif_napi_add(), - * so might disappear in a future linux version. + * so might disappear in a future Linux version. */ void napi_hash_add(struct napi_struct *napi); /** * napi_hash_del - remove a NAPI from global table - * @napi: napi context + * @napi: NAPI context * - * Warning: caller must observe rcu grace period + * Warning: caller must observe RCU grace period * before freeing memory containing @napi, if * this function returns true. * Note: core networking stack automatically calls it - * from netif_napi_del() + * from netif_napi_del(). * Drivers might want to call this helper to combine all - * the needed rcu grace periods into a single one. + * the needed RCU grace periods into a single one. */ bool napi_hash_del(struct napi_struct *napi); /** * napi_disable - prevent NAPI from scheduling - * @n: napi context + * @n: NAPI context * * Stop NAPI from being scheduled on this context. * Waits till any outstanding processing completes. 
@@ -501,7 +501,7 @@ void napi_disable(struct napi_struct *n); /** * napi_enable - enable NAPI scheduling - * @n: napi context + * @n: NAPI context * * Resume NAPI from being scheduled on this context. * Must be paired with napi_disable. @@ -516,7 +516,7 @@ static inline void napi_enable(struct napi_struct *n) /** * napi_synchronize - wait until NAPI is not running - * @n: napi context + * @n: NAPI context * * Wait until NAPI is done being scheduled on this context. * Waits till any outstanding processing completes but @@ -559,7 +559,7 @@ enum netdev_queue_state_t { struct netdev_queue { /* - * read mostly part + * read-mostly part */ struct net_device *dev; struct Qdisc __rcu *qdisc; @@ -571,7 +571,7 @@ struct netdev_queue { int numa_node; #endif /* - * write mostly part + * write-mostly part */ spinlock_t _xmit_lock ____cacheline_aligned_in_smp; int xmit_lock_owner; @@ -648,11 +648,11 @@ struct rps_dev_flow_table { /* * The rps_sock_flow_table contains mappings of flows to the last CPU * on which they were processed by the application (set in recvmsg). - * Each entry is a 32bit value. Upper part is the high order bits - * of flow hash, lower part is cpu number. + * Each entry is a 32bit value. Upper part is the high-order bits + * of flow hash, lower part is CPU number. * rps_cpu_mask is used to partition the space, depending on number of - * possible cpus : rps_cpu_mask = roundup_pow_of_two(nr_cpu_ids) - 1 - * For example, if 64 cpus are possible, rps_cpu_mask = 0x3f, + * possible CPUs : rps_cpu_mask = roundup_pow_of_two(nr_cpu_ids) - 1 + * For example, if 64 CPUs are possible, rps_cpu_mask = 0x3f, * meaning we use 32-6=26 bits for the hash. */ struct rps_sock_flow_table { @@ -674,7 +674,7 @@ static inline void rps_record_sock_flow(struct rps_sock_flow_table *table, unsigned int index = hash & table->mask; u32 val = hash & ~rps_cpu_mask; - /* We only give a hint, preemption can change cpu under us */ + /* We only give a hint, preemption can change CPU under us */ val |= raw_smp_processor_id(); if (table->ents[index] != val) @@ -807,21 +807,21 @@ struct tc_to_netdev { * optional and can be filled with a null pointer. * * int (*ndo_init)(struct net_device *dev); - * This function is called once when network device is registered. - * The network device can use this to any late stage initializaton - * or semantic validattion. It can fail with an error code which will - * be propogated back to register_netdev + * This function is called once when a network device is registered. + * The network device can use this for any late stage initialization + * or semantic validation. It can fail with an error code which will + * be propagated back to register_netdev. * * void (*ndo_uninit)(struct net_device *dev); * This function is called when device is unregistered or when registration * fails. It is not called if init fails. * * int (*ndo_open)(struct net_device *dev); - * This function is called when network device transistions to the up + * This function is called when a network device transitions to the up * state. * * int (*ndo_stop)(struct net_device *dev); - * This function is called when network device transistions to the down + * This function is called when a network device transitions to the down * state. * * netdev_tx_t (*ndo_start_xmit)(struct sk_buff *skb, @@ -832,7 +832,7 @@ struct tc_to_netdev { * corner cases, but the stack really does a non-trivial amount * of useless work if you return NETDEV_TX_BUSY. 
* (can also return NETDEV_TX_LOCKED iff NETIF_F_LLTX) - * Required can not be NULL. + * Required; cannot be NULL. * * netdev_features_t (*ndo_fix_features)(struct net_device *dev, * netdev_features_t features); @@ -842,34 +842,34 @@ struct tc_to_netdev { * * u16 (*ndo_select_queue)(struct net_device *dev, struct sk_buff *skb, * void *accel_priv, select_queue_fallback_t fallback); - * Called to decide which queue to when device supports multiple + * Called to decide which queue to use when device supports multiple * transmit queues. * * void (*ndo_change_rx_flags)(struct net_device *dev, int flags); * This function is called to allow device receiver to make - * changes to configuration when multicast or promiscious is enabled. + * changes to configuration when multicast or promiscuous is enabled. * * void (*ndo_set_rx_mode)(struct net_device *dev); * This function is called device changes address list filtering. * If driver handles unicast address filtering, it should set - * IFF_UNICAST_FLT to its priv_flags. + * IFF_UNICAST_FLT in its priv_flags. * * int (*ndo_set_mac_address)(struct net_device *dev, void *addr); * This function is called when the Media Access Control address * needs to be changed. If this interface is not defined, the - * mac address can not be changed. + * MAC address can not be changed. * * int (*ndo_validate_addr)(struct net_device *dev); * Test if Media Access Control address is valid for the device. * * int (*ndo_do_ioctl)(struct net_device *dev, struct ifreq *ifr, int cmd); - * Called when a user request an ioctl which can't be handled by - * the generic interface code. If not defined ioctl's return + * Called when a user requests an ioctl which can't be handled by + * the generic interface code. If not defined ioctls return * not supported error code. * * int (*ndo_set_config)(struct net_device *dev, struct ifmap *map); * Used to set network devices bus interface parameters. This interface - * is retained for legacy reason, new devices should use the bus + * is retained for legacy reasons; new devices should use the bus * interface (PCI) for low level management. * * int (*ndo_change_mtu)(struct net_device *dev, int new_mtu); @@ -878,7 +878,7 @@ struct tc_to_netdev { * will return an error. * * void (*ndo_tx_timeout)(struct net_device *dev); - * Callback uses when the transmitter has not made any progress + * Callback used when the transmitter has not made any progress * for dev->watchdog ticks. * * struct rtnl_link_stats64* (*ndo_get_stats64)(struct net_device *dev, @@ -896,11 +896,11 @@ struct tc_to_netdev { * neither operation. * * int (*ndo_vlan_rx_add_vid)(struct net_device *dev, __be16 proto, u16 vid); - * If device support VLAN filtering this function is called when a + * If device supports VLAN filtering this function is called when a * VLAN id is registered. * * int (*ndo_vlan_rx_kill_vid)(struct net_device *dev, __be16 proto, u16 vid); - * If device support VLAN filtering this function is called when a + * If device supports VLAN filtering this function is called when a * VLAN id is unregistered. * * void (*ndo_poll_controller)(struct net_device *dev); @@ -920,7 +920,7 @@ struct tc_to_netdev { * * Enable or disable the VF ability to query its RSS Redirection Table and * Hash Key. This is needed since on some devices VF share this information - * with PF and querying it may adduce a theoretical security risk. + * with PF and querying it may introduce a theoretical security risk. 
* int (*ndo_set_vf_rss_query_en)(struct net_device *dev, int vf, bool setting); * int (*ndo_get_vf_port)(struct net_device *dev, int vf, struct sk_buff *skb); * int (*ndo_setup_tc)(struct net_device *dev, u8 tc) @@ -1030,20 +1030,20 @@ struct tc_to_netdev { * * void (*ndo_add_vxlan_port)(struct net_device *dev, * sa_family_t sa_family, __be16 port); - * Called by vxlan to notiy a driver about the UDP port and socket - * address family that vxlan is listnening to. It is called only when + * Called by vxlan to notify a driver about the UDP port and socket + * address family that vxlan is listening to. It is called only when * a new port starts listening. The operation is protected by the * vxlan_net->sock_lock. * * void (*ndo_add_geneve_port)(struct net_device *dev, - * sa_family_t sa_family, __be16 port); + * sa_family_t sa_family, __be16 port); * Called by geneve to notify a driver about the UDP port and socket * address family that geneve is listnening to. It is called only when * a new port starts listening. The operation is protected by the * geneve_net->sock_lock. * * void (*ndo_del_geneve_port)(struct net_device *dev, - * sa_family_t sa_family, __be16 port); + * sa_family_t sa_family, __be16 port); * Called by geneve to notify the driver about a UDP port and socket * address family that geneve is not listening to anymore. The operation * is protected by the geneve_net->sock_lock. @@ -1072,9 +1072,9 @@ struct tc_to_netdev { * Callback to use for xmit over the accelerated station. This * is used in place of ndo_start_xmit on accelerated net * devices. - * netdev_features_t (*ndo_features_check) (struct sk_buff *skb, - * struct net_device *dev - * netdev_features_t features); + * netdev_features_t (*ndo_features_check)(struct sk_buff *skb, + * struct net_device *dev + * netdev_features_t features); * Called by core transmit path to determine if device is capable of * performing offload operations on a given packet. This is to give * the device an opportunity to implement any restrictions that cannot @@ -1088,7 +1088,7 @@ struct tc_to_netdev { * int (*ndo_get_iflink)(const struct net_device *dev); * Called to get the iflink value of this device. * void (*ndo_change_proto_down)(struct net_device *dev, - * bool proto_down); + * bool proto_down); * This function is used to pass protocol port error state information * to the switch driver. The switch driver can react to the proto_down * by doing a phys down on the associated switch port. @@ -1100,7 +1100,7 @@ struct tc_to_netdev { * This function is used to specify the headroom that the skb must * consider when allocation skb during packet reception. Setting * appropriate rx headroom value allows avoiding skb head copy on - * forward. Setting a negative value reset the rx headroom to the + * forward. Setting a negative value resets the rx headroom to the * default value. * */ @@ -1296,7 +1296,7 @@ struct net_device_ops { * * These are the &struct net_device, they are only set internally * by drivers and used in the kernel. These flags are invisible to - * userspace, this means that the order of these flags can change + * userspace; this means that the order of these flags can change * during any kernel release. * * You should have a pretty good reason to be extending these flags. 
@@ -1414,10 +1414,10 @@ enum netdev_priv_flags { * * @state: Generic network queuing layer state, see netdev_state_t * @dev_list: The global list of network devices - * @napi_list: List entry, that is used for polling napi devices - * @unreg_list: List entry, that is used, when we are unregistering the - * device, see the function unregister_netdev - * @close_list: List entry, that is used, when we are closing the device + * @napi_list: List entry used for polling NAPI devices + * @unreg_list: List entry when we are unregistering the + * device; see the function unregister_netdev + * @close_list: List entry used when we are closing the device * @ptype_all: Device-specific packet handlers for all protocols * @ptype_specific: Device-specific, protocol-specific packet handlers * @@ -1437,7 +1437,7 @@ enum netdev_priv_flags { * @mpls_features: Mask of features inheritable by MPLS * * @ifindex: interface index - * @group: The group, that the device belongs to + * @group: The group the device belongs to * * @stats: Statistics struct, which was left as a legacy, use * rtnl_link_stats64 instead @@ -1491,7 +1491,7 @@ enum netdev_priv_flags { * @dev_port: Used to differentiate devices that share * the same function * @addr_list_lock: XXX: need comments on this one - * @uc_promisc: Counter, that indicates, that promiscuous mode + * @uc_promisc: Counter that indicates promiscuous mode * has been enabled due to the need to listen to * additional unicast addresses in a device that * does not implement ndo_set_rx_mode() @@ -1499,9 +1499,9 @@ enum netdev_priv_flags { * @mc: multicast mac addresses * @dev_addrs: list of device hw addresses * @queues_kset: Group of all Kobjects in the Tx and RX queues - * @promiscuity: Number of times, the NIC is told to work in - * Promiscuous mode, if it becomes 0 the NIC will - * exit from working in Promiscuous mode + * @promiscuity: Number of times the NIC is told to work in + * promiscuous mode; if it becomes 0 the NIC will + * exit promiscuous mode * @allmulti: Counter, enables or disables allmulticast mode * * @vlan_info: VLAN info @@ -1547,7 +1547,7 @@ enum netdev_priv_flags { * * @trans_start: Time (in jiffies) of last Tx * @watchdog_timeo: Represents the timeout that is used by - * the watchdog ( see dev_watchdog() ) + * the watchdog (see dev_watchdog()) * @watchdog_timer: List of timers * * @pcpu_refcnt: Number of references to this device @@ -1664,8 +1664,8 @@ struct net_device { atomic_long_t rx_nohandler; #ifdef CONFIG_WIRELESS_EXT - const struct iw_handler_def * wireless_handlers; - struct iw_public_data * wireless_data; + const struct iw_handler_def *wireless_handlers; + struct iw_public_data *wireless_data; #endif const struct net_device_ops *netdev_ops; const struct ethtool_ops *ethtool_ops; @@ -1718,7 +1718,7 @@ struct net_device { unsigned int allmulti; - /* Protocol specific pointers */ + /* Protocol-specific pointers */ #if IS_ENABLED(CONFIG_VLAN_8021Q) struct vlan_info __rcu *vlan_info; @@ -1748,13 +1748,11 @@ struct net_device { /* Interface address info used in eth_type_trans() */ unsigned char *dev_addr; - #ifdef CONFIG_SYSFS struct netdev_rx_queue *_rx; unsigned int num_rx_queues; unsigned int real_num_rx_queues; - #endif unsigned long gro_flush_timeout; @@ -1846,7 +1844,7 @@ struct net_device { struct garp_port __rcu *garp_port; struct mrp_port __rcu *mrp_port; - struct device dev; + struct device dev; const struct attribute_group *sysfs_groups[4]; const struct attribute_group *sysfs_rx_queue_group; @@ -1861,9 +1859,9 @@ struct net_device { 
#ifdef CONFIG_DCB const struct dcbnl_rtnl_ops *dcbnl_ops; #endif - u8 num_tc; - struct netdev_tc_txq tc_to_txq[TC_MAX_QUEUE]; - u8 prio_tc_map[TC_BITMASK + 1]; + u8 num_tc; + struct netdev_tc_txq tc_to_txq[TC_MAX_QUEUE]; + u8 prio_tc_map[TC_BITMASK + 1]; #if IS_ENABLED(CONFIG_FCOE) unsigned int fcoe_ddp_xid; @@ -1871,9 +1869,9 @@ struct net_device { #if IS_ENABLED(CONFIG_CGROUP_NET_PRIO) struct netprio_map __rcu *priomap; #endif - struct phy_device *phydev; - struct lock_class_key *qdisc_tx_busylock; - bool proto_down; + struct phy_device *phydev; + struct lock_class_key *qdisc_tx_busylock; + bool proto_down; }; #define to_net_dev(d) container_of(d, struct net_device, dev) @@ -2021,7 +2019,7 @@ static inline void *netdev_priv(const struct net_device *dev) /* Set the sysfs device type for the network logical device to allow * fine-grained identification of different network device types. For - * example Ethernet, Wirelss LAN, Bluetooth, WiMAX etc. + * example Ethernet, Wireless LAN, Bluetooth, WiMAX etc. */ #define SET_NETDEV_DEVTYPE(net, devtype) ((net)->dev.type = (devtype)) @@ -2031,22 +2029,22 @@ static inline void *netdev_priv(const struct net_device *dev) #define NAPI_POLL_WEIGHT 64 /** - * netif_napi_add - initialize a napi context + * netif_napi_add - initialize a NAPI context * @dev: network device - * @napi: napi context + * @napi: NAPI context * @poll: polling function * @weight: default weight * - * netif_napi_add() must be used to initialize a napi context prior to calling - * *any* of the other napi related functions. + * netif_napi_add() must be used to initialize a NAPI context prior to calling + * *any* of the other NAPI-related functions. */ void netif_napi_add(struct net_device *dev, struct napi_struct *napi, int (*poll)(struct napi_struct *, int), int weight); /** - * netif_tx_napi_add - initialize a napi context + * netif_tx_napi_add - initialize a NAPI context * @dev: network device - * @napi: napi context + * @napi: NAPI context * @poll: polling function * @weight: default weight * @@ -2064,22 +2062,22 @@ static inline void netif_tx_napi_add(struct net_device *dev, } /** - * netif_napi_del - remove a napi context - * @napi: napi context + * netif_napi_del - remove a NAPI context + * @napi: NAPI context * - * netif_napi_del() removes a napi context from the network device napi list + * netif_napi_del() removes a NAPI context from the network device NAPI list */ void netif_napi_del(struct napi_struct *napi); struct napi_gro_cb { /* Virtual address of skb_shinfo(skb)->frags[0].page + offset. */ - void *frag0; + void *frag0; /* Length of frag0. */ unsigned int frag0_len; /* This indicates where we are processing relative to skb->data. */ - int data_offset; + int data_offset; /* This is non-zero if the packet cannot be merged with the new skb. 
*/ u16 flush; @@ -2175,7 +2173,7 @@ struct udp_offload { struct udp_offload_callbacks callbacks; }; -/* often modified stats are per cpu, other are shared (netdev->stats) */ +/* often modified stats are per-CPU, other are shared (netdev->stats) */ struct pcpu_sw_netstats { u64 rx_packets; u64 rx_bytes; @@ -2272,7 +2270,7 @@ struct netdev_notifier_changeupper_info { struct netdev_notifier_info info; /* must be first */ struct net_device *upper_dev; /* new upper dev */ bool master; /* is upper dev master */ - bool linking; /* is the nofication for link or unlink */ + bool linking; /* is the notification for link or unlink */ void *upper_info; /* upper dev info */ }; @@ -2737,7 +2735,7 @@ extern int netdev_flow_limit_table_len; #endif /* CONFIG_NET_FLOW_LIMIT */ /* - * Incoming packets are placed on per-cpu queues + * Incoming packets are placed on per-CPU queues */ struct softnet_data { struct list_head poll_list; @@ -2907,7 +2905,7 @@ netif_xmit_frozen_or_drv_stopped(const struct netdev_queue *dev_queue) * @dev_queue: pointer to transmit queue * * BQL enabled drivers might use this helper in their ndo_start_xmit(), - * to give appropriate hint to the cpu. + * to give appropriate hint to the CPU. */ static inline void netdev_txq_bql_enqueue_prefetchw(struct netdev_queue *dev_queue) { @@ -2921,7 +2919,7 @@ static inline void netdev_txq_bql_enqueue_prefetchw(struct netdev_queue *dev_que * @dev_queue: pointer to transmit queue * * BQL enabled drivers might use this helper in their TX completion path, - * to give appropriate hint to the cpu. + * to give appropriate hint to the CPU. */ static inline void netdev_txq_bql_complete_prefetchw(struct netdev_queue *dev_queue) { @@ -3060,7 +3058,7 @@ static inline bool netif_running(const struct net_device *dev) } /* - * Routines to manage the subqueues on a device. We only need start + * Routines to manage the subqueues on a device. We only need start, * stop, and a check if it's stopped. All other device management is * done at the overall netdevice level. * Also test the device if we're multiqueue. @@ -3344,7 +3342,6 @@ void netif_carrier_off(struct net_device *dev); * in a "pending" state, waiting for some external event. For "on- * demand" interfaces, this new state identifies the situation where the * interface is waiting for events to place it in the up state. - * */ static inline void netif_dormant_on(struct net_device *dev) { @@ -3679,7 +3676,7 @@ void dev_uc_init(struct net_device *dev); * * Add newly added addresses to the interface, and release * addresses that have been deleted. - **/ + */ static inline int __dev_uc_sync(struct net_device *dev, int (*sync)(struct net_device *, const unsigned char *), @@ -3695,7 +3692,7 @@ static inline int __dev_uc_sync(struct net_device *dev, * @unsync: function to call if address should be removed * * Remove all addresses that were added to the device by dev_uc_sync(). - **/ + */ static inline void __dev_uc_unsync(struct net_device *dev, int (*unsync)(struct net_device *, const unsigned char *)) @@ -3723,7 +3720,7 @@ void dev_mc_init(struct net_device *dev); * * Add newly added addresses to the interface, and release * addresses that have been deleted. - **/ + */ static inline int __dev_mc_sync(struct net_device *dev, int (*sync)(struct net_device *, const unsigned char *), @@ -3739,7 +3736,7 @@ static inline int __dev_mc_sync(struct net_device *dev, * @unsync: function to call if address should be removed * * Remove all addresses that were added to the device by dev_mc_sync(). 
- **/ + */ static inline void __dev_mc_unsync(struct net_device *dev, int (*unsync)(struct net_device *, const unsigned char *)) -- cgit v1.2.3 From 4cef191d05871fbc8bb3b812880e1997e855a3b9 Mon Sep 17 00:00:00 2001 From: Jaedon Shin Date: Fri, 25 Mar 2016 12:46:54 +0900 Subject: net: phy: bcm7xxx: Add entries for Broadcom BCM7346 and BCM7362 Add PHY entries for the Broadcom BCM7346 and BCM7362 chips; these are 40nm-generation Ethernet PHYs. Fixes: 815717d1473e ("net: phy: bcm7xxx: Remove wildcard entries") Signed-off-by: Jaedon Shin Signed-off-by: David S. Miller --- drivers/net/phy/bcm7xxx.c | 4 ++++ include/linux/brcmphy.h | 2 ++ 2 files changed, 6 insertions(+) (limited to 'include/linux') diff --git a/drivers/net/phy/bcm7xxx.c b/drivers/net/phy/bcm7xxx.c index b881a7b1e4f6..9636da0b6efc 100644 --- a/drivers/net/phy/bcm7xxx.c +++ b/drivers/net/phy/bcm7xxx.c @@ -339,6 +339,8 @@ static struct phy_driver bcm7xxx_driver[] = { BCM7XXX_28NM_GPHY(PHY_ID_BCM7439, "Broadcom BCM7439"), BCM7XXX_28NM_GPHY(PHY_ID_BCM7439_2, "Broadcom BCM7439 (2)"), BCM7XXX_28NM_GPHY(PHY_ID_BCM7445, "Broadcom BCM7445"), + BCM7XXX_40NM_EPHY(PHY_ID_BCM7346, "Broadcom BCM7346"), + BCM7XXX_40NM_EPHY(PHY_ID_BCM7362, "Broadcom BCM7362"), BCM7XXX_40NM_EPHY(PHY_ID_BCM7425, "Broadcom BCM7425"), BCM7XXX_40NM_EPHY(PHY_ID_BCM7429, "Broadcom BCM7429"), BCM7XXX_40NM_EPHY(PHY_ID_BCM7435, "Broadcom BCM7435"), @@ -348,6 +350,8 @@ static struct mdio_device_id __maybe_unused bcm7xxx_tbl[] = { { PHY_ID_BCM7250, 0xfffffff0, }, { PHY_ID_BCM7364, 0xfffffff0, }, { PHY_ID_BCM7366, 0xfffffff0, }, + { PHY_ID_BCM7346, 0xfffffff0, }, + { PHY_ID_BCM7362, 0xfffffff0, }, { PHY_ID_BCM7425, 0xfffffff0, }, { PHY_ID_BCM7429, 0xfffffff0, }, { PHY_ID_BCM7439, 0xfffffff0, }, diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h index f0ba9c2ec639..e3354b74286c 100644 --- a/include/linux/brcmphy.h +++ b/include/linux/brcmphy.h @@ -24,6 +24,8 @@ #define PHY_ID_BCM7250 0xae025280 #define PHY_ID_BCM7364 0xae025260 #define PHY_ID_BCM7366 0x600d8490 +#define PHY_ID_BCM7346 0x600d8650 +#define PHY_ID_BCM7362 0x600d84b0 #define PHY_ID_BCM7425 0x600d86b0 #define PHY_ID_BCM7429 0x600d8730 #define PHY_ID_BCM7435 0x600d8750 -- cgit v1.2.3 From 82dcabad750a36a2b749889bc89c5a3188775b2e Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Tue, 19 Jan 2016 16:19:06 +0100 Subject: libceph: revamp subs code, switch to SUBSCRIBE2 protocol It is currently hard-coded in the mon_client that mdsmap and monmap subs are continuous, while the osdmap sub is always "onetime". To better handle full clusters/pools in the osd_client, we need to be able to issue continuous osdmap subs. Revamp the subs code to allow us to specify for each sub whether it should be continuous or not. Although not strictly required for the above, switch to the SUBSCRIBE2 protocol while at it, eliminating the ambiguity between a request for "every map since X" and a request for "just the latest" when we don't have a map yet (i.e. have epoch 0). The SUBSCRIBE2 feature bit is now required - it's been supported since pre-argonaut (2010). Move the "got mdsmap" call to the end of ceph_mdsc_handle_map() - calling it before we validate the epoch and successfully install the new map can mess up mon_client sub state.
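A condensed sketch of how a map consumer might drive the reworked interface (the callers and the kick_subscribe() helper are hypothetical; ceph_monc_want_map() and ceph_monc_got_map() are the helpers this patch introduces):

/* Subscribe continuously to osdmaps starting after the epoch we
 * already have.  A true return from ceph_monc_want_map() means a
 * subscribe message needs to be (re)sent to the monitor. */
static void want_osdmaps(struct ceph_client *client, u32 have_epoch)
{
	if (ceph_monc_want_map(&client->monc, CEPH_SUB_OSDMAP,
			       have_epoch + 1, true))
		kick_subscribe(client);	/* hypothetical send trigger */
}

/* Later, once an osdmap of epoch 'epoch' has been installed: */
static void installed_osdmap(struct ceph_client *client, u32 epoch)
{
	ceph_monc_got_map(&client->monc, CEPH_SUB_OSDMAP, epoch);
}

For a onetime sub the 'continuous' argument is false, and ceph_monc_got_map() drops the want flag instead of bumping the start epoch.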
Signed-off-by: Ilya Dryomov --- fs/ceph/mds_client.c | 3 +- fs/ceph/super.c | 2 +- include/linux/ceph/ceph_features.h | 2 + include/linux/ceph/ceph_fs.h | 4 +- include/linux/ceph/mon_client.h | 28 +++-- net/ceph/debugfs.c | 17 +-- net/ceph/mon_client.c | 210 +++++++++++++++++++++++-------------- net/ceph/osd_client.c | 3 +- 8 files changed, 174 insertions(+), 95 deletions(-) (limited to 'include/linux') diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 911d64d865f1..b43399d22e23 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -3764,7 +3764,6 @@ void ceph_mdsc_handle_map(struct ceph_mds_client *mdsc, struct ceph_msg *msg) dout("handle_map epoch %u len %d\n", epoch, (int)maplen); /* do we need it? */ - ceph_monc_got_mdsmap(&mdsc->fsc->client->monc, epoch); mutex_lock(&mdsc->mutex); if (mdsc->mdsmap && epoch <= mdsc->mdsmap->m_epoch) { dout("handle_map epoch %u <= our %u\n", @@ -3791,6 +3790,8 @@ void ceph_mdsc_handle_map(struct ceph_mds_client *mdsc, struct ceph_msg *msg) mdsc->fsc->sb->s_maxbytes = mdsc->mdsmap->m_max_file_size; __wake_requests(mdsc, &mdsc->waiting_for_map); + ceph_monc_got_map(&mdsc->fsc->client->monc, CEPH_SUB_MDSMAP, + mdsc->mdsmap->m_epoch); mutex_unlock(&mdsc->mutex); schedule_delayed(mdsc); diff --git a/fs/ceph/super.c b/fs/ceph/super.c index ca4d5e8457f1..c941fd1a8eb8 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -530,7 +530,7 @@ static struct ceph_fs_client *create_fs_client(struct ceph_mount_options *fsopt, goto fail; } fsc->client->extra_mon_dispatch = extra_mon_dispatch; - fsc->client->monc.want_mdsmap = 1; + ceph_monc_want_map(&fsc->client->monc, CEPH_SUB_MDSMAP, 0, true); fsc->mount_options = fsopt; diff --git a/include/linux/ceph/ceph_features.h b/include/linux/ceph/ceph_features.h index 15151f3c4120..ae2f66833762 100644 --- a/include/linux/ceph/ceph_features.h +++ b/include/linux/ceph/ceph_features.h @@ -105,6 +105,7 @@ static inline u64 ceph_sanitize_features(u64 features) */ #define CEPH_FEATURES_SUPPORTED_DEFAULT \ (CEPH_FEATURE_NOSRCADDR | \ + CEPH_FEATURE_SUBSCRIBE2 | \ CEPH_FEATURE_RECONNECT_SEQ | \ CEPH_FEATURE_PGID64 | \ CEPH_FEATURE_PGPOOL3 | \ @@ -127,6 +128,7 @@ static inline u64 ceph_sanitize_features(u64 features) #define CEPH_FEATURES_REQUIRED_DEFAULT \ (CEPH_FEATURE_NOSRCADDR | \ + CEPH_FEATURE_SUBSCRIBE2 | \ CEPH_FEATURE_RECONNECT_SEQ | \ CEPH_FEATURE_PGID64 | \ CEPH_FEATURE_PGPOOL3 | \ diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h index d7d072a25c27..bf74005eedec 100644 --- a/include/linux/ceph/ceph_fs.h +++ b/include/linux/ceph/ceph_fs.h @@ -198,8 +198,8 @@ struct ceph_client_mount { #define CEPH_SUBSCRIBE_ONETIME 1 /* i want only 1 update after have */ struct ceph_mon_subscribe_item { - __le64 have_version; __le64 have; - __u8 onetime; + __le64 start; + __u8 flags; } __attribute__ ((packed)); struct ceph_mon_subscribe_ack { diff --git a/include/linux/ceph/mon_client.h b/include/linux/ceph/mon_client.h index 81810dc21f06..8b2d2f0b659e 100644 --- a/include/linux/ceph/mon_client.h +++ b/include/linux/ceph/mon_client.h @@ -68,7 +68,8 @@ struct ceph_mon_client { bool hunting; int cur_mon; /* last monitor i contacted */ - unsigned long sub_sent, sub_renew_after; + unsigned long sub_renew_after; + unsigned long sub_renew_sent; struct ceph_connection con; /* pending generic requests */ @@ -76,10 +77,12 @@ struct ceph_mon_client { int num_generic_requests; u64 last_tid; - /* mds/osd map */ - int want_mdsmap; - int want_next_osdmap; /* 1 = want, 2 = want+asked */ - u32 have_osdmap, 
have_mdsmap; + /* subs, indexed with CEPH_SUB_* */ + struct { + struct ceph_mon_subscribe_item item; + bool want; + u32 have; /* epoch */ + } subs[3]; #ifdef CONFIG_DEBUG_FS struct dentry *debugfs_file; @@ -93,14 +96,23 @@ extern int ceph_monmap_contains(struct ceph_monmap *m, extern int ceph_monc_init(struct ceph_mon_client *monc, struct ceph_client *cl); extern void ceph_monc_stop(struct ceph_mon_client *monc); +enum { + CEPH_SUB_MDSMAP = 0, + CEPH_SUB_MONMAP, + CEPH_SUB_OSDMAP, +}; + +extern const char *ceph_sub_str[]; + /* * The model here is to indicate that we need a new map of at least - * epoch @want, and also call in when we receive a map. We will + * epoch @epoch, and also call in when we receive a map. We will * periodically rerequest the map from the monitor cluster until we * get what we want. */ -extern int ceph_monc_got_mdsmap(struct ceph_mon_client *monc, u32 have); -extern int ceph_monc_got_osdmap(struct ceph_mon_client *monc, u32 have); +bool ceph_monc_want_map(struct ceph_mon_client *monc, int sub, u32 epoch, + bool continuous); +void ceph_monc_got_map(struct ceph_mon_client *monc, int sub, u32 epoch); extern void ceph_monc_request_next_osdmap(struct ceph_mon_client *monc); extern int ceph_monc_wait_osdmap(struct ceph_mon_client *monc, u32 epoch, diff --git a/net/ceph/debugfs.c b/net/ceph/debugfs.c index 593dc2eabcc8..b902fbc7863e 100644 --- a/net/ceph/debugfs.c +++ b/net/ceph/debugfs.c @@ -112,15 +112,20 @@ static int monc_show(struct seq_file *s, void *p) struct ceph_mon_generic_request *req; struct ceph_mon_client *monc = &client->monc; struct rb_node *rp; + int i; mutex_lock(&monc->mutex); - if (monc->have_mdsmap) - seq_printf(s, "have mdsmap %u\n", (unsigned int)monc->have_mdsmap); - if (monc->have_osdmap) - seq_printf(s, "have osdmap %u\n", (unsigned int)monc->have_osdmap); - if (monc->want_next_osdmap) - seq_printf(s, "want next osdmap\n"); + for (i = 0; i < ARRAY_SIZE(monc->subs); i++) { + seq_printf(s, "have %s %u", ceph_sub_str[i], + monc->subs[i].have); + if (monc->subs[i].want) + seq_printf(s, " want %llu%s", + le64_to_cpu(monc->subs[i].item.start), + (monc->subs[i].item.flags & + CEPH_SUBSCRIBE_ONETIME ? "" : "+")); + seq_putc(s, '\n'); + } for (rp = rb_first(&monc->generic_request_tree); rp; rp = rb_next(rp)) { __u16 op; diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c index d6af6ca26e8d..89029916315c 100644 --- a/net/ceph/mon_client.c +++ b/net/ceph/mon_client.c @@ -140,9 +140,8 @@ static int __open_session(struct ceph_mon_client *monc) monc->cur_mon = r % monc->monmap->num_mon; dout("open_session num=%d r=%d -> mon%d\n", monc->monmap->num_mon, r, monc->cur_mon); - monc->sub_sent = 0; monc->sub_renew_after = jiffies; /* i.e., expired */ - monc->want_next_osdmap = !!monc->want_next_osdmap; + monc->sub_renew_sent = 0; dout("open_session mon%d opening\n", monc->cur_mon); ceph_con_open(&monc->con, @@ -189,59 +188,58 @@ static void __schedule_delayed(struct ceph_mon_client *monc) round_jiffies_relative(delay)); } +const char *ceph_sub_str[] = { + [CEPH_SUB_MDSMAP] = "mdsmap", + [CEPH_SUB_MONMAP] = "monmap", + [CEPH_SUB_OSDMAP] = "osdmap", +}; + /* - * Send subscribe request for mdsmap and/or osdmap. + * Send subscribe request for one or more maps, according to + * monc->subs. 
*/ static void __send_subscribe(struct ceph_mon_client *monc) { - dout("__send_subscribe sub_sent=%u exp=%u want_osd=%d\n", - (unsigned int)monc->sub_sent, __sub_expired(monc), - monc->want_next_osdmap); - if ((__sub_expired(monc) && !monc->sub_sent) || - monc->want_next_osdmap == 1) { - struct ceph_msg *msg = monc->m_subscribe; - struct ceph_mon_subscribe_item *i; - void *p, *end; - int num; - - p = msg->front.iov_base; - end = p + msg->front_alloc_len; - - num = 1 + !!monc->want_next_osdmap + !!monc->want_mdsmap; - ceph_encode_32(&p, num); - - if (monc->want_next_osdmap) { - dout("__send_subscribe to 'osdmap' %u\n", - (unsigned int)monc->have_osdmap); - ceph_encode_string(&p, end, "osdmap", 6); - i = p; - i->have = cpu_to_le64(monc->have_osdmap); - i->onetime = 1; - p += sizeof(*i); - monc->want_next_osdmap = 2; /* requested */ - } - if (monc->want_mdsmap) { - dout("__send_subscribe to 'mdsmap' %u+\n", - (unsigned int)monc->have_mdsmap); - ceph_encode_string(&p, end, "mdsmap", 6); - i = p; - i->have = cpu_to_le64(monc->have_mdsmap); - i->onetime = 0; - p += sizeof(*i); - } - ceph_encode_string(&p, end, "monmap", 6); - i = p; - i->have = 0; - i->onetime = 0; - p += sizeof(*i); - - msg->front.iov_len = p - msg->front.iov_base; - msg->hdr.front_len = cpu_to_le32(msg->front.iov_len); - ceph_msg_revoke(msg); - ceph_con_send(&monc->con, ceph_msg_get(msg)); - - monc->sub_sent = jiffies | 1; /* never 0 */ + struct ceph_msg *msg = monc->m_subscribe; + void *p = msg->front.iov_base; + void *const end = p + msg->front_alloc_len; + int num = 0; + int i; + + dout("%s sent %lu\n", __func__, monc->sub_renew_sent); + + BUG_ON(monc->cur_mon < 0); + + if (!monc->sub_renew_sent) + monc->sub_renew_sent = jiffies | 1; /* never 0 */ + + msg->hdr.version = cpu_to_le16(2); + + for (i = 0; i < ARRAY_SIZE(monc->subs); i++) { + if (monc->subs[i].want) + num++; } + BUG_ON(num < 1); /* monmap sub is always there */ + ceph_encode_32(&p, num); + for (i = 0; i < ARRAY_SIZE(monc->subs); i++) { + const char *s = ceph_sub_str[i]; + + if (!monc->subs[i].want) + continue; + + dout("%s %s start %llu flags 0x%x\n", __func__, s, + le64_to_cpu(monc->subs[i].item.start), + monc->subs[i].item.flags); + ceph_encode_string(&p, end, s, strlen(s)); + memcpy(p, &monc->subs[i].item, sizeof(monc->subs[i].item)); + p += sizeof(monc->subs[i].item); + } + + BUG_ON(p != (end - 35 - (ARRAY_SIZE(monc->subs) - num) * 19)); + msg->front.iov_len = p - msg->front.iov_base; + msg->hdr.front_len = cpu_to_le32(msg->front.iov_len); + ceph_msg_revoke(msg); + ceph_con_send(&monc->con, ceph_msg_get(msg)); } static void handle_subscribe_ack(struct ceph_mon_client *monc, @@ -255,9 +253,16 @@ static void handle_subscribe_ack(struct ceph_mon_client *monc, seconds = le32_to_cpu(h->duration); mutex_lock(&monc->mutex); - dout("handle_subscribe_ack after %d seconds\n", seconds); - monc->sub_renew_after = monc->sub_sent + (seconds >> 1)*HZ - 1; - monc->sub_sent = 0; + if (monc->sub_renew_sent) { + monc->sub_renew_after = monc->sub_renew_sent + + (seconds >> 1) * HZ - 1; + dout("%s sent %lu duration %d renew after %lu\n", __func__, + monc->sub_renew_sent, seconds, monc->sub_renew_after); + monc->sub_renew_sent = 0; + } else { + dout("%s sent %lu renew after %lu, ignoring\n", __func__, + monc->sub_renew_sent, monc->sub_renew_after); + } mutex_unlock(&monc->mutex); return; bad: @@ -266,36 +271,82 @@ bad: } /* - * Keep track of which maps we have + * Register interest in a map + * + * @sub: one of CEPH_SUB_* + * @epoch: X for "every map since X", or 0 for "just the 
latest" */ -int ceph_monc_got_mdsmap(struct ceph_mon_client *monc, u32 got) +static bool __ceph_monc_want_map(struct ceph_mon_client *monc, int sub, + u32 epoch, bool continuous) +{ + __le64 start = cpu_to_le64(epoch); + u8 flags = !continuous ? CEPH_SUBSCRIBE_ONETIME : 0; + + dout("%s %s epoch %u continuous %d\n", __func__, ceph_sub_str[sub], + epoch, continuous); + + if (monc->subs[sub].want && + monc->subs[sub].item.start == start && + monc->subs[sub].item.flags == flags) + return false; + + monc->subs[sub].item.start = start; + monc->subs[sub].item.flags = flags; + monc->subs[sub].want = true; + + return true; +} + +bool ceph_monc_want_map(struct ceph_mon_client *monc, int sub, u32 epoch, + bool continuous) { + bool need_request; + mutex_lock(&monc->mutex); - monc->have_mdsmap = got; + need_request = __ceph_monc_want_map(monc, sub, epoch, continuous); mutex_unlock(&monc->mutex); - return 0; + + return need_request; } -EXPORT_SYMBOL(ceph_monc_got_mdsmap); +EXPORT_SYMBOL(ceph_monc_want_map); -int ceph_monc_got_osdmap(struct ceph_mon_client *monc, u32 got) +/* + * Keep track of which maps we have + * + * @sub: one of CEPH_SUB_* + */ +static void __ceph_monc_got_map(struct ceph_mon_client *monc, int sub, + u32 epoch) +{ + dout("%s %s epoch %u\n", __func__, ceph_sub_str[sub], epoch); + + if (monc->subs[sub].want) { + if (monc->subs[sub].item.flags & CEPH_SUBSCRIBE_ONETIME) + monc->subs[sub].want = false; + else + monc->subs[sub].item.start = cpu_to_le64(epoch + 1); + } + + monc->subs[sub].have = epoch; +} + +void ceph_monc_got_map(struct ceph_mon_client *monc, int sub, u32 epoch) { mutex_lock(&monc->mutex); - monc->have_osdmap = got; - monc->want_next_osdmap = 0; + __ceph_monc_got_map(monc, sub, epoch); mutex_unlock(&monc->mutex); - return 0; } +EXPORT_SYMBOL(ceph_monc_got_map); /* * Register interest in the next osdmap */ void ceph_monc_request_next_osdmap(struct ceph_mon_client *monc) { - dout("request_next_osdmap have %u\n", monc->have_osdmap); + dout("%s have %u\n", __func__, monc->subs[CEPH_SUB_OSDMAP].have); mutex_lock(&monc->mutex); - if (!monc->want_next_osdmap) - monc->want_next_osdmap = 1; - if (monc->want_next_osdmap < 2) + if (__ceph_monc_want_map(monc, CEPH_SUB_OSDMAP, + monc->subs[CEPH_SUB_OSDMAP].have + 1, false)) __send_subscribe(monc); mutex_unlock(&monc->mutex); } @@ -314,15 +365,15 @@ int ceph_monc_wait_osdmap(struct ceph_mon_client *monc, u32 epoch, long ret; mutex_lock(&monc->mutex); - while (monc->have_osdmap < epoch) { + while (monc->subs[CEPH_SUB_OSDMAP].have < epoch) { mutex_unlock(&monc->mutex); if (timeout && time_after_eq(jiffies, started + timeout)) return -ETIMEDOUT; ret = wait_event_interruptible_timeout(monc->client->auth_wq, - monc->have_osdmap >= epoch, - ceph_timeout_jiffies(timeout)); + monc->subs[CEPH_SUB_OSDMAP].have >= epoch, + ceph_timeout_jiffies(timeout)); if (ret < 0) return ret; @@ -335,11 +386,14 @@ int ceph_monc_wait_osdmap(struct ceph_mon_client *monc, u32 epoch, EXPORT_SYMBOL(ceph_monc_wait_osdmap); /* - * + * Open a session with a random monitor. Request monmap and osdmap, + * which are waited upon in __ceph_open_session(). 
*/ int ceph_monc_open_session(struct ceph_mon_client *monc) { mutex_lock(&monc->mutex); + __ceph_monc_want_map(monc, CEPH_SUB_MONMAP, 0, true); + __ceph_monc_want_map(monc, CEPH_SUB_OSDMAP, 0, false); __open_session(monc); __schedule_delayed(monc); mutex_unlock(&monc->mutex); @@ -375,6 +429,7 @@ static void ceph_monc_handle_map(struct ceph_mon_client *monc, client->monc.monmap = monmap; kfree(old); + __ceph_monc_got_map(monc, CEPH_SUB_MONMAP, monc->monmap->epoch); client->have_fsid = true; out: @@ -725,8 +780,14 @@ static void delayed_work(struct work_struct *work) __validate_auth(monc); } - if (is_auth) - __send_subscribe(monc); + if (is_auth) { + unsigned long now = jiffies; + + dout("%s renew subs? now %lu renew after %lu\n", + __func__, now, monc->sub_renew_after); + if (time_after_eq(now, monc->sub_renew_after)) + __send_subscribe(monc); + } } __schedule_delayed(monc); mutex_unlock(&monc->mutex); @@ -815,16 +876,13 @@ int ceph_monc_init(struct ceph_mon_client *monc, struct ceph_client *cl) monc->cur_mon = -1; monc->hunting = true; monc->sub_renew_after = jiffies; - monc->sub_sent = 0; + monc->sub_renew_sent = 0; INIT_DELAYED_WORK(&monc->delayed_work, delayed_work); monc->generic_request_tree = RB_ROOT; monc->num_generic_requests = 0; monc->last_tid = 0; - monc->have_mdsmap = 0; - monc->have_osdmap = 0; - monc->want_next_osdmap = 1; return 0; out_auth_reply: diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 5bc053778fed..3309112e23d0 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -2187,7 +2187,8 @@ void ceph_osdc_handle_map(struct ceph_osd_client *osdc, struct ceph_msg *msg) goto bad; done: downgrade_write(&osdc->map_sem); - ceph_monc_got_osdmap(&osdc->client->monc, osdc->osdmap->epoch); + ceph_monc_got_map(&osdc->client->monc, CEPH_SUB_OSDMAP, + osdc->osdmap->epoch); /* * subscribe to subsequent osdmap updates if full to ensure -- cgit v1.2.3 From 58d81b1294f02262a141687cd62529c1ec8e6484 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Thu, 21 Jan 2016 16:33:15 +0100 Subject: libceph: monc ping rate is 10s Split ping interval and ping timeout: ping interval is 10s; keepalive timeout is 30s. Make monc_ping_timeout a constant while at it - it's not actually exported as a mount option (and the rest of tick-related settings won't be either), so it's got no place in ceph_options. 
Signed-off-by: Ilya Dryomov --- include/linux/ceph/libceph.h | 5 +++-- net/ceph/ceph_common.c | 1 - net/ceph/mon_client.c | 8 ++------ 3 files changed, 5 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h index 3e3799cdc6e6..f5466273b9a3 100644 --- a/include/linux/ceph/libceph.h +++ b/include/linux/ceph/libceph.h @@ -47,7 +47,6 @@ struct ceph_options { unsigned long mount_timeout; /* jiffies */ unsigned long osd_idle_ttl; /* jiffies */ unsigned long osd_keepalive_timeout; /* jiffies */ - unsigned long monc_ping_timeout; /* jiffies */ /* * any type that can't be simply compared or doesn't need need @@ -68,7 +67,9 @@ struct ceph_options { #define CEPH_MOUNT_TIMEOUT_DEFAULT msecs_to_jiffies(60 * 1000) #define CEPH_OSD_KEEPALIVE_DEFAULT msecs_to_jiffies(5 * 1000) #define CEPH_OSD_IDLE_TTL_DEFAULT msecs_to_jiffies(60 * 1000) -#define CEPH_MONC_PING_TIMEOUT_DEFAULT msecs_to_jiffies(30 * 1000) + +#define CEPH_MONC_PING_INTERVAL msecs_to_jiffies(10 * 1000) +#define CEPH_MONC_PING_TIMEOUT msecs_to_jiffies(30 * 1000) #define CEPH_MSG_MAX_FRONT_LEN (16*1024*1024) #define CEPH_MSG_MAX_MIDDLE_LEN (16*1024*1024) diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c index 389dbabba17b..dcc18c6f7cf9 100644 --- a/net/ceph/ceph_common.c +++ b/net/ceph/ceph_common.c @@ -361,7 +361,6 @@ ceph_parse_options(char *options, const char *dev_name, opt->osd_keepalive_timeout = CEPH_OSD_KEEPALIVE_DEFAULT; opt->mount_timeout = CEPH_MOUNT_TIMEOUT_DEFAULT; opt->osd_idle_ttl = CEPH_OSD_IDLE_TTL_DEFAULT; - opt->monc_ping_timeout = CEPH_MONC_PING_TIMEOUT_DEFAULT; /* get mon ip(s) */ /* ip1[:port1][,ip2[:port2]...] */ diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c index accfded53bae..23a270c49baf 100644 --- a/net/ceph/mon_client.c +++ b/net/ceph/mon_client.c @@ -202,15 +202,12 @@ static bool __sub_expired(struct ceph_mon_client *monc) */ static void __schedule_delayed(struct ceph_mon_client *monc) { - struct ceph_options *opt = monc->client->options; unsigned long delay; if (monc->cur_mon < 0 || __sub_expired(monc)) { delay = 10 * HZ; } else { - delay = 20 * HZ; - if (opt->monc_ping_timeout > 0) - delay = min(delay, opt->monc_ping_timeout / 3); + delay = CEPH_MONC_PING_INTERVAL; } dout("__schedule_delayed after %lu\n", delay); schedule_delayed_work(&monc->delayed_work, @@ -793,10 +790,9 @@ static void delayed_work(struct work_struct *work) __close_session(monc); __open_session(monc); /* continue hunting */ } else { - struct ceph_options *opt = monc->client->options; int is_auth = ceph_auth_is_authenticated(monc->auth); if (ceph_con_keepalive_expired(&monc->con, - opt->monc_ping_timeout)) { + CEPH_MONC_PING_TIMEOUT)) { dout("monc keepalive timeout\n"); is_auth = 0; __close_session(monc); -- cgit v1.2.3 From 168b9090c739c4b5556023a3f08789b349ca7339 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Thu, 21 Jan 2016 16:33:19 +0100 Subject: libceph: monc hunt rate is 3s with backoff up to 30s Unless we are in the process of setting up a client (i.e. connecting to the monitor cluster for the first time), apply a backoff: every time we want to reopen a session, increase our timeout by a multiplier (currently 2); when we complete the connection, reduce that multiplier by 50%. Mirrors ceph.git commit 794c86fd289bd62a35ed14368fa096c46736e9a2.
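Distilled into a standalone sketch (the helper is hypothetical; the constants are the ones added by this patch), the hunt delay evolves like this:

/* delay = CEPH_MONC_HUNT_INTERVAL * hunt_mult: the multiplier
 * doubles on every failed hunt (capped) and halves on success. */
static unsigned long next_hunt_delay(int *hunt_mult, bool connected)
{
	if (connected) {
		*hunt_mult /= 2;			/* reduce by 50% */
		if (*hunt_mult < 1)
			*hunt_mult = 1;
	} else {
		*hunt_mult *= CEPH_MONC_HUNT_BACKOFF;	/* currently 2 */
		if (*hunt_mult > CEPH_MONC_HUNT_MAX_MULT)
			*hunt_mult = CEPH_MONC_HUNT_MAX_MULT;
	}
	return CEPH_MONC_HUNT_INTERVAL * *hunt_mult;	/* 3s up to 30s */
}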
Signed-off-by: Ilya Dryomov --- include/linux/ceph/libceph.h | 3 +++ include/linux/ceph/mon_client.h | 3 +++ net/ceph/mon_client.c | 25 ++++++++++++++++--------- 3 files changed, 22 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h index f5466273b9a3..e7975e4681e1 100644 --- a/include/linux/ceph/libceph.h +++ b/include/linux/ceph/libceph.h @@ -68,8 +68,11 @@ struct ceph_options { #define CEPH_OSD_KEEPALIVE_DEFAULT msecs_to_jiffies(5 * 1000) #define CEPH_OSD_IDLE_TTL_DEFAULT msecs_to_jiffies(60 * 1000) +#define CEPH_MONC_HUNT_INTERVAL msecs_to_jiffies(3 * 1000) #define CEPH_MONC_PING_INTERVAL msecs_to_jiffies(10 * 1000) #define CEPH_MONC_PING_TIMEOUT msecs_to_jiffies(30 * 1000) +#define CEPH_MONC_HUNT_BACKOFF 2 +#define CEPH_MONC_HUNT_MAX_MULT 10 #define CEPH_MSG_MAX_FRONT_LEN (16*1024*1024) #define CEPH_MSG_MAX_MIDDLE_LEN (16*1024*1024) diff --git a/include/linux/ceph/mon_client.h b/include/linux/ceph/mon_client.h index 8b2d2f0b659e..e230e7ed60d3 100644 --- a/include/linux/ceph/mon_client.h +++ b/include/linux/ceph/mon_client.h @@ -72,6 +72,9 @@ struct ceph_mon_client { unsigned long sub_renew_sent; struct ceph_connection con; + bool had_a_connection; + int hunt_mult; /* [1..CEPH_MONC_HUNT_MAX_MULT] */ + /* pending generic requests */ struct rb_root generic_request_tree; int num_generic_requests; diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c index 23a270c49baf..fd1cf408fd89 100644 --- a/net/ceph/mon_client.c +++ b/net/ceph/mon_client.c @@ -171,6 +171,12 @@ static void __open_session(struct ceph_mon_client *monc) pick_new_mon(monc); + if (monc->had_a_connection) { + monc->hunt_mult *= CEPH_MONC_HUNT_BACKOFF; + if (monc->hunt_mult > CEPH_MONC_HUNT_MAX_MULT) + monc->hunt_mult = CEPH_MONC_HUNT_MAX_MULT; + } + monc->sub_renew_after = jiffies; /* i.e., expired */ monc->sub_renew_sent = 0; @@ -192,11 +198,6 @@ static void __open_session(struct ceph_mon_client *monc) __send_prepared_auth_request(monc, ret); } -static bool __sub_expired(struct ceph_mon_client *monc) -{ - return time_after_eq(jiffies, monc->sub_renew_after); -} - /* * Reschedule delayed work timer. */ @@ -204,11 +205,11 @@ static void __schedule_delayed(struct ceph_mon_client *monc) { unsigned long delay; - if (monc->cur_mon < 0 || __sub_expired(monc)) { - delay = 10 * HZ; - } else { + if (monc->hunting) + delay = CEPH_MONC_HUNT_INTERVAL * monc->hunt_mult; + else delay = CEPH_MONC_PING_INTERVAL; - } + dout("__schedule_delayed after %lu\n", delay); schedule_delayed_work(&monc->delayed_work, round_jiffies_relative(delay)); @@ -902,6 +903,8 @@ int ceph_monc_init(struct ceph_mon_client *monc, struct ceph_client *cl) monc->hunting = true; monc->sub_renew_after = jiffies; monc->sub_renew_sent = 0; + monc->had_a_connection = false; + monc->hunt_mult = 1; INIT_DELAYED_WORK(&monc->delayed_work, delayed_work); monc->generic_request_tree = RB_ROOT; @@ -959,6 +962,10 @@ static void finish_hunting(struct ceph_mon_client *monc) if (monc->hunting) { dout("%s found mon%d\n", __func__, monc->cur_mon); monc->hunting = false; + monc->had_a_connection = true; + monc->hunt_mult /= 2; /* reduce by 50% */ + if (monc->hunt_mult < 1) + monc->hunt_mult = 1; } } -- cgit v1.2.3 From de2aa102ea464a54dba14b9588e0bc188bd94707 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Mon, 8 Feb 2016 13:39:46 +0100 Subject: libceph: rename ceph_osd_req_op::payload_len to indata_len Follow userspace nomenclature on this - the next commit adds outdata_len. 
Signed-off-by: Ilya Dryomov --- include/linux/ceph/osd_client.h | 2 +- net/ceph/osd_client.c | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index 7506b485bb6d..35c8b006916f 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -77,7 +77,7 @@ struct ceph_osd_data { struct ceph_osd_req_op { u16 op; /* CEPH_OSD_OP_* */ u32 flags; /* CEPH_OSD_OP_FLAG_* */ - u32 payload_len; + u32 indata_len; /* request */ union { struct ceph_osd_data raw_data_in; struct { diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 3309112e23d0..84075539135b 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -498,7 +498,7 @@ void osd_req_op_extent_init(struct ceph_osd_request *osd_req, if (opcode == CEPH_OSD_OP_WRITE || opcode == CEPH_OSD_OP_WRITEFULL) payload_len += length; - op->payload_len = payload_len; + op->indata_len = payload_len; } EXPORT_SYMBOL(osd_req_op_extent_init); @@ -517,7 +517,7 @@ void osd_req_op_extent_update(struct ceph_osd_request *osd_req, BUG_ON(length > previous); op->extent.length = length; - op->payload_len -= previous - length; + op->indata_len -= previous - length; } EXPORT_SYMBOL(osd_req_op_extent_update); @@ -554,7 +554,7 @@ void osd_req_op_cls_init(struct ceph_osd_request *osd_req, unsigned int which, op->cls.argc = 0; /* currently unused */ - op->payload_len = payload_len; + op->indata_len = payload_len; } EXPORT_SYMBOL(osd_req_op_cls_init); @@ -587,7 +587,7 @@ int osd_req_op_xattr_init(struct ceph_osd_request *osd_req, unsigned int which, op->xattr.cmp_mode = cmp_mode; ceph_osd_data_pagelist_init(&op->xattr.osd_data, pagelist); - op->payload_len = payload_len; + op->indata_len = payload_len; return 0; } EXPORT_SYMBOL(osd_req_op_xattr_init); @@ -707,7 +707,7 @@ static u64 osd_req_encode_op(struct ceph_osd_request *req, BUG_ON(osd_data->type == CEPH_OSD_DATA_TYPE_NONE); dst->cls.indata_len = cpu_to_le32(data_length); ceph_osdc_msg_data_add(req->r_request, osd_data); - src->payload_len += data_length; + src->indata_len += data_length; request_data_len += data_length; } osd_data = &src->cls.response_data; @@ -750,7 +750,7 @@ static u64 osd_req_encode_op(struct ceph_osd_request *req, dst->op = cpu_to_le16(src->op); dst->flags = cpu_to_le32(src->flags); - dst->payload_len = cpu_to_le32(src->payload_len); + dst->payload_len = cpu_to_le32(src->indata_len); return request_data_len; } -- cgit v1.2.3 From 7665d85b7307fa0218881bc2009de067c42dc52e Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Thu, 7 Jan 2016 16:48:57 +0800 Subject: libceph: move r_reply_op_{len,result} into struct ceph_osd_req_op This avoids defining a large array of r_reply_op_{len,result} in struct ceph_osd_request. Signed-off-by: Yan, Zheng Signed-off-by: Ilya Dryomov --- drivers/block/rbd.c | 2 +- include/linux/ceph/osd_client.h | 5 +++-- net/ceph/osd_client.c | 4 ++-- 3 files changed, 6 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 4a876785b68c..94f31bde73e8 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -1854,7 +1854,7 @@ static void rbd_osd_req_callback(struct ceph_osd_request *osd_req, * passed to the block layer, which just supports a 32-bit * length field.
*/ - obj_request->xferred = osd_req->r_reply_op_len[0]; + obj_request->xferred = osd_req->r_ops[0].outdata_len; rbd_assert(obj_request->xferred < (u64)UINT_MAX); opcode = osd_req->r_ops[0].op; diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index 35c8b006916f..c6d1d603bacf 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -78,6 +78,9 @@ struct ceph_osd_req_op { u16 op; /* CEPH_OSD_OP_* */ u32 flags; /* CEPH_OSD_OP_FLAG_* */ u32 indata_len; /* request */ + u32 outdata_len; /* reply */ + s32 rval; + union { struct ceph_osd_data raw_data_in; struct { @@ -148,8 +151,6 @@ struct ceph_osd_request { struct ceph_eversion *r_request_reassert_version; int r_result; - int r_reply_op_len[CEPH_OSD_MAX_OP]; - s32 r_reply_op_result[CEPH_OSD_MAX_OP]; int r_got_reply; int r_linger; diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 84075539135b..1048edb44343 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -1821,7 +1821,7 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg) int len; len = le32_to_cpu(op->payload_len); - req->r_reply_op_len[i] = len; + req->r_ops[i].outdata_len = len; dout(" op %d has %d bytes\n", i, len); payload_len += len; p += sizeof(*op); @@ -1836,7 +1836,7 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg) ceph_decode_need(&p, end, 4 + numops * 4, bad_put); retry_attempt = ceph_decode_32(&p); for (i = 0; i < numops; i++) - req->r_reply_op_result[i] = ceph_decode_32(&p); + req->r_ops[i].rval = ceph_decode_32(&p); if (le16_to_cpu(msg->hdr.version) >= 6) { p += 8 + 4; /* skip replay_version */ -- cgit v1.2.3 From 3f1af42ad0fad8a12242233dd0d9fc42f5e83415 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Tue, 9 Feb 2016 17:50:15 +0100 Subject: libceph: enable large, variable-sized OSD requests Turn r_ops into a flexible array member to enable large OSD requests consisting of up to 16 ops. The use case is scattered writeback in cephfs and, as far as the kernel client is concerned, 16 is just a made-up number. r_ops had size 3 for copyup+hint+write, but copyup is really a special case - it can only happen once. ceph_osd_request_cache is therefore stuffed with num_ops=2 requests, anything bigger than that is allocated with kmalloc(). req_mempool is backed by ceph_osd_request_cache, which means either num_ops=1 or num_ops=2 for use_mempool=true - all existing users (ceph_writepages_start(), ceph_osdc_writepages()) are fine with that.
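The flexible array member pattern this commit switches to is worth spelling out: the ops array moves to the end of the struct and the allocation size is computed per request, with a slab cache covering the common small case. A stand-alone sketch, using calloc() as a stand-in for both kmem_cache_alloc() and kmalloc() and with all names hypothetical:

#include <stdio.h>
#include <stdlib.h>

#define SLAB_OPS	2	/* covered by the slab cache */
#define MAX_OPS		16

struct req_op {
	unsigned short op;
	unsigned int indata_len;
};

struct request {
	unsigned int num_ops;
	struct req_op ops[];	/* flexible array member, must be last */
};

static struct request *alloc_request(unsigned int num_ops)
{
	struct request *req;

	if (num_ops > MAX_OPS)
		return NULL;
	/*
	 * The kernel takes num_ops <= SLAB_OPS requests from a slab
	 * cache sized for SLAB_OPS ops and falls back to kmalloc()
	 * for bigger ones; one allocator stands in for both here.
	 */
	req = calloc(1, sizeof(*req) + num_ops * sizeof(req->ops[0]));
	if (!req)
		return NULL;
	req->num_ops = num_ops;
	return req;
}

int main(void)
{
	unsigned int n;

	for (n = 1; n <= MAX_OPS; n *= 2)
		printf("%2u ops -> %3zu bytes (%s)\n", n,
		       sizeof(struct request) + n * sizeof(struct req_op),
		       n <= SLAB_OPS ? "slab" : "kmalloc");

	free(alloc_request(MAX_OPS));
	return 0;
}

The point of the split is that the fixed-size slab stays small while rare large requests pay for exactly the ops they carry.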
Signed-off-by: Ilya Dryomov --- drivers/block/rbd.c | 2 -- include/linux/ceph/osd_client.h | 6 ++++-- net/ceph/osd_client.c | 43 +++++++++++++++++++++++++++-------------- 3 files changed, 32 insertions(+), 19 deletions(-) (limited to 'include/linux') diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 94f31bde73e8..08018710f363 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -1847,8 +1847,6 @@ static void rbd_osd_req_callback(struct ceph_osd_request *osd_req, if (osd_req->r_result < 0) obj_request->result = osd_req->r_result; - rbd_assert(osd_req->r_num_ops <= CEPH_OSD_MAX_OP); - /* * We support a 64-bit length, but ultimately it has to be * passed to the block layer, which just supports a 32-bit diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index c6d1d603bacf..aada6a1383a4 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -43,7 +43,8 @@ struct ceph_osd { }; -#define CEPH_OSD_MAX_OP 3 +#define CEPH_OSD_SLAB_OPS 2 +#define CEPH_OSD_MAX_OPS 16 enum ceph_osd_data_type { CEPH_OSD_DATA_TYPE_NONE = 0, @@ -139,7 +140,6 @@ struct ceph_osd_request { /* request osd ops array */ unsigned int r_num_ops; - struct ceph_osd_req_op r_ops[CEPH_OSD_MAX_OP]; /* these are updated on each send */ __le32 *r_request_osdmap_epoch; @@ -175,6 +175,8 @@ struct ceph_osd_request { unsigned long r_stamp; /* send OR check time */ struct ceph_snap_context *r_snapc; /* snap context for writes */ + + struct ceph_osd_req_op r_ops[]; }; struct ceph_request_redirect { diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index f93d0e893f3f..ccd4e031fa3f 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -338,9 +338,10 @@ static void ceph_osdc_release_request(struct kref *kref) ceph_put_snap_context(req->r_snapc); if (req->r_mempool) mempool_free(req, req->r_osdc->req_mempool); - else + else if (req->r_num_ops <= CEPH_OSD_SLAB_OPS) kmem_cache_free(ceph_osd_request_cache, req); - + else + kfree(req); } void ceph_osdc_get_request(struct ceph_osd_request *req) @@ -369,18 +370,22 @@ struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc, struct ceph_msg *msg; size_t msg_size; - BUILD_BUG_ON(CEPH_OSD_MAX_OP > U16_MAX); - BUG_ON(num_ops > CEPH_OSD_MAX_OP); - if (use_mempool) { + BUG_ON(num_ops > CEPH_OSD_SLAB_OPS); req = mempool_alloc(osdc->req_mempool, gfp_flags); - memset(req, 0, sizeof(*req)); + } else if (num_ops <= CEPH_OSD_SLAB_OPS) { + req = kmem_cache_alloc(ceph_osd_request_cache, gfp_flags); } else { - req = kmem_cache_zalloc(ceph_osd_request_cache, gfp_flags); + BUG_ON(num_ops > CEPH_OSD_MAX_OPS); + req = kmalloc(sizeof(*req) + num_ops * sizeof(req->r_ops[0]), + gfp_flags); } - if (req == NULL) + if (unlikely(!req)) return NULL; + /* req only, each op is zeroed in _osd_req_op_init() */ + memset(req, 0, sizeof(*req)); + req->r_osdc = osdc; req->r_mempool = use_mempool; req->r_num_ops = num_ops; @@ -398,12 +403,19 @@ struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc, req->r_base_oloc.pool = -1; req->r_target_oloc.pool = -1; + msg_size = OSD_OPREPLY_FRONT_LEN; + if (num_ops > CEPH_OSD_SLAB_OPS) { + /* ceph_osd_op and rval */ + msg_size += (num_ops - CEPH_OSD_SLAB_OPS) * + (sizeof(struct ceph_osd_op) + 4); + } + /* create reply message */ if (use_mempool) msg = ceph_msgpool_get(&osdc->msgpool_op_reply, 0); else - msg = ceph_msg_new(CEPH_MSG_OSD_OPREPLY, - OSD_OPREPLY_FRONT_LEN, gfp_flags, true); + msg = ceph_msg_new(CEPH_MSG_OSD_OPREPLY, msg_size, + gfp_flags, true); 
if (!msg) { ceph_osdc_put_request(req); return NULL; } @@ -1811,7 +1823,7 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg) ceph_decode_need(&p, end, 4, bad_put); numops = ceph_decode_32(&p); - if (numops > CEPH_OSD_MAX_OP) + if (numops > CEPH_OSD_MAX_OPS) goto bad_put; if (numops != req->r_num_ops) goto bad_put; @@ -2784,11 +2796,12 @@ EXPORT_SYMBOL(ceph_osdc_writepages); int ceph_osdc_setup(void) { + size_t size = sizeof(struct ceph_osd_request) + + CEPH_OSD_SLAB_OPS * sizeof(struct ceph_osd_req_op); + BUG_ON(ceph_osd_request_cache); - ceph_osd_request_cache = kmem_cache_create("ceph_osd_request", - sizeof (struct ceph_osd_request), - __alignof__(struct ceph_osd_request), - 0, NULL); + ceph_osd_request_cache = kmem_cache_create("ceph_osd_request", size, + 0, 0, NULL); return ceph_osd_request_cache ? 0 : -ENOMEM; } -- cgit v1.2.3 From 2c63f49a724a10bb71cc0fd34f8e5acce78525d5 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Thu, 7 Jan 2016 17:32:54 +0800 Subject: libceph: add helper that duplicates last extent operation This helper duplicates the last extent operation in an OSD request, then adjusts the new extent operation's offset and length. The helper is for scattered page writeback, which adds nonconsecutive dirty pages to a single OSD request. Signed-off-by: Yan, Zheng Signed-off-by: Ilya Dryomov --- include/linux/ceph/osd_client.h | 2 ++ net/ceph/osd_client.c | 22 ++++++++++++++++++++++ 2 files changed, 24 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index aada6a1383a4..4343df806710 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -266,6 +266,8 @@ extern void osd_req_op_extent_init(struct ceph_osd_request *osd_req, u64 truncate_size, u32 truncate_seq); extern void osd_req_op_extent_update(struct ceph_osd_request *osd_req, unsigned int which, u64 length); +extern void osd_req_op_extent_dup_last(struct ceph_osd_request *osd_req, + unsigned int which, u64 offset_inc); extern struct ceph_osd_data *osd_req_op_extent_osd_data( struct ceph_osd_request *osd_req, diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index ccd4e031fa3f..32355d9d0103 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -534,6 +534,28 @@ void osd_req_op_extent_update(struct ceph_osd_request *osd_req, } EXPORT_SYMBOL(osd_req_op_extent_update); +void osd_req_op_extent_dup_last(struct ceph_osd_request *osd_req, + unsigned int which, u64 offset_inc) +{ + struct ceph_osd_req_op *op, *prev_op; + + BUG_ON(which + 1 >= osd_req->r_num_ops); + + prev_op = &osd_req->r_ops[which]; + op = _osd_req_op_init(osd_req, which + 1, prev_op->op, prev_op->flags); + /* dup previous one */ + op->indata_len = prev_op->indata_len; + op->outdata_len = prev_op->outdata_len; + op->extent = prev_op->extent; + /* adjust offset */ + op->extent.offset += offset_inc; + op->extent.length -= offset_inc; + + if (op->op == CEPH_OSD_OP_WRITE || op->op == CEPH_OSD_OP_WRITEFULL) + op->indata_len -= offset_inc; +} +EXPORT_SYMBOL(osd_req_op_extent_dup_last); + void osd_req_op_cls_init(struct ceph_osd_request *osd_req, unsigned int which, u16 opcode, const char *class, const char *method) { -- cgit v1.2.3 From 315f24088048a51eed341c53be66ea477a3c7d16 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Mon, 7 Mar 2016 10:34:50 +0800 Subject: ceph: fix security xattr deadlock When security is enabled, the security module can call the filesystem's getxattr/setxattr callbacks during d_instantiate().
For cephfs, d_instantiate() is usually called by the MDS' dispatch thread while handling an MDS reply. If the MDS reply does not include xattrs and the corresponding caps, getxattr/setxattr needs to send a new request to the MDS and wait for the reply. This puts the MDS' dispatch thread to sleep, and nobody handles later MDS replies. The fix is to make sure the lookup/atomic_open reply includes xattrs and the corresponding caps, so getxattr can be handled from cached xattrs. This requires some modification to both the MDS and the request message. (The client tells the MDS what caps it wants; the MDS encodes the proper caps in the reply.) The Smack security module may call setxattr during d_instantiate(). Unlike getxattr, we can't force the MDS to issue CEPH_CAP_XATTR_EXCL to us, so just make setxattr return an error when called from the MDS' dispatch thread. Signed-off-by: Yan, Zheng --- fs/ceph/dir.c | 9 ++++-- fs/ceph/export.c | 13 +++++++++ fs/ceph/file.c | 7 +++++ fs/ceph/inode.c | 18 +++++++++--- fs/ceph/mds_client.c | 2 ++ fs/ceph/super.h | 16 ++++++++++- fs/ceph/xattr.c | 68 ++++++++++++++++++++++++++++++++++++++++++-- include/linux/ceph/ceph_fs.h | 3 +- 8 files changed, 125 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index fd11fb231a2e..b9f50a388aee 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -624,6 +624,7 @@ static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry, struct ceph_mds_client *mdsc = fsc->mdsc; struct ceph_mds_request *req; int op; + int mask; int err; dout("lookup %p dentry %p '%pd'\n", @@ -666,8 +667,12 @@ static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry, return ERR_CAST(req); req->r_dentry = dget(dentry); req->r_num_caps = 2; - /* we only need inode linkage */ - req->r_args.getattr.mask = cpu_to_le32(CEPH_STAT_CAP_INODE); + + mask = CEPH_STAT_CAP_INODE | CEPH_CAP_AUTH_SHARED; + if (ceph_security_xattr_wanted(dir)) + mask |= CEPH_CAP_XATTR_SHARED; + req->r_args.getattr.mask = cpu_to_le32(mask); + req->r_locked_dir = dir; err = ceph_mdsc_do_request(mdsc, NULL, req); err = ceph_handle_snapdir(req, dentry, err); diff --git a/fs/ceph/export.c b/fs/ceph/export.c index 3b3172357326..6e72c98162d5 100644 --- a/fs/ceph/export.c +++ b/fs/ceph/export.c @@ -71,12 +71,18 @@ static struct dentry *__fh_to_dentry(struct super_block *sb, u64 ino) inode = ceph_find_inode(sb, vino); if (!inode) { struct ceph_mds_request *req; + int mask; req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_LOOKUPINO, USE_ANY_MDS); if (IS_ERR(req)) return ERR_CAST(req); + mask = CEPH_STAT_CAP_INODE; + if (ceph_security_xattr_wanted(d_inode(sb->s_root))) + mask |= CEPH_CAP_XATTR_SHARED; + req->r_args.getattr.mask = cpu_to_le32(mask); + req->r_ino1 = vino; req->r_num_caps = 1; err = ceph_mdsc_do_request(mdsc, NULL, req); @@ -128,6 +134,7 @@ static struct dentry *__get_parent(struct super_block *sb, struct ceph_mds_request *req; struct inode *inode; struct dentry *dentry; + int mask; int err; req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_LOOKUPPARENT, @@ -144,6 +151,12 @@ static struct dentry *__get_parent(struct super_block *sb, .snap = CEPH_NOSNAP, }; } + + mask = CEPH_STAT_CAP_INODE; + if (ceph_security_xattr_wanted(d_inode(sb->s_root))) + mask |= CEPH_CAP_XATTR_SHARED; + req->r_args.getattr.mask = cpu_to_le32(mask); + req->r_num_caps = 1; err = ceph_mdsc_do_request(mdsc, NULL, req); inode = req->r_target_inode; diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 389adacbc719..334a75170a3b 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -300,6 +300,7 @@ int
ceph_atomic_open(struct inode *dir, struct dentry *dentry, struct ceph_mds_request *req; struct dentry *dn; struct ceph_acls_info acls = {}; + int mask; int err; dout("atomic_open %p dentry %p '%pd' %s flags %d mode 0%o\n", @@ -335,6 +336,12 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry, acls.pagelist = NULL; } } + + mask = CEPH_STAT_CAP_INODE | CEPH_CAP_AUTH_SHARED; + if (ceph_security_xattr_wanted(dir)) + mask |= CEPH_CAP_XATTR_SHARED; + req->r_args.open.mask = cpu_to_le32(mask); + req->r_locked_dir = dir; /* caller holds dir->i_mutex */ err = ceph_mdsc_do_request(mdsc, (flags & (O_CREAT|O_TRUNC)) ? dir : NULL, diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 66edef12c6f2..8b136dc0bc13 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -1389,7 +1389,7 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req, struct qstr dname; struct dentry *dn; struct inode *in; - int err = 0, ret, i; + int err = 0, skipped = 0, ret, i; struct inode *snapdir = NULL; struct ceph_mds_request_head *rhead = req->r_request->front.iov_base; struct ceph_dentry_info *di; @@ -1501,7 +1501,17 @@ retry_lookup: } if (d_really_is_negative(dn)) { - struct dentry *realdn = splice_dentry(dn, in); + struct dentry *realdn; + + if (ceph_security_xattr_deadlock(in)) { + dout(" skip splicing dn %p to inode %p" + " (security xattr deadlock)\n", dn, in); + iput(in); + skipped++; + goto next_item; + } + + realdn = splice_dentry(dn, in); if (IS_ERR(realdn)) { err = PTR_ERR(realdn); d_drop(dn); @@ -1518,7 +1528,7 @@ retry_lookup: req->r_session, req->r_request_started); - if (err == 0 && cache_ctl.index >= 0) { + if (err == 0 && skipped == 0 && cache_ctl.index >= 0) { ret = fill_readdir_cache(d_inode(parent), dn, &cache_ctl, req); if (ret < 0) @@ -1529,7 +1539,7 @@ next_item: dput(dn); } out: - if (err == 0) { + if (err == 0 && skipped == 0) { req->r_did_prepopulate = true; req->r_readdir_cache_idx = cache_ctl.index; } diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index aa43dcb5f9b9..44852c3ae531 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -2540,6 +2540,7 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg) /* insert trace into our cache */ mutex_lock(&req->r_fill_mutex); + current->journal_info = req; err = ceph_fill_trace(mdsc->fsc->sb, req, req->r_session); if (err == 0) { if (result == 0 && (req->r_op == CEPH_MDS_OP_READDIR || @@ -2547,6 +2548,7 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg) ceph_readdir_prepopulate(req, req->r_session); ceph_unreserve_caps(mdsc, &req->r_caps_reservation); } + current->journal_info = NULL; mutex_unlock(&req->r_fill_mutex); up_read(&mdsc->snap_rwsem); diff --git a/fs/ceph/super.h b/fs/ceph/super.h index 57ac43d64322..2d48138da58e 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -468,7 +468,7 @@ static inline struct inode *ceph_find_inode(struct super_block *sb, #define CEPH_I_POOL_PERM (1 << 4) /* pool rd/wr bits are valid */ #define CEPH_I_POOL_RD (1 << 5) /* can read from pool */ #define CEPH_I_POOL_WR (1 << 6) /* can write to pool */ - +#define CEPH_I_SEC_INITED (1 << 7) /* security initialized */ static inline void __ceph_dir_set_complete(struct ceph_inode_info *ci, long long release_count, @@ -804,6 +804,20 @@ extern void __init ceph_xattr_init(void); extern void ceph_xattr_exit(void); extern const struct xattr_handler *ceph_xattr_handlers[]; +#ifdef CONFIG_SECURITY +extern bool ceph_security_xattr_deadlock(struct inode *in); +extern bool 
ceph_security_xattr_wanted(struct inode *in); +#else +static inline bool ceph_security_xattr_deadlock(struct inode *in) +{ + return false; +} +static inline bool ceph_security_xattr_wanted(struct inode *in) +{ + return false; +} +#endif + /* acl.c */ struct ceph_acls_info { void *default_acl; diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c index 139cdef8eb41..9410abdef3ce 100644 --- a/fs/ceph/xattr.c +++ b/fs/ceph/xattr.c @@ -714,13 +714,31 @@ void __ceph_build_xattrs_blob(struct ceph_inode_info *ci) } } +static inline int __get_request_mask(struct inode *in) { + struct ceph_mds_request *req = current->journal_info; + int mask = 0; + if (req && req->r_target_inode == in) { + if (req->r_op == CEPH_MDS_OP_LOOKUP || + req->r_op == CEPH_MDS_OP_LOOKUPINO || + req->r_op == CEPH_MDS_OP_LOOKUPPARENT || + req->r_op == CEPH_MDS_OP_GETATTR) { + mask = le32_to_cpu(req->r_args.getattr.mask); + } else if (req->r_op == CEPH_MDS_OP_OPEN || + req->r_op == CEPH_MDS_OP_CREATE) { + mask = le32_to_cpu(req->r_args.open.mask); + } + } + return mask; +} + ssize_t __ceph_getxattr(struct inode *inode, const char *name, void *value, size_t size) { struct ceph_inode_info *ci = ceph_inode(inode); - int err; struct ceph_inode_xattr *xattr; struct ceph_vxattr *vxattr = NULL; + int req_mask; + int err; if (!ceph_is_valid_xattr(name)) return -ENODATA; @@ -734,13 +752,24 @@ ssize_t __ceph_getxattr(struct inode *inode, const char *name, void *value, return err; } + req_mask = __get_request_mask(inode); + spin_lock(&ci->i_ceph_lock); dout("getxattr %p ver=%lld index_ver=%lld\n", inode, ci->i_xattrs.version, ci->i_xattrs.index_version); if (ci->i_xattrs.version == 0 || - !__ceph_caps_issued_mask(ci, CEPH_CAP_XATTR_SHARED, 1)) { + !((req_mask & CEPH_CAP_XATTR_SHARED) || + __ceph_caps_issued_mask(ci, CEPH_CAP_XATTR_SHARED, 1))) { spin_unlock(&ci->i_ceph_lock); + + /* security module gets xattr while filling trace */ + if (current->journal_info != NULL) { + pr_warn_ratelimited("sync getxattr %p " + "during filling trace\n", inode); + return -EBUSY; + } + /* get xattrs from mds (if we don't already have them) */ err = ceph_do_getattr(inode, CEPH_STAT_CAP_XATTR, true); if (err) @@ -767,6 +796,9 @@ ssize_t __ceph_getxattr(struct inode *inode, const char *name, void *value, memcpy(value, xattr->val, xattr->val_len); + if (current->journal_info != NULL && + !strncmp(name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN)) + ci->i_ceph_flags |= CEPH_I_SEC_INITED; out: spin_unlock(&ci->i_ceph_lock); return err; @@ -1017,7 +1049,15 @@ do_sync: do_sync_unlocked: if (lock_snap_rwsem) up_read(&mdsc->snap_rwsem); - err = ceph_sync_setxattr(dentry, name, value, size, flags); + + /* security module set xattr while filling trace */ + if (current->journal_info != NULL) { + pr_warn_ratelimited("sync setxattr %p " + "during filling trace\n", inode); + err = -EBUSY; + } else { + err = ceph_sync_setxattr(dentry, name, value, size, flags); + } out: ceph_free_cap_flush(prealloc_cf); kfree(newname); @@ -1166,3 +1206,25 @@ int ceph_removexattr(struct dentry *dentry, const char *name) return __ceph_removexattr(dentry, name); } + +#ifdef CONFIG_SECURITY +bool ceph_security_xattr_wanted(struct inode *in) +{ + return in->i_security != NULL; +} + +bool ceph_security_xattr_deadlock(struct inode *in) +{ + struct ceph_inode_info *ci; + bool ret; + if (in->i_security == NULL) + return false; + ci = ceph_inode(in); + spin_lock(&ci->i_ceph_lock); + ret = !(ci->i_ceph_flags & CEPH_I_SEC_INITED) && + !(ci->i_xattrs.version > 0 && + __ceph_caps_issued_mask(ci, 
CEPH_CAP_XATTR_SHARED, 0)); + spin_unlock(&ci->i_ceph_lock); + return ret; +} +#endif diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h index bf74005eedec..37f28bf55ce4 100644 --- a/include/linux/ceph/ceph_fs.h +++ b/include/linux/ceph/ceph_fs.h @@ -376,7 +376,8 @@ union ceph_mds_request_args { __le32 stripe_count; /* ... */ __le32 object_size; __le32 file_replication; - __le32 unused; /* used to be preferred osd */ + __le32 mask; /* CEPH_CAP_* */ + __le32 old_size; } __attribute__ ((packed)) open; struct { __le32 flags; -- cgit v1.2.3 From 96c22a3293512ba684e73a981196430f524689da Mon Sep 17 00:00:00 2001 From: Octavian Purdila Date: Wed, 23 Mar 2016 14:14:48 +0200 Subject: configfs: fix CONFIGFS_BIN_ATTR_[RW]O definitions The type should be struct configfs_bin_attribute and not struct configfs_attribute. Signed-off-by: Octavian Purdila Signed-off-by: Christoph Hellwig --- include/linux/configfs.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/configfs.h b/include/linux/configfs.h index 485fe5519448..d9d6a9d77489 100644 --- a/include/linux/configfs.h +++ b/include/linux/configfs.h @@ -188,7 +188,7 @@ static struct configfs_bin_attribute _pfx##attr_##_name = { \ } #define CONFIGFS_BIN_ATTR_RO(_pfx, _name, _priv, _maxsz) \ -static struct configfs_attribute _pfx##attr_##_name = { \ +static struct configfs_bin_attribute _pfx##attr_##_name = { \ .cb_attr = { \ .ca_name = __stringify(_name), \ .ca_mode = S_IRUGO, \ @@ -200,7 +200,7 @@ static struct configfs_attribute _pfx##attr_##_name = { \ } #define CONFIGFS_BIN_ATTR_WO(_pfx, _name, _priv, _maxsz) \ -static struct configfs_attribute _pfx##attr_##_name = { \ +static struct configfs_bin_attribute _pfx##attr_##_name = { \ .cb_attr = { \ .ca_name = __stringify(_name), \ .ca_mode = S_IWUSR, \ -- cgit v1.2.3 From 69b27baf00fa9b7b14b3263c105390d1683425b2 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Fri, 25 Mar 2016 14:20:21 -0700 Subject: sched: add schedule_timeout_idle() This will be needed in the patch "mm, oom: introduce oom reaper". Acked-by: Michal Hocko Cc: Ingo Molnar Cc: Peter Zijlstra Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 1 + kernel/time/timer.c | 11 +++++++++++ 2 files changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 589c4780b077..478b41de7f7d 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -426,6 +426,7 @@ extern signed long schedule_timeout(signed long timeout); extern signed long schedule_timeout_interruptible(signed long timeout); extern signed long schedule_timeout_killable(signed long timeout); extern signed long schedule_timeout_uninterruptible(signed long timeout); +extern signed long schedule_timeout_idle(signed long timeout); asmlinkage void schedule(void); extern void schedule_preempt_disabled(void); diff --git a/kernel/time/timer.c b/kernel/time/timer.c index d1798fa0c743..73164c3aa56b 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c @@ -1566,6 +1566,17 @@ signed long __sched schedule_timeout_uninterruptible(signed long timeout) } EXPORT_SYMBOL(schedule_timeout_uninterruptible); +/* + * Like schedule_timeout_uninterruptible(), except this task will not contribute + * to load average. 
+ */ +signed long __sched schedule_timeout_idle(signed long timeout) +{ + __set_current_state(TASK_IDLE); + return schedule_timeout(timeout); +} +EXPORT_SYMBOL(schedule_timeout_idle); + #ifdef CONFIG_HOTPLUG_CPU static void migrate_timer_list(struct tvec_base *new_base, struct hlist_head *head) { -- cgit v1.2.3 From aac453635549699c13a84ea1456d5b0e574ef855 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Fri, 25 Mar 2016 14:20:24 -0700 Subject: mm, oom: introduce oom reaper This patch (of 5): This is based on the idea from Mel Gorman discussed during LSFMM 2015 and independently brought up by Oleg Nesterov. The OOM killer currently allows killing only a single task, in the hope that the task will terminate in a reasonable time and free up its memory. Such a task (oom victim) will get access to memory reserves via mark_oom_victim to allow forward progress should there be a need for additional memory during the exit path. It has been shown (e.g. by Tetsuo Handa) that it is not that hard to construct workloads which break the core assumption mentioned above, and the OOM victim might take an unbounded amount of time to exit because it might be blocked in the uninterruptible state waiting for an event (e.g. lock) which is blocked by another task looping in the page allocator. This patch reduces the probability of such a lockup by introducing a specialized kernel thread (oom_reaper) which tries to reclaim additional memory by preemptively reaping the anonymous or swapped out memory owned by the oom victim, under the assumption that such memory won't be needed when its owner is killed and kicked out of userspace anyway. There is one notable exception to this, though: if the OOM victim was in the process of coredumping, the result would be incomplete. This is considered a reasonable constraint because the overall system health is more important than the debuggability of a particular application. A kernel thread has been chosen because we need a reliable way of invocation, so workqueue context is not appropriate because all the workers might be busy (e.g. allocating memory). Kswapd, which sounds like another good fit, is not appropriate either because it might get blocked on locks during reclaim as well. oom_reaper has to take mmap_sem on the target task for reading, so the solution is not 100% reliable because the semaphore might be held or blocked for write, but the probability is reduced considerably wrt. basically any lock blocking forward progress as described above. In order to prevent blocking on the lock without any forward progress, we use only a trylock and retry 10 times with a short sleep in between. Users of mmap_sem which need it for write should be carefully reviewed to use _killable waiting as much as possible and to reduce allocation requests done with the lock held to the absolute minimum, to reduce the risk even further. The API between the OOM killer and the oom reaper is quite trivial: wake_oom_reaper updates mm_to_reap with cmpxchg to guarantee only the NULL->mm transition, and the oom_reaper clears it atomically once it is done with the work. This means that only a single mm_struct can be reaped at a time. As the operation is potentially disruptive, we are trying to limit it to the necessary minimum, and the reaper blocks any updates while it operates on an mm. mm_struct is pinned by mm_count to allow parallel exit_mmap, and a race is detected by atomic_inc_not_zero(mm_users).
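The NULL->mm handoff described above can be modelled compactly with C11 atomics standing in for the kernel's cmpxchg(). The sketch below (names hypothetical, wait/wake machinery elided) also makes the limitation visible: a second victim arriving while the slot is busy is simply dropped, which is exactly what the queuing patch further down addresses.

#include <stdatomic.h>
#include <stddef.h>
#include <stdio.h>

struct mm_struct { int id; };

static _Atomic(struct mm_struct *) mm_to_reap;

/* cf. wake_oom_reaper(): only the NULL -> mm transition may queue */
static int queue_mm(struct mm_struct *mm)
{
	struct mm_struct *expected = NULL;

	return atomic_compare_exchange_strong(&mm_to_reap, &expected, mm);
}

/* cf. oom_reaper(): clear the slot once reaping is done */
static void reaping_done(void)
{
	atomic_store(&mm_to_reap, NULL);
}

int main(void)
{
	struct mm_struct a = { 1 }, b = { 2 };

	printf("queue a: %d\n", queue_mm(&a));	/* 1 - slot was empty */
	printf("queue b: %d\n", queue_mm(&b));	/* 0 - slot busy, b dropped */
	reaping_done();
	printf("queue b: %d\n", queue_mm(&b));	/* 1 - slot free again */
	return 0;
}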
Signed-off-by: Michal Hocko Suggested-by: Oleg Nesterov Suggested-by: Mel Gorman Acked-by: Mel Gorman Acked-by: David Rientjes Cc: Mel Gorman Cc: Tetsuo Handa Cc: Oleg Nesterov Cc: Hugh Dickins Cc: Andrea Argangeli Cc: Rik van Riel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 2 + mm/internal.h | 5 ++ mm/memory.c | 17 +++--- mm/oom_kill.c | 151 ++++++++++++++++++++++++++++++++++++++++++++++++++--- 4 files changed, 162 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 450fc977ed02..ed6407d1b7b5 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1132,6 +1132,8 @@ struct zap_details { struct address_space *check_mapping; /* Check page->mapping if set */ pgoff_t first_index; /* Lowest page->index to unmap */ pgoff_t last_index; /* Highest page->index to unmap */ + bool ignore_dirty; /* Ignore dirty pages */ + bool check_swap_entries; /* Check also swap entries */ }; struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr, diff --git a/mm/internal.h b/mm/internal.h index 7449392c6faa..b79abb6721cf 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -38,6 +38,11 @@ void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *start_vma, unsigned long floor, unsigned long ceiling); +void unmap_page_range(struct mmu_gather *tlb, + struct vm_area_struct *vma, + unsigned long addr, unsigned long end, + struct zap_details *details); + extern int __do_page_cache_readahead(struct address_space *mapping, struct file *filp, pgoff_t offset, unsigned long nr_to_read, unsigned long lookahead_size); diff --git a/mm/memory.c b/mm/memory.c index 81dca0083fcd..098f00d05461 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1102,6 +1102,12 @@ again: if (!PageAnon(page)) { if (pte_dirty(ptent)) { + /* + * oom_reaper cannot tear down dirty + * pages + */ + if (unlikely(details && details->ignore_dirty)) + continue; force_flush = 1; set_page_dirty(page); } @@ -1120,8 +1126,8 @@ again: } continue; } - /* If details->check_mapping, we leave swap entries. 
*/ - if (unlikely(details)) + /* only check swap_entries if explicitly asked for in details */ + if (unlikely(details && !details->check_swap_entries)) continue; entry = pte_to_swp_entry(ptent); @@ -1226,7 +1232,7 @@ static inline unsigned long zap_pud_range(struct mmu_gather *tlb, return addr; } -static void unmap_page_range(struct mmu_gather *tlb, +void unmap_page_range(struct mmu_gather *tlb, struct vm_area_struct *vma, unsigned long addr, unsigned long end, struct zap_details *details) @@ -1234,9 +1240,6 @@ static void unmap_page_range(struct mmu_gather *tlb, pgd_t *pgd; unsigned long next; - if (details && !details->check_mapping) - details = NULL; - BUG_ON(addr >= end); tlb_start_vma(tlb, vma); pgd = pgd_offset(vma->vm_mm, addr); @@ -2432,7 +2435,7 @@ static inline void unmap_mapping_range_tree(struct rb_root *root, void unmap_mapping_range(struct address_space *mapping, loff_t const holebegin, loff_t const holelen, int even_cows) { - struct zap_details details; + struct zap_details details = { }; pgoff_t hba = holebegin >> PAGE_SHIFT; pgoff_t hlen = (holelen + PAGE_SIZE - 1) >> PAGE_SHIFT; diff --git a/mm/oom_kill.c b/mm/oom_kill.c index 06f7e1707847..f7ed6ece0719 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -35,6 +35,11 @@ #include #include #include +#include +#include + +#include +#include "internal.h" #define CREATE_TRACE_POINTS #include @@ -405,6 +410,133 @@ static DECLARE_WAIT_QUEUE_HEAD(oom_victims_wait); bool oom_killer_disabled __read_mostly; +#ifdef CONFIG_MMU +/* + * OOM Reaper kernel thread which tries to reap the memory used by the OOM + * victim (if that is possible) to help the OOM killer to move on. + */ +static struct task_struct *oom_reaper_th; +static struct mm_struct *mm_to_reap; +static DECLARE_WAIT_QUEUE_HEAD(oom_reaper_wait); + +static bool __oom_reap_vmas(struct mm_struct *mm) +{ + struct mmu_gather tlb; + struct vm_area_struct *vma; + struct zap_details details = {.check_swap_entries = true, + .ignore_dirty = true}; + bool ret = true; + + /* We might have raced with exit path */ + if (!atomic_inc_not_zero(&mm->mm_users)) + return true; + + if (!down_read_trylock(&mm->mmap_sem)) { + ret = false; + goto out; + } + + tlb_gather_mmu(&tlb, mm, 0, -1); + for (vma = mm->mmap ; vma; vma = vma->vm_next) { + if (is_vm_hugetlb_page(vma)) + continue; + + /* + * mlocked VMAs require explicit munlocking before unmap. + * Let's keep it simple here and skip such VMAs. + */ + if (vma->vm_flags & VM_LOCKED) + continue; + + /* + * Only anonymous pages have a good chance to be dropped + * without additional steps which we cannot afford as we + * are OOM already. + * + * We do not even care about fs backed pages because all + * which are reclaimable have already been reclaimed and + * we do not want to block exit_mmap by keeping mm ref + * count elevated without a good reason. 
+ */ + if (vma_is_anonymous(vma) || !(vma->vm_flags & VM_SHARED)) + unmap_page_range(&tlb, vma, vma->vm_start, vma->vm_end, + &details); + } + tlb_finish_mmu(&tlb, 0, -1); + up_read(&mm->mmap_sem); +out: + mmput(mm); + return ret; +} + +static void oom_reap_vmas(struct mm_struct *mm) +{ + int attempts = 0; + + /* Retry the down_read_trylock(mmap_sem) a few times */ + while (attempts++ < 10 && !__oom_reap_vmas(mm)) + schedule_timeout_idle(HZ/10); + + /* Drop a reference taken by wake_oom_reaper */ + mmdrop(mm); +} + +static int oom_reaper(void *unused) +{ + while (true) { + struct mm_struct *mm; + + wait_event_freezable(oom_reaper_wait, + (mm = READ_ONCE(mm_to_reap))); + oom_reap_vmas(mm); + WRITE_ONCE(mm_to_reap, NULL); + } + + return 0; +} + +static void wake_oom_reaper(struct mm_struct *mm) +{ + struct mm_struct *old_mm; + + if (!oom_reaper_th) + return; + + /* + * Pin the given mm. Use mm_count instead of mm_users because + * we do not want to delay the address space tear down. + */ + atomic_inc(&mm->mm_count); + + /* + * Make sure that only a single mm is ever queued for the reaper + * because multiple are not necessary and the operation might be + * disruptive so better reduce it to the bare minimum. + */ + old_mm = cmpxchg(&mm_to_reap, NULL, mm); + if (!old_mm) + wake_up(&oom_reaper_wait); + else + mmdrop(mm); +} + +static int __init oom_init(void) +{ + oom_reaper_th = kthread_run(oom_reaper, NULL, "oom_reaper"); + if (IS_ERR(oom_reaper_th)) { + pr_err("Unable to start OOM reaper %ld. Continuing regardless\n", + PTR_ERR(oom_reaper_th)); + oom_reaper_th = NULL; + } + return 0; +} +subsys_initcall(oom_init) +#else +static void wake_oom_reaper(struct mm_struct *mm) +{ +} +#endif + /** * mark_oom_victim - mark the given task as OOM victim * @tsk: task to mark @@ -510,6 +642,7 @@ void oom_kill_process(struct oom_control *oc, struct task_struct *p, unsigned int victim_points = 0; static DEFINE_RATELIMIT_STATE(oom_rs, DEFAULT_RATELIMIT_INTERVAL, DEFAULT_RATELIMIT_BURST); + bool can_oom_reap = true; /* * If the task is already exiting, don't alarm the sysadmin or kill @@ -600,17 +733,23 @@ void oom_kill_process(struct oom_control *oc, struct task_struct *p, continue; if (same_thread_group(p, victim)) continue; - if (unlikely(p->flags & PF_KTHREAD)) - continue; - if (is_global_init(p)) - continue; - if (p->signal->oom_score_adj == OOM_SCORE_ADJ_MIN) + if (unlikely(p->flags & PF_KTHREAD) || is_global_init(p) || + p->signal->oom_score_adj == OOM_SCORE_ADJ_MIN) { + /* + * We cannot use oom_reaper for the mm shared by this + * process because it wouldn't get killed and so the + * memory might be still used. + */ + can_oom_reap = false; continue; - + } do_send_sig_info(SIGKILL, SEND_SIG_FORCED, p, true); } rcu_read_unlock(); + if (can_oom_reap) + wake_oom_reaper(mm); + mmdrop(mm); put_task_struct(victim); } -- cgit v1.2.3 From 36324a990cf578b57828c04cd85ac62cd25cf5a4 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Fri, 25 Mar 2016 14:20:27 -0700 Subject: oom: clear TIF_MEMDIE after oom_reaper managed to unmap the address space When oom_reaper manages to unmap all the eligible vmas, there shouldn't be much of the freeable memory held by the oom victim left anymore, so it makes sense to clear the TIF_MEMDIE flag for the victim and allow the OOM killer to select another task.
The lack of TIF_MEMDIE also means that the victim cannot access memory reserves anymore, but that shouldn't be a problem because it would get access again if it needs to allocate and hits the OOM killer again due to the fatal_signal_pending resp. PF_EXITING check. We can safely hide the task from the OOM killer because it is clearly not a good candidate anymore as everything reclaimable has been torn down already. This patch will allow capping the time an OOM victim can keep TIF_MEMDIE and thus hold off further global OOM killer actions, granted the oom reaper is able to take mmap_sem for the associated mm struct. This is not guaranteed now but further steps should make sure that mmap_sem for write should be blocked killable, which will help to reduce such lock contention. This is not done by this patch. Note that exit_oom_victim might be called on a remote task from __oom_reap_task now, so we have to check and clear the flag atomically, otherwise we might race and underflow oom_victims or wake up waiters too early. Signed-off-by: Michal Hocko Suggested-by: Johannes Weiner Suggested-by: Tetsuo Handa Cc: Andrea Argangeli Cc: David Rientjes Cc: Hugh Dickins Cc: Mel Gorman Cc: Oleg Nesterov Cc: Rik van Riel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/oom.h | 2 +- kernel/exit.c | 2 +- mm/oom_kill.c | 73 +++++++++++++++++++++++++++++++++++------------------ 3 files changed, 50 insertions(+), 27 deletions(-) (limited to 'include/linux') diff --git a/include/linux/oom.h b/include/linux/oom.h index 03e6257321f0..45993b840ed6 100644 --- a/include/linux/oom.h +++ b/include/linux/oom.h @@ -91,7 +91,7 @@ extern enum oom_scan_t oom_scan_process_thread(struct oom_control *oc, extern bool out_of_memory(struct oom_control *oc); -extern void exit_oom_victim(void); +extern void exit_oom_victim(struct task_struct *tsk); extern int register_oom_notifier(struct notifier_block *nb); extern int unregister_oom_notifier(struct notifier_block *nb); diff --git a/kernel/exit.c b/kernel/exit.c index 953d1a1c0387..fd90195667e1 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -435,7 +435,7 @@ static void exit_mm(struct task_struct *tsk) mm_update_next_owner(mm); mmput(mm); if (test_thread_flag(TIF_MEMDIE)) - exit_oom_victim(); + exit_oom_victim(tsk); } static struct task_struct *find_alive_thread(struct task_struct *p) diff --git a/mm/oom_kill.c b/mm/oom_kill.c index f7ed6ece0719..2830b1c6483e 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -416,20 +416,36 @@ bool oom_killer_disabled __read_mostly; * victim (if that is possible) to help the OOM killer to move on. */ static struct task_struct *oom_reaper_th; -static struct mm_struct *mm_to_reap; +static struct task_struct *task_to_reap; static DECLARE_WAIT_QUEUE_HEAD(oom_reaper_wait); -static bool __oom_reap_vmas(struct mm_struct *mm) +static bool __oom_reap_task(struct task_struct *tsk) { struct mmu_gather tlb; struct vm_area_struct *vma; + struct mm_struct *mm; + struct task_struct *p; struct zap_details details = {.check_swap_entries = true, .ignore_dirty = true}; bool ret = true; - /* We might have raced with exit path */ - if (!atomic_inc_not_zero(&mm->mm_users)) + /* + * Make sure we find the associated mm_struct even when the particular + * thread has already terminated and cleared its mm. + * We might have race with exit path so consider our work done if there + * is no mm.
+ */ + p = find_lock_task_mm(tsk); + if (!p) + return true; + + mm = p->mm; + if (!atomic_inc_not_zero(&mm->mm_users)) { + task_unlock(p); return true; + } + + task_unlock(p); if (!down_read_trylock(&mm->mmap_sem)) { ret = false; @@ -464,60 +480,66 @@ static bool __oom_reap_vmas(struct mm_struct *mm) } tlb_finish_mmu(&tlb, 0, -1); up_read(&mm->mmap_sem); + + /* + * Clear TIF_MEMDIE because the task shouldn't be sitting on a + * reasonably reclaimable memory anymore. OOM killer can continue + * by selecting other victim if unmapping hasn't led to any + * improvements. This also means that selecting this task doesn't + * make any sense. + */ + tsk->signal->oom_score_adj = OOM_SCORE_ADJ_MIN; + exit_oom_victim(tsk); out: mmput(mm); return ret; } -static void oom_reap_vmas(struct mm_struct *mm) +static void oom_reap_task(struct task_struct *tsk) { int attempts = 0; /* Retry the down_read_trylock(mmap_sem) a few times */ - while (attempts++ < 10 && !__oom_reap_vmas(mm)) + while (attempts++ < 10 && !__oom_reap_task(tsk)) schedule_timeout_idle(HZ/10); /* Drop a reference taken by wake_oom_reaper */ - mmdrop(mm); + put_task_struct(tsk); } static int oom_reaper(void *unused) { while (true) { - struct mm_struct *mm; + struct task_struct *tsk; wait_event_freezable(oom_reaper_wait, - (mm = READ_ONCE(mm_to_reap))); - oom_reap_vmas(mm); - WRITE_ONCE(mm_to_reap, NULL); + (tsk = READ_ONCE(task_to_reap))); + oom_reap_task(tsk); + WRITE_ONCE(task_to_reap, NULL); } return 0; } -static void wake_oom_reaper(struct mm_struct *mm) +static void wake_oom_reaper(struct task_struct *tsk) { - struct mm_struct *old_mm; + struct task_struct *old_tsk; if (!oom_reaper_th) return; - /* - * Pin the given mm. Use mm_count instead of mm_users because - * we do not want to delay the address space tear down. - */ - atomic_inc(&mm->mm_count); + get_task_struct(tsk); /* * Make sure that only a single mm is ever queued for the reaper * because multiple are not necessary and the operation might be * disruptive so better reduce it to the bare minimum. */ - old_mm = cmpxchg(&mm_to_reap, NULL, mm); - if (!old_mm) + old_tsk = cmpxchg(&task_to_reap, NULL, tsk); + if (!old_tsk) wake_up(&oom_reaper_wait); else - mmdrop(mm); + put_task_struct(tsk); } static int __init oom_init(void) @@ -532,7 +554,7 @@ static int __init oom_init(void) } subsys_initcall(oom_init) #else -static void wake_oom_reaper(struct mm_struct *mm) +static void wake_oom_reaper(struct task_struct *tsk) { } #endif @@ -563,9 +585,10 @@ void mark_oom_victim(struct task_struct *tsk) /** * exit_oom_victim - note the exit of an OOM victim */ -void exit_oom_victim(void) +void exit_oom_victim(struct task_struct *tsk) { - clear_thread_flag(TIF_MEMDIE); + if (!test_and_clear_tsk_thread_flag(tsk, TIF_MEMDIE)) + return; if (!atomic_dec_return(&oom_victims)) wake_up_all(&oom_victims_wait); @@ -748,7 +771,7 @@ void oom_kill_process(struct oom_control *oc, struct task_struct *p, rcu_read_unlock(); if (can_oom_reap) - wake_oom_reaper(mm); + wake_oom_reaper(victim); mmdrop(mm); put_task_struct(victim); -- cgit v1.2.3 From 03049269de433cb5fe2859be9ae4469ceb1163ed Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Fri, 25 Mar 2016 14:20:33 -0700 Subject: mm, oom_reaper: implement OOM victims queuing wake_oom_reaper has allowed only 1 oom victim to be queued. The main reason for that was the simplicity as other solutions would require some way of queuing. 
The current approach is racy and that was deemed sufficient as the oom_reaper is considered a best effort approach to help with oom handling when the OOM victim cannot terminate in a reasonable time. The race could lead to missing an oom victim which can get stuck:

out_of_memory                          oom_reaper
  wake_oom_reaper
    cmpxchg // OK
                                         oom_reap_task
                                           __oom_reap_task
oom_victim terminates
                                             atomic_inc_not_zero // fail
out_of_memory
  wake_oom_reaper
    cmpxchg // fails
                                         task_to_reap = NULL

This race requires 2 OOM invocations in a short time period, which is not very likely but certainly not impossible. E.g. the original victim might have not released a lot of memory for some reason. The situation would improve considerably if wake_oom_reaper used a more robust queuing. This is what this patch implements. This means adding an oom_reaper_list list_head into task_struct (eat a hole before the embedded thread_struct for that purpose) and an oom_reaper_lock spinlock for queuing synchronization. wake_oom_reaper will then add the task to the queue and oom_reaper will dequeue it. Signed-off-by: Michal Hocko Cc: Vladimir Davydov Cc: Andrea Argangeli Cc: David Rientjes Cc: Hugh Dickins Cc: Johannes Weiner Cc: Mel Gorman Cc: Oleg Nesterov Cc: Rik van Riel Cc: Tetsuo Handa Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 3 +++ mm/oom_kill.c | 36 +++++++++++++++++++----------------- 2 files changed, 22 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 478b41de7f7d..788f223f8f8f 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1849,6 +1849,9 @@ struct task_struct { unsigned long task_state_change; #endif int pagefault_disabled; +#ifdef CONFIG_MMU + struct list_head oom_reaper_list; +#endif /* CPU-specific state of this task */ struct thread_struct thread; /* diff --git a/mm/oom_kill.c b/mm/oom_kill.c index 30a60991173a..f6d4ae9f1c69 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -418,8 +418,10 @@ bool oom_killer_disabled __read_mostly; * victim (if that is possible) to help the OOM killer to move on. */ static struct task_struct *oom_reaper_th; -static struct task_struct *task_to_reap; static DECLARE_WAIT_QUEUE_HEAD(oom_reaper_wait); +static LIST_HEAD(oom_reaper_list); +static DEFINE_SPINLOCK(oom_reaper_lock); + static bool __oom_reap_task(struct task_struct *tsk) { @@ -524,12 +526,20 @@ static void oom_reap_task(struct task_struct *tsk) static int oom_reaper(void *unused) { while (true) { - struct task_struct *tsk; + struct task_struct *tsk = NULL; wait_event_freezable(oom_reaper_wait, - (tsk = READ_ONCE(task_to_reap))); - oom_reap_task(tsk); - WRITE_ONCE(task_to_reap, NULL); + (!list_empty(&oom_reaper_list))); + spin_lock(&oom_reaper_lock); + if (!list_empty(&oom_reaper_list)) { + tsk = list_first_entry(&oom_reaper_list, + struct task_struct, oom_reaper_list); + list_del(&tsk->oom_reaper_list); + } + spin_unlock(&oom_reaper_lock); + + if (tsk) + oom_reap_task(tsk); } return 0; @@ -537,23 +547,15 @@ static int oom_reaper(void *unused) static void wake_oom_reaper(struct task_struct *tsk) { - struct task_struct *old_tsk; - if (!oom_reaper_th) return; get_task_struct(tsk); - /* - * Make sure that only a single mm is ever queued for the reaper - * because multiple are not necessary and the operation might be - * disruptive so better reduce it to the bare minimum.
- */ - old_tsk = cmpxchg(&task_to_reap, NULL, tsk); - if (!old_tsk) - wake_up(&oom_reaper_wait); - else - put_task_struct(tsk); + spin_lock(&oom_reaper_lock); + list_add(&tsk->oom_reaper_list, &oom_reaper_list); + spin_unlock(&oom_reaper_lock); + wake_up(&oom_reaper_wait); } static int __init oom_init(void) -- cgit v1.2.3 From 855b018325737f7691f9b7d86339df40aa4e47c3 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Fri, 25 Mar 2016 14:20:36 -0700 Subject: oom, oom_reaper: disable oom_reaper for oom_kill_allocating_task Tetsuo has reported that oom_kill_allocating_task=1 will cause oom_reaper_list corruption because oom_kill_process doesn't follow standard OOM exclusion (aka ignores TIF_MEMDIE) and allows enqueueing the same task multiple times - e.g. by sacrificing the same child multiple times. This patch fixes the issue by introducing a new MMF_OOM_KILLED mm flag which is set in oom_kill_process atomically, and the oom reaper is disabled if the flag was already set. Signed-off-by: Michal Hocko Reported-by: Tetsuo Handa Cc: David Rientjes Cc: Mel Gorman Cc: Oleg Nesterov Cc: Hugh Dickins Cc: Rik van Riel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 2 ++ mm/oom_kill.c | 6 +++++- 2 files changed, 7 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 788f223f8f8f..c2d2d7c5d463 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -512,6 +512,8 @@ static inline int get_dumpable(struct mm_struct *mm) #define MMF_HAS_UPROBES 19 /* has uprobes */ #define MMF_RECALC_UPROBES 20 /* MMF_HAS_UPROBES can be wrong */ +#define MMF_OOM_KILLED 21 /* OOM killer has chosen this mm */ + #define MMF_INIT_MASK (MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK) struct sighand_struct { diff --git a/mm/oom_kill.c b/mm/oom_kill.c index f6d4ae9f1c69..1a21819a8e5e 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -680,7 +680,7 @@ void oom_kill_process(struct oom_control *oc, struct task_struct *p, unsigned int victim_points = 0; static DEFINE_RATELIMIT_STATE(oom_rs, DEFAULT_RATELIMIT_INTERVAL, DEFAULT_RATELIMIT_BURST); - bool can_oom_reap = true; + bool can_oom_reap; /* * If the task is already exiting, don't alarm the sysadmin or kill @@ -742,6 +742,10 @@ void oom_kill_process(struct oom_control *oc, struct task_struct *p, /* Get a reference to safely compare mm after task_unlock(victim) */ mm = victim->mm; atomic_inc(&mm->mm_count); + + /* Make sure we do not try to oom reap the mm multiple times */ + can_oom_reap = !test_and_set_bit(MMF_OOM_KILLED, &mm->flags); + /* * We should send SIGKILL before setting TIF_MEMDIE in order to prevent * the OOM victim from depleting the memory reserves from the user -- cgit v1.2.3 From 29c696e1c6eceb5db6b21f0c89495fcfcd40c0eb Mon Sep 17 00:00:00 2001 From: Vladimir Davydov Date: Fri, 25 Mar 2016 14:20:39 -0700 Subject: oom: make oom_reaper_list single linked Entries are only added/removed from oom_reaper_list at the head, so we can use a singly linked list and hence save a word in task_struct.
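Putting the last two patches together, the queue ends up as an intrusive singly linked LIFO guarded by a lock: the killer pushes at the head, the reaper pops at the head, and the only per-task cost is one pointer. A user-space sketch with a pthread mutex standing in for the spinlock (the wait_event/wake_up machinery is elided, and the harness names are hypothetical):

#include <pthread.h>
#include <stddef.h>
#include <stdio.h>

struct task_struct {
	int pid;
	struct task_struct *oom_reaper_list;	/* single next pointer */
};

static struct task_struct *oom_reaper_list;
static pthread_mutex_t oom_reaper_lock = PTHREAD_MUTEX_INITIALIZER;

/* cf. wake_oom_reaper(): push at the head */
static void queue_task(struct task_struct *tsk)
{
	pthread_mutex_lock(&oom_reaper_lock);
	tsk->oom_reaper_list = oom_reaper_list;
	oom_reaper_list = tsk;
	pthread_mutex_unlock(&oom_reaper_lock);
}

/* cf. oom_reaper(): pop at the head */
static struct task_struct *dequeue_task(void)
{
	struct task_struct *tsk;

	pthread_mutex_lock(&oom_reaper_lock);
	tsk = oom_reaper_list;
	if (tsk)
		oom_reaper_list = tsk->oom_reaper_list;
	pthread_mutex_unlock(&oom_reaper_lock);
	return tsk;
}

int main(void)
{
	struct task_struct a = { .pid = 100 }, b = { .pid = 200 };
	struct task_struct *tsk;

	queue_task(&a);
	queue_task(&b);
	while ((tsk = dequeue_task()))
		printf("reaping pid %d\n", tsk->pid);	/* 200, then 100 */
	return 0;
}

LIFO ordering is acceptable here because the reaper is best effort anyway; what matters is that enqueue and dequeue stay O(1) under the lock.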
Signed-off-by: Vladimir Davydov Signed-off-by: Michal Hocko Cc: Tetsuo Handa Cc: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 2 +- mm/oom_kill.c | 15 +++++++-------- 2 files changed, 8 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index c2d2d7c5d463..49b1febcf7c3 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1852,7 +1852,7 @@ struct task_struct { #endif int pagefault_disabled; #ifdef CONFIG_MMU - struct list_head oom_reaper_list; + struct task_struct *oom_reaper_list; #endif /* CPU-specific state of this task */ struct thread_struct thread; diff --git a/mm/oom_kill.c b/mm/oom_kill.c index 1a21819a8e5e..a49638f41e45 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -419,7 +419,7 @@ bool oom_killer_disabled __read_mostly; */ static struct task_struct *oom_reaper_th; static DECLARE_WAIT_QUEUE_HEAD(oom_reaper_wait); -static LIST_HEAD(oom_reaper_list); +static struct task_struct *oom_reaper_list; static DEFINE_SPINLOCK(oom_reaper_lock); @@ -528,13 +528,11 @@ static int oom_reaper(void *unused) while (true) { struct task_struct *tsk = NULL; - wait_event_freezable(oom_reaper_wait, - (!list_empty(&oom_reaper_list))); + wait_event_freezable(oom_reaper_wait, oom_reaper_list != NULL); spin_lock(&oom_reaper_lock); - if (!list_empty(&oom_reaper_list)) { - tsk = list_first_entry(&oom_reaper_list, - struct task_struct, oom_reaper_list); - list_del(&tsk->oom_reaper_list); + if (oom_reaper_list != NULL) { + tsk = oom_reaper_list; + oom_reaper_list = tsk->oom_reaper_list; } spin_unlock(&oom_reaper_lock); @@ -553,7 +551,8 @@ static void wake_oom_reaper(struct task_struct *tsk) get_task_struct(tsk); spin_lock(&oom_reaper_lock); - list_add(&tsk->oom_reaper_list, &oom_reaper_list); + tsk->oom_reaper_list = oom_reaper_list; + oom_reaper_list = tsk; spin_unlock(&oom_reaper_lock); wake_up(&oom_reaper_wait); } -- cgit v1.2.3 From bb29902a7515208846114b3b36a4281a9bbf766a Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Fri, 25 Mar 2016 14:20:44 -0700 Subject: oom, oom_reaper: protect oom_reaper_list using simpler way "oom, oom_reaper: disable oom_reaper for oom_kill_allocating_task" tried to protect oom_reaper_list using MMF_OOM_KILLED flag. But we can do it by simply checking tsk->oom_reaper_list != NULL. 
Signed-off-by: Tetsuo Handa Signed-off-by: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 2 -- mm/oom_kill.c | 8 ++------ 2 files changed, 2 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 49b1febcf7c3..60bba7e032dc 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -512,8 +512,6 @@ static inline int get_dumpable(struct mm_struct *mm) #define MMF_HAS_UPROBES 19 /* has uprobes */ #define MMF_RECALC_UPROBES 20 /* MMF_HAS_UPROBES can be wrong */ -#define MMF_OOM_KILLED 21 /* OOM killer has chosen this mm */ - #define MMF_INIT_MASK (MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK) struct sighand_struct { diff --git a/mm/oom_kill.c b/mm/oom_kill.c index 8cc55f0f0e5c..b34d279a7ee6 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -547,7 +547,7 @@ static int oom_reaper(void *unused) static void wake_oom_reaper(struct task_struct *tsk) { - if (!oom_reaper_th) + if (!oom_reaper_th || tsk->oom_reaper_list) return; get_task_struct(tsk); @@ -681,7 +681,7 @@ void oom_kill_process(struct oom_control *oc, struct task_struct *p, unsigned int victim_points = 0; static DEFINE_RATELIMIT_STATE(oom_rs, DEFAULT_RATELIMIT_INTERVAL, DEFAULT_RATELIMIT_BURST); - bool can_oom_reap; + bool can_oom_reap = true; /* * If the task is already exiting, don't alarm the sysadmin or kill @@ -743,10 +743,6 @@ void oom_kill_process(struct oom_control *oc, struct task_struct *p, /* Get a reference to safely compare mm after task_unlock(victim) */ mm = victim->mm; atomic_inc(&mm->mm_count); - - /* Make sure we do not try to oom reap the mm multiple times */ - can_oom_reap = !test_and_set_bit(MMF_OOM_KILLED, &mm->flags); - /* * We should send SIGKILL before setting TIF_MEMDIE in order to prevent * the OOM victim from depleting the memory reserves from the user -- cgit v1.2.3 From aaf4fb712b8311d8b950e89937479d61e9c25ba8 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Fri, 25 Mar 2016 14:21:53 -0700 Subject: include/linux/oom.h: remove undefined oom_kills_count()/note_oom_kill() A leftover from commit c32b3cbe0d06 ("oom, PM: make OOM detection in the freezer path raceless"). Signed-off-by: Tetsuo Handa Acked-by: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/oom.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/oom.h b/include/linux/oom.h index 45993b840ed6..628a43242a34 100644 --- a/include/linux/oom.h +++ b/include/linux/oom.h @@ -76,8 +76,6 @@ extern unsigned long oom_badness(struct task_struct *p, struct mem_cgroup *memcg, const nodemask_t *nodemask, unsigned long totalpages); -extern int oom_kills_count(void); -extern void note_oom_kill(void); extern void oom_kill_process(struct oom_control *oc, struct task_struct *p, unsigned int points, unsigned long totalpages, struct mem_cgroup *memcg, const char *message); -- cgit v1.2.3 From 7ed2f9e663854db313f177a511145630e398b402 Mon Sep 17 00:00:00 2001 From: Alexander Potapenko Date: Fri, 25 Mar 2016 14:21:59 -0700 Subject: mm, kasan: SLAB support Add KASAN hooks to SLAB allocator. This patch is based on the "mm: kasan: unified support for SLUB and SLAB allocators" patch originally prepared by Dmitry Chernenkov. 
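The SLAB support below sizes its redzones adaptively: small objects get small redzones, large objects progressively larger ones. The optimal_redzone() table from the diff further down is easy to exercise on its own; in this sketch the table values are copied from the patch while the harness around them is hypothetical:

#include <stddef.h>
#include <stdio.h>

/* table copied from the optimal_redzone() added by the patch below */
static size_t optimal_redzone(size_t object_size)
{
	return
		object_size <= 64 - 16		? 16 :
		object_size <= 128 - 32		? 32 :
		object_size <= 512 - 64		? 64 :
		object_size <= 4096 - 128	? 128 :
		object_size <= (1 << 14) - 256	? 256 :
		object_size <= (1 << 15) - 512	? 512 :
		object_size <= (1 << 16) - 1024	? 1024 : 2048;
}

int main(void)
{
	static const size_t sizes[] = { 16, 64, 256, 1024, 8192, 1 << 16 };
	size_t i;

	for (i = 0; i < sizeof(sizes) / sizeof(sizes[0]); i++)
		printf("object %6zu -> redzone %4zu bytes\n",
		       sizes[i], optimal_redzone(sizes[i]));
	return 0;
}

The thresholds are chosen so that object plus redzone tends to fill a power-of-two slot, which keeps slab waste bounded while still giving big objects enough redzone to catch wider out-of-bounds accesses.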
Signed-off-by: Alexander Potapenko Cc: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Cc: Andrey Konovalov Cc: Dmitry Vyukov Cc: Andrey Ryabinin Cc: Steven Rostedt Cc: Konstantin Serebryany Cc: Dmitry Chernenkov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/kasan.txt | 5 +-- include/linux/kasan.h | 12 ++++++ include/linux/slab.h | 6 +++ include/linux/slab_def.h | 14 +++++++ include/linux/slub_def.h | 11 +++++ lib/Kconfig.kasan | 4 +- mm/Makefile | 1 + mm/kasan/kasan.c | 102 +++++++++++++++++++++++++++++++++++++++++++++++ mm/kasan/kasan.h | 34 ++++++++++++++++ mm/kasan/report.c | 54 ++++++++++++++++++++----- mm/slab.c | 43 +++++++++++++++++--- mm/slab_common.c | 2 +- 12 files changed, 266 insertions(+), 22 deletions(-) (limited to 'include/linux') diff --git a/Documentation/kasan.txt b/Documentation/kasan.txt index aa1e0c91e368..7dd95b35cd7c 100644 --- a/Documentation/kasan.txt +++ b/Documentation/kasan.txt @@ -12,8 +12,7 @@ KASAN uses compile-time instrumentation for checking every memory access, therefore you will need a GCC version 4.9.2 or later. GCC 5.0 or later is required for detection of out-of-bounds accesses to stack or global variables. -Currently KASAN is supported only for x86_64 architecture and requires the -kernel to be built with the SLUB allocator. +Currently KASAN is supported only for x86_64 architecture. 1. Usage ======== @@ -27,7 +26,7 @@ inline are compiler instrumentation types. The former produces smaller binary the latter is 1.1 - 2 times faster. Inline instrumentation requires a GCC version 5.0 or later. -Currently KASAN works only with the SLUB memory allocator. +KASAN works with both SLUB and SLAB memory allocators. For better bug detection and nicer reporting, enable CONFIG_STACKTRACE. 
To disable instrumentation for specific files or directories, add a line diff --git a/include/linux/kasan.h b/include/linux/kasan.h index 0fdc798e3ff7..839f2007a0f9 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -48,6 +48,9 @@ void kasan_unpoison_task_stack(struct task_struct *task); void kasan_alloc_pages(struct page *page, unsigned int order); void kasan_free_pages(struct page *page, unsigned int order); +void kasan_cache_create(struct kmem_cache *cache, size_t *size, + unsigned long *flags); + void kasan_poison_slab(struct page *page); void kasan_unpoison_object_data(struct kmem_cache *cache, void *object); void kasan_poison_object_data(struct kmem_cache *cache, void *object); @@ -61,6 +64,11 @@ void kasan_krealloc(const void *object, size_t new_size); void kasan_slab_alloc(struct kmem_cache *s, void *object); void kasan_slab_free(struct kmem_cache *s, void *object); +struct kasan_cache { + int alloc_meta_offset; + int free_meta_offset; +}; + int kasan_module_alloc(void *addr, size_t size); void kasan_free_shadow(const struct vm_struct *vm); @@ -76,6 +84,10 @@ static inline void kasan_disable_current(void) {} static inline void kasan_alloc_pages(struct page *page, unsigned int order) {} static inline void kasan_free_pages(struct page *page, unsigned int order) {} +static inline void kasan_cache_create(struct kmem_cache *cache, + size_t *size, + unsigned long *flags) {} + static inline void kasan_poison_slab(struct page *page) {} static inline void kasan_unpoison_object_data(struct kmem_cache *cache, void *object) {} diff --git a/include/linux/slab.h b/include/linux/slab.h index e4b568738ca3..aa61595a1482 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -92,6 +92,12 @@ # define SLAB_ACCOUNT 0x00000000UL #endif +#ifdef CONFIG_KASAN +#define SLAB_KASAN 0x08000000UL +#else +#define SLAB_KASAN 0x00000000UL +#endif + /* The following flags affect the page allocator grouping pages by mobility */ #define SLAB_RECLAIM_ACCOUNT 0x00020000UL /* Objects are reclaimable */ #define SLAB_TEMPORARY SLAB_RECLAIM_ACCOUNT /* Objects are short-lived */ diff --git a/include/linux/slab_def.h b/include/linux/slab_def.h index e878ba35ae91..9edbbf352340 100644 --- a/include/linux/slab_def.h +++ b/include/linux/slab_def.h @@ -76,8 +76,22 @@ struct kmem_cache { #ifdef CONFIG_MEMCG struct memcg_cache_params memcg_params; #endif +#ifdef CONFIG_KASAN + struct kasan_cache kasan_info; +#endif struct kmem_cache_node *node[MAX_NUMNODES]; }; +static inline void *nearest_obj(struct kmem_cache *cache, struct page *page, + void *x) { + void *object = x - (x - page->s_mem) % cache->size; + void *last_object = page->s_mem + (cache->num - 1) * cache->size; + + if (unlikely(object > last_object)) + return last_object; + else + return object; +} + #endif /* _LINUX_SLAB_DEF_H */ diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h index ac5143f95ee6..665cd0cd18b8 100644 --- a/include/linux/slub_def.h +++ b/include/linux/slub_def.h @@ -130,4 +130,15 @@ static inline void *virt_to_obj(struct kmem_cache *s, void object_err(struct kmem_cache *s, struct page *page, u8 *object, char *reason); +static inline void *nearest_obj(struct kmem_cache *cache, struct page *page, + void *x) { + void *object = x - (x - page_address(page)) % cache->size; + void *last_object = page_address(page) + + (page->objects - 1) * cache->size; + if (unlikely(object > last_object)) + return last_object; + else + return object; +} + #endif /* _LINUX_SLUB_DEF_H */ diff --git a/lib/Kconfig.kasan b/lib/Kconfig.kasan 
index 0fee5acd5aa0..0e4d2b3b0aee 100644 --- a/lib/Kconfig.kasan +++ b/lib/Kconfig.kasan @@ -5,7 +5,7 @@ if HAVE_ARCH_KASAN config KASAN bool "KASan: runtime memory debugger" - depends on SLUB_DEBUG + depends on SLUB_DEBUG || (SLAB && !DEBUG_SLAB) select CONSTRUCTORS help Enables kernel address sanitizer - runtime memory debugger, @@ -16,6 +16,8 @@ config KASAN This feature consumes about 1/8 of available memory and brings about ~x3 performance slowdown. For better error detection enable CONFIG_STACKTRACE. + Currently CONFIG_KASAN doesn't work with CONFIG_DEBUG_SLAB + (the resulting kernel does not boot). choice prompt "Instrumentation type" diff --git a/mm/Makefile b/mm/Makefile index f5e797cbd128..deb467edca2d 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -3,6 +3,7 @@ # KASAN_SANITIZE_slab_common.o := n +KASAN_SANITIZE_slab.o := n KASAN_SANITIZE_slub.o := n # These files are disabled because they produce non-interesting and/or diff --git a/mm/kasan/kasan.c b/mm/kasan/kasan.c index 1ad20ade8c91..7c82509ef169 100644 --- a/mm/kasan/kasan.c +++ b/mm/kasan/kasan.c @@ -334,6 +334,59 @@ void kasan_free_pages(struct page *page, unsigned int order) KASAN_FREE_PAGE); } +#ifdef CONFIG_SLAB +/* + * Adaptive redzone policy taken from the userspace AddressSanitizer runtime. + * For larger allocations larger redzones are used. + */ +static size_t optimal_redzone(size_t object_size) +{ + int rz = + object_size <= 64 - 16 ? 16 : + object_size <= 128 - 32 ? 32 : + object_size <= 512 - 64 ? 64 : + object_size <= 4096 - 128 ? 128 : + object_size <= (1 << 14) - 256 ? 256 : + object_size <= (1 << 15) - 512 ? 512 : + object_size <= (1 << 16) - 1024 ? 1024 : 2048; + return rz; +} + +void kasan_cache_create(struct kmem_cache *cache, size_t *size, + unsigned long *flags) +{ + int redzone_adjust; + /* Make sure the adjusted size is still less than + * KMALLOC_MAX_CACHE_SIZE. + * TODO: this check is only useful for SLAB, but not SLUB. We'll need + * to skip it for SLUB when it starts using kasan_cache_create(). + */ + if (*size > KMALLOC_MAX_CACHE_SIZE - + sizeof(struct kasan_alloc_meta) - + sizeof(struct kasan_free_meta)) + return; + *flags |= SLAB_KASAN; + /* Add alloc meta. */ + cache->kasan_info.alloc_meta_offset = *size; + *size += sizeof(struct kasan_alloc_meta); + + /* Add free meta. 
*/ + if (cache->flags & SLAB_DESTROY_BY_RCU || cache->ctor || + cache->object_size < sizeof(struct kasan_free_meta)) { + cache->kasan_info.free_meta_offset = *size; + *size += sizeof(struct kasan_free_meta); + } + redzone_adjust = optimal_redzone(cache->object_size) - + (*size - cache->object_size); + if (redzone_adjust > 0) + *size += redzone_adjust; + *size = min(KMALLOC_MAX_CACHE_SIZE, + max(*size, + cache->object_size + + optimal_redzone(cache->object_size))); +} +#endif + void kasan_poison_slab(struct page *page) { kasan_poison_shadow(page_address(page), @@ -351,8 +404,36 @@ void kasan_poison_object_data(struct kmem_cache *cache, void *object) kasan_poison_shadow(object, round_up(cache->object_size, KASAN_SHADOW_SCALE_SIZE), KASAN_KMALLOC_REDZONE); +#ifdef CONFIG_SLAB + if (cache->flags & SLAB_KASAN) { + struct kasan_alloc_meta *alloc_info = + get_alloc_info(cache, object); + alloc_info->state = KASAN_STATE_INIT; + } +#endif +} + +static inline void set_track(struct kasan_track *track) +{ + track->cpu = raw_smp_processor_id(); + track->pid = current->pid; + track->when = jiffies; } +#ifdef CONFIG_SLAB +struct kasan_alloc_meta *get_alloc_info(struct kmem_cache *cache, + const void *object) +{ + return (void *)object + cache->kasan_info.alloc_meta_offset; +} + +struct kasan_free_meta *get_free_info(struct kmem_cache *cache, + const void *object) +{ + return (void *)object + cache->kasan_info.free_meta_offset; +} +#endif + void kasan_slab_alloc(struct kmem_cache *cache, void *object) { kasan_kmalloc(cache, object, cache->object_size); @@ -367,6 +448,17 @@ void kasan_slab_free(struct kmem_cache *cache, void *object) if (unlikely(cache->flags & SLAB_DESTROY_BY_RCU)) return; +#ifdef CONFIG_SLAB + if (cache->flags & SLAB_KASAN) { + struct kasan_free_meta *free_info = + get_free_info(cache, object); + struct kasan_alloc_meta *alloc_info = + get_alloc_info(cache, object); + alloc_info->state = KASAN_STATE_FREE; + set_track(&free_info->track); + } +#endif + kasan_poison_shadow(object, rounded_up_size, KASAN_KMALLOC_FREE); } @@ -386,6 +478,16 @@ void kasan_kmalloc(struct kmem_cache *cache, const void *object, size_t size) kasan_unpoison_shadow(object, size); kasan_poison_shadow((void *)redzone_start, redzone_end - redzone_start, KASAN_KMALLOC_REDZONE); +#ifdef CONFIG_SLAB + if (cache->flags & SLAB_KASAN) { + struct kasan_alloc_meta *alloc_info = + get_alloc_info(cache, object); + + alloc_info->state = KASAN_STATE_ALLOC; + alloc_info->alloc_size = size; + set_track(&alloc_info->track); + } +#endif } EXPORT_SYMBOL(kasan_kmalloc); diff --git a/mm/kasan/kasan.h b/mm/kasan/kasan.h index 4f6c62e5c21e..7b9e4ab9b66b 100644 --- a/mm/kasan/kasan.h +++ b/mm/kasan/kasan.h @@ -54,6 +54,40 @@ struct kasan_global { #endif }; +/** + * Structures to keep alloc and free tracks * + */ + +enum kasan_state { + KASAN_STATE_INIT, + KASAN_STATE_ALLOC, + KASAN_STATE_FREE +}; + +struct kasan_track { + u64 cpu : 6; /* for NR_CPUS = 64 */ + u64 pid : 16; /* 65536 processes */ + u64 when : 42; /* ~140 years */ +}; + +struct kasan_alloc_meta { + u32 state : 2; /* enum kasan_state */ + u32 alloc_size : 30; + struct kasan_track track; +}; + +struct kasan_free_meta { + /* Allocator freelist pointer, unused by KASAN. 
*/ + void **freelist; + struct kasan_track track; +}; + +struct kasan_alloc_meta *get_alloc_info(struct kmem_cache *cache, + const void *object); +struct kasan_free_meta *get_free_info(struct kmem_cache *cache, + const void *object); + + static inline const void *kasan_shadow_to_mem(const void *shadow_addr) { return (void *)(((unsigned long)shadow_addr - KASAN_SHADOW_OFFSET) diff --git a/mm/kasan/report.c b/mm/kasan/report.c index 745aa8f36028..3e3385cc97ac 100644 --- a/mm/kasan/report.c +++ b/mm/kasan/report.c @@ -115,6 +115,46 @@ static inline bool init_task_stack_addr(const void *addr) sizeof(init_thread_union.stack)); } +#ifdef CONFIG_SLAB +static void print_track(struct kasan_track *track) +{ + pr_err("PID = %u, CPU = %u, timestamp = %lu\n", track->pid, + track->cpu, (unsigned long)track->when); +} + +static void object_err(struct kmem_cache *cache, struct page *page, + void *object, char *unused_reason) +{ + struct kasan_alloc_meta *alloc_info = get_alloc_info(cache, object); + struct kasan_free_meta *free_info; + + dump_stack(); + pr_err("Object at %p, in cache %s\n", object, cache->name); + if (!(cache->flags & SLAB_KASAN)) + return; + switch (alloc_info->state) { + case KASAN_STATE_INIT: + pr_err("Object not allocated yet\n"); + break; + case KASAN_STATE_ALLOC: + pr_err("Object allocated with size %u bytes.\n", + alloc_info->alloc_size); + pr_err("Allocation:\n"); + print_track(&alloc_info->track); + break; + case KASAN_STATE_FREE: + pr_err("Object freed, allocated with size %u bytes\n", + alloc_info->alloc_size); + free_info = get_free_info(cache, object); + pr_err("Allocation:\n"); + print_track(&alloc_info->track); + pr_err("Deallocation:\n"); + print_track(&free_info->track); + break; + } +} +#endif + static void print_address_description(struct kasan_access_info *info) { const void *addr = info->access_addr; @@ -126,17 +166,10 @@ static void print_address_description(struct kasan_access_info *info) if (PageSlab(page)) { void *object; struct kmem_cache *cache = page->slab_cache; - void *last_object; - - object = virt_to_obj(cache, page_address(page), addr); - last_object = page_address(page) + - page->objects * cache->size; - - if (unlikely(object > last_object)) - object = last_object; /* we hit into padding */ - + object = nearest_obj(cache, page, + (void *)info->access_addr); object_err(cache, page, object, - "kasan: bad access detected"); + "kasan: bad access detected"); return; } dump_page(page, "kasan: bad access detected"); @@ -146,7 +179,6 @@ static void print_address_description(struct kasan_access_info *info) if (!init_task_stack_addr(addr)) pr_err("Address belongs to variable %pS\n", addr); } - dump_stack(); } diff --git a/mm/slab.c b/mm/slab.c index e719a5cb3396..7515578471d8 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -2086,6 +2086,8 @@ __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags) } #endif + kasan_cache_create(cachep, &size, &flags); + size = ALIGN(size, cachep->align); /* * We should restrict the number of objects in a slab to implement @@ -2387,8 +2389,13 @@ static void cache_init_objs_debug(struct kmem_cache *cachep, struct page *page) * cache which they are a constructor for. Otherwise, deadlock. * They must also be threaded. 
*/ - if (cachep->ctor && !(cachep->flags & SLAB_POISON)) + if (cachep->ctor && !(cachep->flags & SLAB_POISON)) { + kasan_unpoison_object_data(cachep, + objp + obj_offset(cachep)); cachep->ctor(objp + obj_offset(cachep)); + kasan_poison_object_data( + cachep, objp + obj_offset(cachep)); + } if (cachep->flags & SLAB_RED_ZONE) { if (*dbg_redzone2(cachep, objp) != RED_INACTIVE) @@ -2409,6 +2416,7 @@ static void cache_init_objs(struct kmem_cache *cachep, struct page *page) { int i; + void *objp; cache_init_objs_debug(cachep, page); @@ -2419,8 +2427,12 @@ static void cache_init_objs(struct kmem_cache *cachep, for (i = 0; i < cachep->num; i++) { /* constructor could break poison info */ - if (DEBUG == 0 && cachep->ctor) - cachep->ctor(index_to_obj(cachep, page, i)); + if (DEBUG == 0 && cachep->ctor) { + objp = index_to_obj(cachep, page, i); + kasan_unpoison_object_data(cachep, objp); + cachep->ctor(objp); + kasan_poison_object_data(cachep, objp); + } set_free_obj(page, i, i); } @@ -2550,6 +2562,7 @@ static int cache_grow(struct kmem_cache *cachep, slab_map_pages(cachep, page, freelist); + kasan_poison_slab(page); cache_init_objs(cachep, page); if (gfpflags_allow_blocking(local_flags)) @@ -3316,6 +3329,8 @@ static inline void __cache_free(struct kmem_cache *cachep, void *objp, { struct array_cache *ac = cpu_cache_get(cachep); + kasan_slab_free(cachep, objp); + check_irq_off(); kmemleak_free_recursive(objp, cachep->flags); objp = cache_free_debugcheck(cachep, objp, caller); @@ -3363,6 +3378,7 @@ void *kmem_cache_alloc(struct kmem_cache *cachep, gfp_t flags) { void *ret = slab_alloc(cachep, flags, _RET_IP_); + kasan_slab_alloc(cachep, ret); trace_kmem_cache_alloc(_RET_IP_, ret, cachep->object_size, cachep->size, flags); @@ -3428,6 +3444,7 @@ kmem_cache_alloc_trace(struct kmem_cache *cachep, gfp_t flags, size_t size) ret = slab_alloc(cachep, flags, _RET_IP_); + kasan_kmalloc(cachep, ret, size); trace_kmalloc(_RET_IP_, ret, size, cachep->size, flags); return ret; @@ -3451,6 +3468,7 @@ void *kmem_cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid) { void *ret = slab_alloc_node(cachep, flags, nodeid, _RET_IP_); + kasan_slab_alloc(cachep, ret); trace_kmem_cache_alloc_node(_RET_IP_, ret, cachep->object_size, cachep->size, flags, nodeid); @@ -3468,7 +3486,7 @@ void *kmem_cache_alloc_node_trace(struct kmem_cache *cachep, void *ret; ret = slab_alloc_node(cachep, flags, nodeid, _RET_IP_); - + kasan_kmalloc(cachep, ret, size); trace_kmalloc_node(_RET_IP_, ret, size, cachep->size, flags, nodeid); @@ -3481,11 +3499,15 @@ static __always_inline void * __do_kmalloc_node(size_t size, gfp_t flags, int node, unsigned long caller) { struct kmem_cache *cachep; + void *ret; cachep = kmalloc_slab(size, flags); if (unlikely(ZERO_OR_NULL_PTR(cachep))) return cachep; - return kmem_cache_alloc_node_trace(cachep, flags, node, size); + ret = kmem_cache_alloc_node_trace(cachep, flags, node, size); + kasan_kmalloc(cachep, ret, size); + + return ret; } void *__kmalloc_node(size_t size, gfp_t flags, int node) @@ -3519,6 +3541,7 @@ static __always_inline void *__do_kmalloc(size_t size, gfp_t flags, return cachep; ret = slab_alloc(cachep, flags, caller); + kasan_kmalloc(cachep, ret, size); trace_kmalloc(caller, ret, size, cachep->size, flags); @@ -4290,10 +4313,18 @@ module_init(slab_proc_init); */ size_t ksize(const void *objp) { + size_t size; + BUG_ON(!objp); if (unlikely(objp == ZERO_SIZE_PTR)) return 0; - return virt_to_cache(objp)->object_size; + size = virt_to_cache(objp)->object_size; + /* We assume that 
ksize callers could use the whole allocated area, + * so we need to unpoison this area. + */ + kasan_krealloc(objp, size); + + return size; } EXPORT_SYMBOL(ksize); diff --git a/mm/slab_common.c b/mm/slab_common.c index b2e379639a5b..4de72e220c82 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c @@ -35,7 +35,7 @@ struct kmem_cache *kmem_cache; */ #define SLAB_NEVER_MERGE (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER | \ SLAB_TRACE | SLAB_DESTROY_BY_RCU | SLAB_NOLEAKTRACE | \ - SLAB_FAILSLAB) + SLAB_FAILSLAB | SLAB_KASAN) #define SLAB_MERGE_SAME (SLAB_RECLAIM_ACCOUNT | SLAB_CACHE_DMA | \ SLAB_NOTRACK | SLAB_ACCOUNT) -- cgit v1.2.3 From 505f5dcb1c419e55a9621a01f83eb5745d8d7398 Mon Sep 17 00:00:00 2001 From: Alexander Potapenko Date: Fri, 25 Mar 2016 14:22:02 -0700 Subject: mm, kasan: add GFP flags to KASAN API Add GFP flags to KASAN hooks for future patches to use. This patch is based on the "mm: kasan: unified support for SLUB and SLAB allocators" patch originally prepared by Dmitry Chernenkov. Signed-off-by: Alexander Potapenko Cc: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Cc: Andrey Konovalov Cc: Dmitry Vyukov Cc: Andrey Ryabinin Cc: Steven Rostedt Cc: Konstantin Serebryany Cc: Dmitry Chernenkov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kasan.h | 19 +++++++++++-------- include/linux/slab.h | 4 ++-- mm/kasan/kasan.c | 15 ++++++++------- mm/mempool.c | 16 ++++++++-------- mm/slab.c | 15 ++++++++------- mm/slab.h | 2 +- mm/slab_common.c | 4 ++-- mm/slub.c | 15 ++++++++------- 8 files changed, 48 insertions(+), 42 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kasan.h b/include/linux/kasan.h index 839f2007a0f9..737371b56044 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -55,13 +55,14 @@ void kasan_poison_slab(struct page *page); void kasan_unpoison_object_data(struct kmem_cache *cache, void *object); void kasan_poison_object_data(struct kmem_cache *cache, void *object); -void kasan_kmalloc_large(const void *ptr, size_t size); +void kasan_kmalloc_large(const void *ptr, size_t size, gfp_t flags); void kasan_kfree_large(const void *ptr); void kasan_kfree(void *ptr); -void kasan_kmalloc(struct kmem_cache *s, const void *object, size_t size); -void kasan_krealloc(const void *object, size_t new_size); +void kasan_kmalloc(struct kmem_cache *s, const void *object, size_t size, + gfp_t flags); +void kasan_krealloc(const void *object, size_t new_size, gfp_t flags); -void kasan_slab_alloc(struct kmem_cache *s, void *object); +void kasan_slab_alloc(struct kmem_cache *s, void *object, gfp_t flags); void kasan_slab_free(struct kmem_cache *s, void *object); struct kasan_cache { @@ -94,14 +95,16 @@ static inline void kasan_unpoison_object_data(struct kmem_cache *cache, static inline void kasan_poison_object_data(struct kmem_cache *cache, void *object) {} -static inline void kasan_kmalloc_large(void *ptr, size_t size) {} +static inline void kasan_kmalloc_large(void *ptr, size_t size, gfp_t flags) {} static inline void kasan_kfree_large(const void *ptr) {} static inline void kasan_kfree(void *ptr) {} static inline void kasan_kmalloc(struct kmem_cache *s, const void *object, - size_t size) {} -static inline void kasan_krealloc(const void *object, size_t new_size) {} + size_t size, gfp_t flags) {} +static inline void kasan_krealloc(const void *object, size_t new_size, + gfp_t flags) {} -static inline void kasan_slab_alloc(struct kmem_cache *s, void *object) {} +static inline void kasan_slab_alloc(struct 
kmem_cache *s, void *object, + gfp_t flags) {} static inline void kasan_slab_free(struct kmem_cache *s, void *object) {} static inline int kasan_module_alloc(void *addr, size_t size) { return 0; } diff --git a/include/linux/slab.h b/include/linux/slab.h index aa61595a1482..508bd827e6dc 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -376,7 +376,7 @@ static __always_inline void *kmem_cache_alloc_trace(struct kmem_cache *s, { void *ret = kmem_cache_alloc(s, flags); - kasan_kmalloc(s, ret, size); + kasan_kmalloc(s, ret, size, flags); return ret; } @@ -387,7 +387,7 @@ kmem_cache_alloc_node_trace(struct kmem_cache *s, { void *ret = kmem_cache_alloc_node(s, gfpflags, node); - kasan_kmalloc(s, ret, size); + kasan_kmalloc(s, ret, size, gfpflags); return ret; } #endif /* CONFIG_TRACING */ diff --git a/mm/kasan/kasan.c b/mm/kasan/kasan.c index 7c82509ef169..cb998e0ec9d3 100644 --- a/mm/kasan/kasan.c +++ b/mm/kasan/kasan.c @@ -434,9 +434,9 @@ struct kasan_free_meta *get_free_info(struct kmem_cache *cache, } #endif -void kasan_slab_alloc(struct kmem_cache *cache, void *object) +void kasan_slab_alloc(struct kmem_cache *cache, void *object, gfp_t flags) { - kasan_kmalloc(cache, object, cache->object_size); + kasan_kmalloc(cache, object, cache->object_size, flags); } void kasan_slab_free(struct kmem_cache *cache, void *object) @@ -462,7 +462,8 @@ void kasan_slab_free(struct kmem_cache *cache, void *object) kasan_poison_shadow(object, rounded_up_size, KASAN_KMALLOC_FREE); } -void kasan_kmalloc(struct kmem_cache *cache, const void *object, size_t size) +void kasan_kmalloc(struct kmem_cache *cache, const void *object, size_t size, + gfp_t flags) { unsigned long redzone_start; unsigned long redzone_end; @@ -491,7 +492,7 @@ void kasan_kmalloc(struct kmem_cache *cache, const void *object, size_t size) } EXPORT_SYMBOL(kasan_kmalloc); -void kasan_kmalloc_large(const void *ptr, size_t size) +void kasan_kmalloc_large(const void *ptr, size_t size, gfp_t flags) { struct page *page; unsigned long redzone_start; @@ -510,7 +511,7 @@ void kasan_kmalloc_large(const void *ptr, size_t size) KASAN_PAGE_REDZONE); } -void kasan_krealloc(const void *object, size_t size) +void kasan_krealloc(const void *object, size_t size, gfp_t flags) { struct page *page; @@ -520,9 +521,9 @@ void kasan_krealloc(const void *object, size_t size) page = virt_to_head_page(object); if (unlikely(!PageSlab(page))) - kasan_kmalloc_large(object, size); + kasan_kmalloc_large(object, size, flags); else - kasan_kmalloc(page->slab_cache, object, size); + kasan_kmalloc(page->slab_cache, object, size, flags); } void kasan_kfree(void *ptr) diff --git a/mm/mempool.c b/mm/mempool.c index 07c383ddbbab..9b7a14a791cc 100644 --- a/mm/mempool.c +++ b/mm/mempool.c @@ -112,12 +112,12 @@ static void kasan_poison_element(mempool_t *pool, void *element) kasan_free_pages(element, (unsigned long)pool->pool_data); } -static void kasan_unpoison_element(mempool_t *pool, void *element) +static void kasan_unpoison_element(mempool_t *pool, void *element, gfp_t flags) { if (pool->alloc == mempool_alloc_slab) - kasan_slab_alloc(pool->pool_data, element); + kasan_slab_alloc(pool->pool_data, element, flags); if (pool->alloc == mempool_kmalloc) - kasan_krealloc(element, (size_t)pool->pool_data); + kasan_krealloc(element, (size_t)pool->pool_data, flags); if (pool->alloc == mempool_alloc_pages) kasan_alloc_pages(element, (unsigned long)pool->pool_data); } @@ -130,12 +130,12 @@ static void add_element(mempool_t *pool, void *element) pool->elements[pool->curr_nr++] = 
element; } -static void *remove_element(mempool_t *pool) +static void *remove_element(mempool_t *pool, gfp_t flags) { void *element = pool->elements[--pool->curr_nr]; BUG_ON(pool->curr_nr < 0); - kasan_unpoison_element(pool, element); + kasan_unpoison_element(pool, element, flags); check_element(pool, element); return element; } @@ -154,7 +154,7 @@ void mempool_destroy(mempool_t *pool) return; while (pool->curr_nr) { - void *element = remove_element(pool); + void *element = remove_element(pool, GFP_KERNEL); pool->free(element, pool->pool_data); } kfree(pool->elements); @@ -250,7 +250,7 @@ int mempool_resize(mempool_t *pool, int new_min_nr) spin_lock_irqsave(&pool->lock, flags); if (new_min_nr <= pool->min_nr) { while (new_min_nr < pool->curr_nr) { - element = remove_element(pool); + element = remove_element(pool, GFP_KERNEL); spin_unlock_irqrestore(&pool->lock, flags); pool->free(element, pool->pool_data); spin_lock_irqsave(&pool->lock, flags); @@ -347,7 +347,7 @@ repeat_alloc: spin_lock_irqsave(&pool->lock, flags); if (likely(pool->curr_nr)) { - element = remove_element(pool); + element = remove_element(pool, gfp_temp); spin_unlock_irqrestore(&pool->lock, flags); /* paired with rmb in mempool_free(), read comment there */ smp_wmb(); diff --git a/mm/slab.c b/mm/slab.c index 7515578471d8..17e2848979c5 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -3378,7 +3378,7 @@ void *kmem_cache_alloc(struct kmem_cache *cachep, gfp_t flags) { void *ret = slab_alloc(cachep, flags, _RET_IP_); - kasan_slab_alloc(cachep, ret); + kasan_slab_alloc(cachep, ret, flags); trace_kmem_cache_alloc(_RET_IP_, ret, cachep->object_size, cachep->size, flags); @@ -3444,7 +3444,7 @@ kmem_cache_alloc_trace(struct kmem_cache *cachep, gfp_t flags, size_t size) ret = slab_alloc(cachep, flags, _RET_IP_); - kasan_kmalloc(cachep, ret, size); + kasan_kmalloc(cachep, ret, size, flags); trace_kmalloc(_RET_IP_, ret, size, cachep->size, flags); return ret; @@ -3468,7 +3468,7 @@ void *kmem_cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid) { void *ret = slab_alloc_node(cachep, flags, nodeid, _RET_IP_); - kasan_slab_alloc(cachep, ret); + kasan_slab_alloc(cachep, ret, flags); trace_kmem_cache_alloc_node(_RET_IP_, ret, cachep->object_size, cachep->size, flags, nodeid); @@ -3486,7 +3486,8 @@ void *kmem_cache_alloc_node_trace(struct kmem_cache *cachep, void *ret; ret = slab_alloc_node(cachep, flags, nodeid, _RET_IP_); - kasan_kmalloc(cachep, ret, size); + + kasan_kmalloc(cachep, ret, size, flags); trace_kmalloc_node(_RET_IP_, ret, size, cachep->size, flags, nodeid); @@ -3505,7 +3506,7 @@ __do_kmalloc_node(size_t size, gfp_t flags, int node, unsigned long caller) if (unlikely(ZERO_OR_NULL_PTR(cachep))) return cachep; ret = kmem_cache_alloc_node_trace(cachep, flags, node, size); - kasan_kmalloc(cachep, ret, size); + kasan_kmalloc(cachep, ret, size, flags); return ret; } @@ -3541,7 +3542,7 @@ static __always_inline void *__do_kmalloc(size_t size, gfp_t flags, return cachep; ret = slab_alloc(cachep, flags, caller); - kasan_kmalloc(cachep, ret, size); + kasan_kmalloc(cachep, ret, size, flags); trace_kmalloc(caller, ret, size, cachep->size, flags); @@ -4323,7 +4324,7 @@ size_t ksize(const void *objp) /* We assume that ksize callers could use the whole allocated area, * so we need to unpoison this area. 
*/ - kasan_krealloc(objp, size); + kasan_krealloc(objp, size, GFP_NOWAIT); return size; } diff --git a/mm/slab.h b/mm/slab.h index ff39a8fc3b3f..5969769fbee6 100644 --- a/mm/slab.h +++ b/mm/slab.h @@ -405,7 +405,7 @@ static inline void slab_post_alloc_hook(struct kmem_cache *s, gfp_t flags, kmemcheck_slab_alloc(s, flags, object, slab_ksize(s)); kmemleak_alloc_recursive(object, s->object_size, 1, s->flags, flags); - kasan_slab_alloc(s, object); + kasan_slab_alloc(s, object, flags); } memcg_kmem_put_cache(s); } diff --git a/mm/slab_common.c b/mm/slab_common.c index 4de72e220c82..3239bfd758e6 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c @@ -1013,7 +1013,7 @@ void *kmalloc_order(size_t size, gfp_t flags, unsigned int order) page = alloc_kmem_pages(flags, order); ret = page ? page_address(page) : NULL; kmemleak_alloc(ret, size, 1, flags); - kasan_kmalloc_large(ret, size); + kasan_kmalloc_large(ret, size, flags); return ret; } EXPORT_SYMBOL(kmalloc_order); @@ -1192,7 +1192,7 @@ static __always_inline void *__do_krealloc(const void *p, size_t new_size, ks = ksize(p); if (ks >= new_size) { - kasan_krealloc((void *)p, new_size); + kasan_krealloc((void *)p, new_size, flags); return (void *)p; } diff --git a/mm/slub.c b/mm/slub.c index 7277413ebc8b..4dbb109eb8cd 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -1313,7 +1313,7 @@ static inline void dec_slabs_node(struct kmem_cache *s, int node, static inline void kmalloc_large_node_hook(void *ptr, size_t size, gfp_t flags) { kmemleak_alloc(ptr, size, 1, flags); - kasan_kmalloc_large(ptr, size); + kasan_kmalloc_large(ptr, size, flags); } static inline void kfree_hook(const void *x) @@ -2596,7 +2596,7 @@ void *kmem_cache_alloc_trace(struct kmem_cache *s, gfp_t gfpflags, size_t size) { void *ret = slab_alloc(s, gfpflags, _RET_IP_); trace_kmalloc(_RET_IP_, ret, size, s->size, gfpflags); - kasan_kmalloc(s, ret, size); + kasan_kmalloc(s, ret, size, gfpflags); return ret; } EXPORT_SYMBOL(kmem_cache_alloc_trace); @@ -2624,7 +2624,7 @@ void *kmem_cache_alloc_node_trace(struct kmem_cache *s, trace_kmalloc_node(_RET_IP_, ret, size, s->size, gfpflags, node); - kasan_kmalloc(s, ret, size); + kasan_kmalloc(s, ret, size, gfpflags); return ret; } EXPORT_SYMBOL(kmem_cache_alloc_node_trace); @@ -3182,7 +3182,8 @@ static void early_kmem_cache_node_alloc(int node) init_object(kmem_cache_node, n, SLUB_RED_ACTIVE); init_tracking(kmem_cache_node, n); #endif - kasan_kmalloc(kmem_cache_node, n, sizeof(struct kmem_cache_node)); + kasan_kmalloc(kmem_cache_node, n, sizeof(struct kmem_cache_node), + GFP_KERNEL); init_kmem_cache_node(n); inc_slabs_node(kmem_cache_node, node, page->objects); @@ -3561,7 +3562,7 @@ void *__kmalloc(size_t size, gfp_t flags) trace_kmalloc(_RET_IP_, ret, size, s->size, flags); - kasan_kmalloc(s, ret, size); + kasan_kmalloc(s, ret, size, flags); return ret; } @@ -3606,7 +3607,7 @@ void *__kmalloc_node(size_t size, gfp_t flags, int node) trace_kmalloc_node(_RET_IP_, ret, size, s->size, flags, node); - kasan_kmalloc(s, ret, size); + kasan_kmalloc(s, ret, size, flags); return ret; } @@ -3635,7 +3636,7 @@ size_t ksize(const void *object) size_t size = __ksize(object); /* We assume that ksize callers could use whole allocated area, so we need unpoison this area. 
*/ - kasan_krealloc(object, size); + kasan_krealloc(object, size, GFP_NOWAIT); return size; } EXPORT_SYMBOL(ksize); -- cgit v1.2.3 From be7635e7287e0e8013af3c89a6354a9e0182594c Mon Sep 17 00:00:00 2001 From: Alexander Potapenko Date: Fri, 25 Mar 2016 14:22:05 -0700 Subject: arch, ftrace: for KASAN put hard/soft IRQ entries into separate sections KASAN needs to know whether the allocation happens in an IRQ handler. This lets us strip everything below the IRQ entry point to reduce the number of unique stack traces needed to be stored. Move the definition of __irq_entry to <linux/interrupt.h> so that the users don't need to pull in <linux/ftrace.h>. Also introduce the __softirq_entry macro which is similar to __irq_entry, but puts the corresponding functions to the .softirqentry.text section. Signed-off-by: Alexander Potapenko Acked-by: Steven Rostedt Cc: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Cc: Andrey Konovalov Cc: Dmitry Vyukov Cc: Andrey Ryabinin Cc: Konstantin Serebryany Cc: Dmitry Chernenkov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm/include/asm/exception.h | 2 +- arch/arm/kernel/vmlinux.lds.S | 1 + arch/arm64/include/asm/exception.h | 2 +- arch/arm64/kernel/vmlinux.lds.S | 1 + arch/blackfin/kernel/vmlinux.lds.S | 1 + arch/c6x/kernel/vmlinux.lds.S | 1 + arch/metag/kernel/vmlinux.lds.S | 1 + arch/microblaze/kernel/vmlinux.lds.S | 1 + arch/mips/kernel/vmlinux.lds.S | 1 + arch/nios2/kernel/vmlinux.lds.S | 1 + arch/openrisc/kernel/vmlinux.lds.S | 1 + arch/parisc/kernel/vmlinux.lds.S | 1 + arch/powerpc/kernel/vmlinux.lds.S | 1 + arch/s390/kernel/vmlinux.lds.S | 1 + arch/sh/kernel/vmlinux.lds.S | 1 + arch/sparc/kernel/vmlinux.lds.S | 1 + arch/tile/kernel/vmlinux.lds.S | 1 + arch/x86/kernel/vmlinux.lds.S | 1 + include/asm-generic/vmlinux.lds.h | 12 +++++++++++- include/linux/ftrace.h | 11 ----------- include/linux/interrupt.h | 20 ++++++++++++++++++++ kernel/softirq.c | 2 +- kernel/trace/trace_functions_graph.c | 1 + 23 files changed, 51 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/arch/arm/include/asm/exception.h b/arch/arm/include/asm/exception.h index 5abaf5bbd985..bf1991263d2d 100644 --- a/arch/arm/include/asm/exception.h +++ b/arch/arm/include/asm/exception.h @@ -7,7 +7,7 @@ #ifndef __ASM_ARM_EXCEPTION_H #define __ASM_ARM_EXCEPTION_H -#include <linux/ftrace.h> +#include <linux/interrupt.h> #define __exception __attribute__((section(".exception.text"))) #ifdef CONFIG_FUNCTION_GRAPH_TRACER diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S index 1fab979daeaf..e2c6da096cef 100644 --- a/arch/arm/kernel/vmlinux.lds.S +++ b/arch/arm/kernel/vmlinux.lds.S @@ -108,6 +108,7 @@ SECTIONS *(.exception.text) __exception_text_end = .; IRQENTRY_TEXT + SOFTIRQENTRY_TEXT TEXT_TEXT SCHED_TEXT LOCK_TEXT diff --git a/arch/arm64/include/asm/exception.h b/arch/arm64/include/asm/exception.h index 6cb7e1a6bc02..0c2eec490abf 100644 --- a/arch/arm64/include/asm/exception.h +++ b/arch/arm64/include/asm/exception.h @@ -18,7 +18,7 @@ #ifndef __ASM_EXCEPTION_H #define __ASM_EXCEPTION_H -#include <linux/ftrace.h> +#include <linux/interrupt.h> #define __exception __attribute__((section(".exception.text"))) #ifdef CONFIG_FUNCTION_GRAPH_TRACER diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index 37f624df68fa..5a1939a74ff3 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -103,6 +103,7 @@ SECTIONS *(.exception.text) __exception_text_end = .; IRQENTRY_TEXT + SOFTIRQENTRY_TEXT TEXT_TEXT SCHED_TEXT LOCK_TEXT diff --git a/arch/blackfin/kernel/vmlinux.lds.S
b/arch/blackfin/kernel/vmlinux.lds.S index c9eec84aa258..d920b959ff3a 100644 --- a/arch/blackfin/kernel/vmlinux.lds.S +++ b/arch/blackfin/kernel/vmlinux.lds.S @@ -35,6 +35,7 @@ SECTIONS #endif LOCK_TEXT IRQENTRY_TEXT + SOFTIRQENTRY_TEXT KPROBES_TEXT #ifdef CONFIG_ROMKERNEL __sinittext = .; diff --git a/arch/c6x/kernel/vmlinux.lds.S b/arch/c6x/kernel/vmlinux.lds.S index 5a6e141d1641..50bc10f97bcb 100644 --- a/arch/c6x/kernel/vmlinux.lds.S +++ b/arch/c6x/kernel/vmlinux.lds.S @@ -72,6 +72,7 @@ SECTIONS SCHED_TEXT LOCK_TEXT IRQENTRY_TEXT + SOFTIRQENTRY_TEXT KPROBES_TEXT *(.fixup) *(.gnu.warning) diff --git a/arch/metag/kernel/vmlinux.lds.S b/arch/metag/kernel/vmlinux.lds.S index e12055e88bfe..150ace92c7ad 100644 --- a/arch/metag/kernel/vmlinux.lds.S +++ b/arch/metag/kernel/vmlinux.lds.S @@ -24,6 +24,7 @@ SECTIONS LOCK_TEXT KPROBES_TEXT IRQENTRY_TEXT + SOFTIRQENTRY_TEXT *(.text.*) *(.gnu.warning) } diff --git a/arch/microblaze/kernel/vmlinux.lds.S b/arch/microblaze/kernel/vmlinux.lds.S index be9488d69734..0a47f0410554 100644 --- a/arch/microblaze/kernel/vmlinux.lds.S +++ b/arch/microblaze/kernel/vmlinux.lds.S @@ -36,6 +36,7 @@ SECTIONS { LOCK_TEXT KPROBES_TEXT IRQENTRY_TEXT + SOFTIRQENTRY_TEXT . = ALIGN (4) ; _etext = . ; } diff --git a/arch/mips/kernel/vmlinux.lds.S b/arch/mips/kernel/vmlinux.lds.S index 0a93e83cd014..54d653ee17e1 100644 --- a/arch/mips/kernel/vmlinux.lds.S +++ b/arch/mips/kernel/vmlinux.lds.S @@ -58,6 +58,7 @@ SECTIONS LOCK_TEXT KPROBES_TEXT IRQENTRY_TEXT + SOFTIRQENTRY_TEXT *(.text.*) *(.fixup) *(.gnu.warning) diff --git a/arch/nios2/kernel/vmlinux.lds.S b/arch/nios2/kernel/vmlinux.lds.S index 326fab40a9de..e23e89539967 100644 --- a/arch/nios2/kernel/vmlinux.lds.S +++ b/arch/nios2/kernel/vmlinux.lds.S @@ -39,6 +39,7 @@ SECTIONS SCHED_TEXT LOCK_TEXT IRQENTRY_TEXT + SOFTIRQENTRY_TEXT KPROBES_TEXT } =0 _etext = .; diff --git a/arch/openrisc/kernel/vmlinux.lds.S b/arch/openrisc/kernel/vmlinux.lds.S index 2d69a853b742..d936de4c07ca 100644 --- a/arch/openrisc/kernel/vmlinux.lds.S +++ b/arch/openrisc/kernel/vmlinux.lds.S @@ -50,6 +50,7 @@ SECTIONS LOCK_TEXT KPROBES_TEXT IRQENTRY_TEXT + SOFTIRQENTRY_TEXT *(.fixup) *(.text.__*) _etext = .; diff --git a/arch/parisc/kernel/vmlinux.lds.S b/arch/parisc/kernel/vmlinux.lds.S index 308f29081d46..f3ead0b6ce46 100644 --- a/arch/parisc/kernel/vmlinux.lds.S +++ b/arch/parisc/kernel/vmlinux.lds.S @@ -72,6 +72,7 @@ SECTIONS LOCK_TEXT KPROBES_TEXT IRQENTRY_TEXT + SOFTIRQENTRY_TEXT *(.text.do_softirq) *(.text.sys_exit) *(.text.do_sigaltstack) diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index d41fd0af8980..2dd91f79de05 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -55,6 +55,7 @@ SECTIONS LOCK_TEXT KPROBES_TEXT IRQENTRY_TEXT + SOFTIRQENTRY_TEXT #ifdef CONFIG_PPC32 *(.got1) diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S index 445657fe658c..0f41a8286378 100644 --- a/arch/s390/kernel/vmlinux.lds.S +++ b/arch/s390/kernel/vmlinux.lds.S @@ -28,6 +28,7 @@ SECTIONS LOCK_TEXT KPROBES_TEXT IRQENTRY_TEXT + SOFTIRQENTRY_TEXT *(.fixup) *(.gnu.warning) } :text = 0x0700 diff --git a/arch/sh/kernel/vmlinux.lds.S b/arch/sh/kernel/vmlinux.lds.S index db88cbf9eafd..235a4101999f 100644 --- a/arch/sh/kernel/vmlinux.lds.S +++ b/arch/sh/kernel/vmlinux.lds.S @@ -39,6 +39,7 @@ SECTIONS LOCK_TEXT KPROBES_TEXT IRQENTRY_TEXT + SOFTIRQENTRY_TEXT *(.fixup) *(.gnu.warning) _etext = .; /* End of text section */ diff --git a/arch/sparc/kernel/vmlinux.lds.S 
b/arch/sparc/kernel/vmlinux.lds.S index f1a2f688b28a..aadd321aa05d 100644 --- a/arch/sparc/kernel/vmlinux.lds.S +++ b/arch/sparc/kernel/vmlinux.lds.S @@ -48,6 +48,7 @@ SECTIONS LOCK_TEXT KPROBES_TEXT IRQENTRY_TEXT + SOFTIRQENTRY_TEXT *(.gnu.warning) } = 0 _etext = .; diff --git a/arch/tile/kernel/vmlinux.lds.S b/arch/tile/kernel/vmlinux.lds.S index 0e059a0101ea..378f5d8d1ec8 100644 --- a/arch/tile/kernel/vmlinux.lds.S +++ b/arch/tile/kernel/vmlinux.lds.S @@ -45,6 +45,7 @@ SECTIONS LOCK_TEXT KPROBES_TEXT IRQENTRY_TEXT + SOFTIRQENTRY_TEXT __fix_text_end = .; /* tile-cpack won't rearrange before this */ ALIGN_FUNCTION(); *(.hottext*) diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index d239639e0c1d..4c941f88d405 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -101,6 +101,7 @@ SECTIONS KPROBES_TEXT ENTRY_TEXT IRQENTRY_TEXT + SOFTIRQENTRY_TEXT *(.fixup) *(.gnu.warning) /* End of text section */ diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 8f5a12ab2f2b..339125bb4d2c 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -456,7 +456,7 @@ *(.entry.text) \ VMLINUX_SYMBOL(__entry_text_end) = .; -#ifdef CONFIG_FUNCTION_GRAPH_TRACER +#if defined(CONFIG_FUNCTION_GRAPH_TRACER) || defined(CONFIG_KASAN) #define IRQENTRY_TEXT \ ALIGN_FUNCTION(); \ VMLINUX_SYMBOL(__irqentry_text_start) = .; \ @@ -466,6 +466,16 @@ #define IRQENTRY_TEXT #endif +#if defined(CONFIG_FUNCTION_GRAPH_TRACER) || defined(CONFIG_KASAN) +#define SOFTIRQENTRY_TEXT \ + ALIGN_FUNCTION(); \ + VMLINUX_SYMBOL(__softirqentry_text_start) = .; \ + *(.softirqentry.text) \ + VMLINUX_SYMBOL(__softirqentry_text_end) = .; +#else +#define SOFTIRQENTRY_TEXT +#endif + /* Section used for early init (in .S files) */ #define HEAD_TEXT *(.head.text) diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 6d9df3f7e334..dea12a6e413b 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -811,16 +811,6 @@ ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth, */ #define __notrace_funcgraph notrace -/* - * We want to which function is an entrypoint of a hardirq. - * That will help us to put a signal on output. - */ -#define __irq_entry __attribute__((__section__(".irqentry.text"))) - -/* Limits of hardirq entrypoints */ -extern char __irqentry_text_start[]; -extern char __irqentry_text_end[]; - #define FTRACE_NOTRACE_DEPTH 65536 #define FTRACE_RETFUNC_DEPTH 50 #define FTRACE_RETSTACK_ALLOC_SIZE 32 @@ -857,7 +847,6 @@ static inline void unpause_graph_tracing(void) #else /* !CONFIG_FUNCTION_GRAPH_TRACER */ #define __notrace_funcgraph -#define __irq_entry #define INIT_FTRACE_GRAPH static inline void ftrace_graph_init_task(struct task_struct *t) { } diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 358076eda364..9fcabeb07787 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -683,4 +683,24 @@ extern int early_irq_init(void); extern int arch_probe_nr_irqs(void); extern int arch_early_irq_init(void); +#if defined(CONFIG_FUNCTION_GRAPH_TRACER) || defined(CONFIG_KASAN) +/* + * We want to know which function is an entrypoint of a hardirq or a softirq. 
+ */ +#define __irq_entry __attribute__((__section__(".irqentry.text"))) +#define __softirq_entry \ + __attribute__((__section__(".softirqentry.text"))) + +/* Limits of hardirq entrypoints */ +extern char __irqentry_text_start[]; +extern char __irqentry_text_end[]; +/* Limits of softirq entrypoints */ +extern char __softirqentry_text_start[]; +extern char __softirqentry_text_end[]; + +#else +#define __irq_entry +#define __softirq_entry +#endif + #endif diff --git a/kernel/softirq.c b/kernel/softirq.c index 8aae49dd7da8..17caf4b63342 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -227,7 +227,7 @@ static inline bool lockdep_softirq_start(void) { return false; } static inline void lockdep_softirq_end(bool in_hardirq) { } #endif -asmlinkage __visible void __do_softirq(void) +asmlinkage __visible void __softirq_entry __do_softirq(void) { unsigned long end = jiffies + MAX_SOFTIRQ_TIME; unsigned long old_flags = current->flags; diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index 91d6a63a2ea7..3a0244ff7ea8 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -8,6 +8,7 @@ */ #include #include +#include <linux/interrupt.h> #include #include -- cgit v1.2.3 From cd11016e5f5212c13c0cec7384a525edc93b4921 Mon Sep 17 00:00:00 2001 From: Alexander Potapenko Date: Fri, 25 Mar 2016 14:22:08 -0700 Subject: mm, kasan: stackdepot implementation. Enable stackdepot for SLAB Implement the stack depot and provide CONFIG_STACKDEPOT. Stack depot will allow KASAN to store allocation/deallocation stack traces for memory chunks. The stack traces are stored in a hash table and referenced by handles which reside in the kasan_alloc_meta and kasan_free_meta structures in the allocated memory chunks. IRQ stack traces are cut below the IRQ entry point to avoid unnecessary duplication. Right now stackdepot support is only enabled in the SLAB allocator. Once KASAN features in SLAB are on par with those in SLUB we can switch SLUB to stackdepot as well, thus removing the dependency on SLUB stack bookkeeping, which wastes a lot of memory. This patch is based on the "mm: kasan: stack depots" patch originally prepared by Dmitry Chernenkov. Joonsoo has said that he plans to reuse the stackdepot code for the mm/page_owner.c debugging facility.
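As an illustration of the intended use of the new API, here is a minimal sketch of a client that records and later prints a stack trace. Only depot_save_stack() and depot_fetch_stack() come from this patch (declared in include/linux/stackdepot.h below); the two helper functions and the 16-entry depth are illustrative, not part of the patch:

#include <linux/kernel.h>
#include <linux/stackdepot.h>
#include <linux/stacktrace.h>

/* Illustrative helper: capture the current stack and deduplicate it
 * in the depot, getting back a compact 32-bit handle.
 */
static depot_stack_handle_t example_record_stack(gfp_t flags)
{
	unsigned long entries[16];
	struct stack_trace trace = {
		.entries	= entries,
		.max_entries	= ARRAY_SIZE(entries),
		.skip		= 0,
	};

	save_stack_trace(&trace);
	/* A zero handle means the stack was not stored; callers must
	 * tolerate that (e.g. allocation of a new depot slab failed).
	 */
	return depot_save_stack(&trace, flags);
}

/* Illustrative helper: resolve a handle back into a trace and print it. */
static void example_print_stack(depot_stack_handle_t handle)
{
	struct stack_trace trace;

	if (!handle) {
		pr_err("(stack is not available)\n");
		return;
	}
	depot_fetch_stack(handle, &trace);
	print_stack_trace(&trace, 0);
}

This is exactly the pattern the patch itself uses in mm/kasan/kasan.c (save_stack()) and mm/kasan/report.c (print_track()): a trace is saved once per allocation/free event, and only the handle is kept in the object's metadata.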
[akpm@linux-foundation.org: s/depot_stack_handle/depot_stack_handle_t] [aryabinin@virtuozzo.com: comment style fixes] Signed-off-by: Alexander Potapenko Signed-off-by: Andrey Ryabinin Cc: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Cc: Andrey Konovalov Cc: Dmitry Vyukov Cc: Steven Rostedt Cc: Konstantin Serebryany Cc: Dmitry Chernenkov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/kernel/Makefile | 1 + include/linux/stackdepot.h | 32 +++++ lib/Kconfig | 4 + lib/Kconfig.kasan | 1 + lib/Makefile | 3 + lib/stackdepot.c | 284 +++++++++++++++++++++++++++++++++++++++++++++ mm/kasan/kasan.c | 55 ++++++++- mm/kasan/kasan.h | 11 +- mm/kasan/report.c | 12 +- 9 files changed, 391 insertions(+), 12 deletions(-) create mode 100644 include/linux/stackdepot.h create mode 100644 lib/stackdepot.c (limited to 'include/linux') diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index adaae2c781c1..616ebd22ef9a 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -19,6 +19,7 @@ endif KASAN_SANITIZE_head$(BITS).o := n KASAN_SANITIZE_dumpstack.o := n KASAN_SANITIZE_dumpstack_$(BITS).o := n +KASAN_SANITIZE_stacktrace.o := n OBJECT_FILES_NON_STANDARD_head_$(BITS).o := y OBJECT_FILES_NON_STANDARD_relocate_kernel_$(BITS).o := y diff --git a/include/linux/stackdepot.h b/include/linux/stackdepot.h new file mode 100644 index 000000000000..7978b3e2c1e1 --- /dev/null +++ b/include/linux/stackdepot.h @@ -0,0 +1,32 @@ +/* + * A generic stack depot implementation + * + * Author: Alexander Potapenko + * Copyright (C) 2016 Google, Inc. + * + * Based on code by Dmitry Chernenkov. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#ifndef _LINUX_STACKDEPOT_H +#define _LINUX_STACKDEPOT_H + +typedef u32 depot_stack_handle_t; + +struct stack_trace; + +depot_stack_handle_t depot_save_stack(struct stack_trace *trace, gfp_t flags); + +void depot_fetch_stack(depot_stack_handle_t handle, struct stack_trace *trace); + +#endif diff --git a/lib/Kconfig b/lib/Kconfig index 133ebc0c1773..3cca1222578e 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -536,4 +536,8 @@ config ARCH_HAS_PMEM_API config ARCH_HAS_MMIO_FLUSH bool +config STACKDEPOT + bool + select STACKTRACE + endmenu diff --git a/lib/Kconfig.kasan b/lib/Kconfig.kasan index 0e4d2b3b0aee..67d8c6838ba9 100644 --- a/lib/Kconfig.kasan +++ b/lib/Kconfig.kasan @@ -7,6 +7,7 @@ config KASAN bool "KASan: runtime memory debugger" depends on SLUB_DEBUG || (SLAB && !DEBUG_SLAB) select CONSTRUCTORS + select STACKDEPOT if SLAB help Enables kernel address sanitizer - runtime memory debugger, designed to find out-of-bounds accesses and use-after-free bugs. 
diff --git a/lib/Makefile b/lib/Makefile index a1de5b61ff40..7bd6fd436c97 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -181,6 +181,9 @@ obj-$(CONFIG_SG_SPLIT) += sg_split.o obj-$(CONFIG_STMP_DEVICE) += stmp_device.o obj-$(CONFIG_IRQ_POLL) += irq_poll.o +obj-$(CONFIG_STACKDEPOT) += stackdepot.o +KASAN_SANITIZE_stackdepot.o := n + libfdt_files = fdt.o fdt_ro.o fdt_wip.o fdt_rw.o fdt_sw.o fdt_strerror.o \ fdt_empty_tree.o $(foreach file, $(libfdt_files), \ diff --git a/lib/stackdepot.c b/lib/stackdepot.c new file mode 100644 index 000000000000..654c9d87e83a --- /dev/null +++ b/lib/stackdepot.c @@ -0,0 +1,284 @@ +/* + * Generic stack depot for storing stack traces. + * + * Some debugging tools need to save stack traces of certain events which can + * be later presented to the user. For example, KASAN needs to save alloc and + * free stacks for each object, but storing two stack traces per object + * requires too much memory (e.g. SLUB_DEBUG needs 256 bytes per object for + * that). + * + * Instead, stack depot maintains a hashtable of unique stacktraces. Since alloc + * and free stacks repeat a lot, we save about 100x space. + * Stacks are never removed from depot, so we store them contiguously one after + * another in a contiguous memory allocation. + * + * Author: Alexander Potapenko + * Copyright (C) 2016 Google, Inc. + * + * Based on code by Dmitry Chernenkov. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define DEPOT_STACK_BITS (sizeof(depot_stack_handle_t) * 8) + +#define STACK_ALLOC_ORDER 2 /* 'Slab' size order for stack depot, 4 pages */ +#define STACK_ALLOC_SIZE (1LL << (PAGE_SHIFT + STACK_ALLOC_ORDER)) +#define STACK_ALLOC_ALIGN 4 +#define STACK_ALLOC_OFFSET_BITS (STACK_ALLOC_ORDER + PAGE_SHIFT - \ + STACK_ALLOC_ALIGN) +#define STACK_ALLOC_INDEX_BITS (DEPOT_STACK_BITS - STACK_ALLOC_OFFSET_BITS) +#define STACK_ALLOC_SLABS_CAP 1024 +#define STACK_ALLOC_MAX_SLABS \ + (((1LL << (STACK_ALLOC_INDEX_BITS)) < STACK_ALLOC_SLABS_CAP) ? \ + (1LL << (STACK_ALLOC_INDEX_BITS)) : STACK_ALLOC_SLABS_CAP) + +/* The compact structure to store the reference to stacks. */ +union handle_parts { + depot_stack_handle_t handle; + struct { + u32 slabindex : STACK_ALLOC_INDEX_BITS; + u32 offset : STACK_ALLOC_OFFSET_BITS; + }; +}; + +struct stack_record { + struct stack_record *next; /* Link in the hashtable */ + u32 hash; /* Hash in the hashtable */ + u32 size; /* Number of frames in the stack */ + union handle_parts handle; + unsigned long entries[1]; /* Variable-sized array of entries. */ +}; + +static void *stack_slabs[STACK_ALLOC_MAX_SLABS]; + +static int depot_index; +static int next_slab_inited; +static size_t depot_offset; +static DEFINE_SPINLOCK(depot_lock); + +static bool init_stack_slab(void **prealloc) +{ + if (!*prealloc) + return false; + /* + * This smp_load_acquire() pairs with smp_store_release() to + * |next_slab_inited| below and in depot_alloc_stack().
+ */ + if (smp_load_acquire(&next_slab_inited)) + return true; + if (stack_slabs[depot_index] == NULL) { + stack_slabs[depot_index] = *prealloc; + } else { + stack_slabs[depot_index + 1] = *prealloc; + /* + * This smp_store_release pairs with smp_load_acquire() from + * |next_slab_inited| above and in depot_save_stack(). + */ + smp_store_release(&next_slab_inited, 1); + } + *prealloc = NULL; + return true; +} + +/* Allocation of a new stack in raw storage */ +static struct stack_record *depot_alloc_stack(unsigned long *entries, int size, + u32 hash, void **prealloc, gfp_t alloc_flags) +{ + int required_size = offsetof(struct stack_record, entries) + + sizeof(unsigned long) * size; + struct stack_record *stack; + + required_size = ALIGN(required_size, 1 << STACK_ALLOC_ALIGN); + + if (unlikely(depot_offset + required_size > STACK_ALLOC_SIZE)) { + if (unlikely(depot_index + 1 >= STACK_ALLOC_MAX_SLABS)) { + WARN_ONCE(1, "Stack depot reached limit capacity"); + return NULL; + } + depot_index++; + depot_offset = 0; + /* + * smp_store_release() here pairs with smp_load_acquire() from + * |next_slab_inited| in depot_save_stack() and + * init_stack_slab(). + */ + if (depot_index + 1 < STACK_ALLOC_MAX_SLABS) + smp_store_release(&next_slab_inited, 0); + } + init_stack_slab(prealloc); + if (stack_slabs[depot_index] == NULL) + return NULL; + + stack = stack_slabs[depot_index] + depot_offset; + + stack->hash = hash; + stack->size = size; + stack->handle.slabindex = depot_index; + stack->handle.offset = depot_offset >> STACK_ALLOC_ALIGN; + memcpy(stack->entries, entries, size * sizeof(unsigned long)); + depot_offset += required_size; + + return stack; +} + +#define STACK_HASH_ORDER 20 +#define STACK_HASH_SIZE (1L << STACK_HASH_ORDER) +#define STACK_HASH_MASK (STACK_HASH_SIZE - 1) +#define STACK_HASH_SEED 0x9747b28c + +static struct stack_record *stack_table[STACK_HASH_SIZE] = { + [0 ... STACK_HASH_SIZE - 1] = NULL +}; + +/* Calculate hash for a stack */ +static inline u32 hash_stack(unsigned long *entries, unsigned int size) +{ + return jhash2((u32 *)entries, + size * sizeof(unsigned long) / sizeof(u32), + STACK_HASH_SEED); +} + +/* Find a stack that is equal to the one stored in entries in the hash */ +static inline struct stack_record *find_stack(struct stack_record *bucket, + unsigned long *entries, int size, + u32 hash) +{ + struct stack_record *found; + + for (found = bucket; found; found = found->next) { + if (found->hash == hash && + found->size == size && + !memcmp(entries, found->entries, + size * sizeof(unsigned long))) { + return found; + } + } + return NULL; +} + +void depot_fetch_stack(depot_stack_handle_t handle, struct stack_trace *trace) +{ + union handle_parts parts = { .handle = handle }; + void *slab = stack_slabs[parts.slabindex]; + size_t offset = parts.offset << STACK_ALLOC_ALIGN; + struct stack_record *stack = slab + offset; + + trace->nr_entries = trace->max_entries = stack->size; + trace->entries = stack->entries; + trace->skip = 0; +} + +/** + * depot_save_stack - save stack in a stack depot. + * @trace - the stacktrace to save. + * @alloc_flags - flags for allocating additional memory if required. + * + * Returns the handle of the stack struct stored in depot. 
+ */ +depot_stack_handle_t depot_save_stack(struct stack_trace *trace, + gfp_t alloc_flags) +{ + u32 hash; + depot_stack_handle_t retval = 0; + struct stack_record *found = NULL, **bucket; + unsigned long flags; + struct page *page = NULL; + void *prealloc = NULL; + + if (unlikely(trace->nr_entries == 0)) + goto fast_exit; + + hash = hash_stack(trace->entries, trace->nr_entries); + /* Bad luck, we won't store this stack. */ + if (hash == 0) + goto exit; + + bucket = &stack_table[hash & STACK_HASH_MASK]; + + /* + * Fast path: look the stack trace up without locking. + * The smp_load_acquire() here pairs with smp_store_release() to + * |bucket| below. + */ + found = find_stack(smp_load_acquire(bucket), trace->entries, + trace->nr_entries, hash); + if (found) + goto exit; + + /* + * Check if the current or the next stack slab need to be initialized. + * If so, allocate the memory - we won't be able to do that under the + * lock. + * + * The smp_load_acquire() here pairs with smp_store_release() to + * |next_slab_inited| in depot_alloc_stack() and init_stack_slab(). + */ + if (unlikely(!smp_load_acquire(&next_slab_inited))) { + /* + * Zero out zone modifiers, as we don't have specific zone + * requirements. Keep the flags related to allocation in atomic + * contexts and I/O. + */ + alloc_flags &= ~GFP_ZONEMASK; + alloc_flags &= (GFP_ATOMIC | GFP_KERNEL); + page = alloc_pages(alloc_flags, STACK_ALLOC_ORDER); + if (page) + prealloc = page_address(page); + } + + spin_lock_irqsave(&depot_lock, flags); + + found = find_stack(*bucket, trace->entries, trace->nr_entries, hash); + if (!found) { + struct stack_record *new = + depot_alloc_stack(trace->entries, trace->nr_entries, + hash, &prealloc, alloc_flags); + if (new) { + new->next = *bucket; + /* + * This smp_store_release() pairs with + * smp_load_acquire() from |bucket| above. + */ + smp_store_release(bucket, new); + found = new; + } + } else if (prealloc) { + /* + * We didn't need to store this stack trace, but let's keep + * the preallocated memory for the future. + */ + WARN_ON(!init_stack_slab(&prealloc)); + } + + spin_unlock_irqrestore(&depot_lock, flags); +exit: + if (prealloc) { + /* Nobody used this memory, ok to free it. */ + free_pages((unsigned long)prealloc, STACK_ALLOC_ORDER); + } + if (found) + retval = found->handle.handle; +fast_exit: + return retval; +} diff --git a/mm/kasan/kasan.c b/mm/kasan/kasan.c index cb998e0ec9d3..acb3b6c4dd89 100644 --- a/mm/kasan/kasan.c +++ b/mm/kasan/kasan.c @@ -17,7 +17,9 @@ #define DISABLE_BRANCH_PROFILING #include +#include #include +#include #include #include #include @@ -32,7 +34,6 @@ #include #include #include -#include #include "kasan.h" #include "../slab.h" @@ -413,23 +414,65 @@ void kasan_poison_object_data(struct kmem_cache *cache, void *object) #endif } -static inline void set_track(struct kasan_track *track) +#ifdef CONFIG_SLAB +static inline int in_irqentry_text(unsigned long ptr) +{ + return (ptr >= (unsigned long)&__irqentry_text_start && + ptr < (unsigned long)&__irqentry_text_end) || + (ptr >= (unsigned long)&__softirqentry_text_start && + ptr < (unsigned long)&__softirqentry_text_end); +} + +static inline void filter_irq_stacks(struct stack_trace *trace) +{ + int i; + + if (!trace->nr_entries) + return; + for (i = 0; i < trace->nr_entries; i++) + if (in_irqentry_text(trace->entries[i])) { + /* Include the irqentry function into the stack. 
*/ + trace->nr_entries = i + 1; + break; + } +} + +static inline depot_stack_handle_t save_stack(gfp_t flags) +{ + unsigned long entries[KASAN_STACK_DEPTH]; + struct stack_trace trace = { + .nr_entries = 0, + .entries = entries, + .max_entries = KASAN_STACK_DEPTH, + .skip = 0 + }; + + save_stack_trace(&trace); + filter_irq_stacks(&trace); + if (trace.nr_entries != 0 && + trace.entries[trace.nr_entries-1] == ULONG_MAX) + trace.nr_entries--; + + return depot_save_stack(&trace, flags); +} + +static inline void set_track(struct kasan_track *track, gfp_t flags) { - track->cpu = raw_smp_processor_id(); track->pid = current->pid; - track->when = jiffies; + track->stack = save_stack(flags); } -#ifdef CONFIG_SLAB struct kasan_alloc_meta *get_alloc_info(struct kmem_cache *cache, const void *object) { + BUILD_BUG_ON(sizeof(struct kasan_alloc_meta) > 32); return (void *)object + cache->kasan_info.alloc_meta_offset; } struct kasan_free_meta *get_free_info(struct kmem_cache *cache, const void *object) { + BUILD_BUG_ON(sizeof(struct kasan_free_meta) > 32); return (void *)object + cache->kasan_info.free_meta_offset; } #endif @@ -486,7 +529,7 @@ void kasan_kmalloc(struct kmem_cache *cache, const void *object, size_t size, alloc_info->state = KASAN_STATE_ALLOC; alloc_info->alloc_size = size; - set_track(&alloc_info->track); + set_track(&alloc_info->track, flags); } #endif } diff --git a/mm/kasan/kasan.h b/mm/kasan/kasan.h index 7b9e4ab9b66b..30a2f0ba0e09 100644 --- a/mm/kasan/kasan.h +++ b/mm/kasan/kasan.h @@ -2,6 +2,7 @@ #define __MM_KASAN_KASAN_H #include +#include #define KASAN_SHADOW_SCALE_SIZE (1UL << KASAN_SHADOW_SCALE_SHIFT) #define KASAN_SHADOW_MASK (KASAN_SHADOW_SCALE_SIZE - 1) @@ -64,16 +65,18 @@ enum kasan_state { KASAN_STATE_FREE }; +#define KASAN_STACK_DEPTH 64 + struct kasan_track { - u64 cpu : 6; /* for NR_CPUS = 64 */ - u64 pid : 16; /* 65536 processes */ - u64 when : 42; /* ~140 years */ + u32 pid; + depot_stack_handle_t stack; }; struct kasan_alloc_meta { + struct kasan_track track; u32 state : 2; /* enum kasan_state */ u32 alloc_size : 30; - struct kasan_track track; + u32 reserved; }; struct kasan_free_meta { diff --git a/mm/kasan/report.c b/mm/kasan/report.c index 3e3385cc97ac..60869a5a0124 100644 --- a/mm/kasan/report.c +++ b/mm/kasan/report.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -118,8 +119,15 @@ static inline bool init_task_stack_addr(const void *addr) #ifdef CONFIG_SLAB static void print_track(struct kasan_track *track) { - pr_err("PID = %u, CPU = %u, timestamp = %lu\n", track->pid, - track->cpu, (unsigned long)track->when); + pr_err("PID = %u\n", track->pid); + if (track->stack) { + struct stack_trace trace; + + depot_fetch_stack(track->stack, &trace); + print_stack_trace(&trace, 0); + } else { + pr_err("(stack is not available)\n"); + } } static void object_err(struct kmem_cache *cache, struct page *page, -- cgit v1.2.3 From d101a125954eae1d397adda94ca6319485a50493 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Sat, 26 Mar 2016 16:14:37 -0400 Subject: fs: add file_dentry() This series fixes bugs in nfs and ext4 due to 4bacc9c9234c ("overlayfs: Make f_path always point to the overlay and f_inode to the underlay"). Regular files opened on overlayfs will result in the file being opened on the underlying filesystem, while f_path points to the overlayfs mount/dentry. This confuses filesystems which get the dentry from struct file and assume it's theirs. 
Add a new helper, file_dentry() [*], to get the filesystem's own dentry from the file. This checks file->f_path.dentry->d_flags against DCACHE_OP_REAL, and returns file->f_path.dentry if DCACHE_OP_REAL is not set (this is the common, non-overlayfs case). In the uncommon case it will call into overlayfs's ->d_real() to get the underlying dentry, matching file_inode(file). The reason we need to check against the inode is that if the file is copied up while being open, d_real() would return the upper dentry, while the open file comes from the lower dentry. [*] If possible, it's better simply to use file_inode() instead. Signed-off-by: Miklos Szeredi Signed-off-by: Theodore Ts'o Tested-by: Goldwyn Rodrigues Reviewed-by: Trond Myklebust Cc: # v4.2 Cc: David Howells Cc: Al Viro Cc: Daniel Axtens --- fs/dcache.c | 5 ++++- fs/overlayfs/super.c | 33 +++++++++++++++++++++++++++++++++ include/linux/dcache.h | 10 ++++++++++ include/linux/fs.h | 10 ++++++++++ 4 files changed, 57 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/fs/dcache.c b/fs/dcache.c index 32ceae3e6112..d5ecc6e477da 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1667,7 +1667,8 @@ void d_set_d_op(struct dentry *dentry, const struct dentry_operations *op) DCACHE_OP_REVALIDATE | DCACHE_OP_WEAK_REVALIDATE | DCACHE_OP_DELETE | - DCACHE_OP_SELECT_INODE)); + DCACHE_OP_SELECT_INODE | + DCACHE_OP_REAL)); dentry->d_op = op; if (!op) return; @@ -1685,6 +1686,8 @@ void d_set_d_op(struct dentry *dentry, const struct dentry_operations *op) dentry->d_flags |= DCACHE_OP_PRUNE; if (op->d_select_inode) dentry->d_flags |= DCACHE_OP_SELECT_INODE; + if (op->d_real) + dentry->d_flags |= DCACHE_OP_REAL; } EXPORT_SYMBOL(d_set_d_op); diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index ef64984c9bbc..5d972e6cd3fe 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -295,6 +295,37 @@ static void ovl_dentry_release(struct dentry *dentry) } } +static struct dentry *ovl_d_real(struct dentry *dentry, struct inode *inode) +{ + struct dentry *real; + + if (d_is_dir(dentry)) { + if (!inode || inode == d_inode(dentry)) + return dentry; + goto bug; + } + + real = ovl_dentry_upper(dentry); + if (real && (!inode || inode == d_inode(real))) + return real; + + real = ovl_dentry_lower(dentry); + if (!real) + goto bug; + + if (!inode || inode == d_inode(real)) + return real; + + /* Handle recursion */ + if (real->d_flags & DCACHE_OP_REAL) + return real->d_op->d_real(real, inode); + +bug: + WARN(1, "ovl_d_real(%pd4, %s:%lu\n): real dentry not found\n", dentry, + inode ? inode->i_sb->s_id : "NULL", inode ? 
inode->i_ino : 0); + return dentry; +} + static int ovl_dentry_revalidate(struct dentry *dentry, unsigned int flags) { struct ovl_entry *oe = dentry->d_fsdata; @@ -339,11 +370,13 @@ static int ovl_dentry_weak_revalidate(struct dentry *dentry, unsigned int flags) static const struct dentry_operations ovl_dentry_operations = { .d_release = ovl_dentry_release, .d_select_inode = ovl_d_select_inode, + .d_real = ovl_d_real, }; static const struct dentry_operations ovl_reval_dentry_operations = { .d_release = ovl_dentry_release, .d_select_inode = ovl_d_select_inode, + .d_real = ovl_d_real, .d_revalidate = ovl_dentry_revalidate, .d_weak_revalidate = ovl_dentry_weak_revalidate, }; diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 7cb043d8f4e8..4bb4de8d95ea 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -161,6 +161,7 @@ struct dentry_operations { struct vfsmount *(*d_automount)(struct path *); int (*d_manage)(struct dentry *, bool); struct inode *(*d_select_inode)(struct dentry *, unsigned); + struct dentry *(*d_real)(struct dentry *, struct inode *); } ____cacheline_aligned; /* @@ -229,6 +230,7 @@ struct dentry_operations { #define DCACHE_OP_SELECT_INODE 0x02000000 /* Unioned entry: dcache op selects inode */ #define DCACHE_ENCRYPTED_WITH_KEY 0x04000000 /* dir is encrypted with a valid key */ +#define DCACHE_OP_REAL 0x08000000 extern seqlock_t rename_lock; @@ -555,4 +557,12 @@ static inline struct dentry *d_backing_dentry(struct dentry *upper) return upper; } +static inline struct dentry *d_real(struct dentry *dentry) +{ + if (unlikely(dentry->d_flags & DCACHE_OP_REAL)) + return dentry->d_op->d_real(dentry, NULL); + else + return dentry; +} + #endif /* __LINUX_DCACHE_H */ diff --git a/include/linux/fs.h b/include/linux/fs.h index 35d99266ca9a..b2ed2311b021 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1241,6 +1241,16 @@ static inline struct inode *file_inode(const struct file *f) return f->f_inode; } +static inline struct dentry *file_dentry(const struct file *file) +{ + struct dentry *dentry = file->f_path.dentry; + + if (unlikely(dentry->d_flags & DCACHE_OP_REAL)) + return dentry->d_op->d_real(dentry, file_inode(file)); + else + return dentry; +} + static inline int locks_lock_file_wait(struct file *filp, struct file_lock *fl) { return locks_lock_inode_wait(file_inode(filp), fl); -- cgit v1.2.3 From 596cf3fe5854fe2b1703b0466ed6bf9cfb83c91e Mon Sep 17 00:00:00 2001 From: Vishwanath Pai Date: Wed, 16 Mar 2016 21:49:00 +0100 Subject: netfilter: ipset: fix race condition in ipset save, swap and delete This fix adds a new reference counter (ref_netlink) for the struct ip_set. The other reference counter (ref) can be swapped out by ip_set_swap and we need a separate counter to keep track of references for netlink events like dump. Using the same ref counter for dump causes a race condition which can be demonstrated by the following script: ipset create hash_ip1 hash:ip family inet hashsize 1024 maxelem 500000 \ counters ipset create hash_ip2 hash:ip family inet hashsize 300000 maxelem 500000 \ counters ipset create hash_ip3 hash:ip family inet hashsize 1024 maxelem 500000 \ counters ipset save & ipset swap hash_ip3 hash_ip2 ipset destroy hash_ip3 /* will crash the machine */ Swap will exchange the values of ref so destroy will see ref = 0 instead of ref = 1. With this fix in place swap will not succeed because ipset save still has ref_netlink on the set (ip_set_swap doesn't swap ref_netlink). 
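The split of duties between the two counters can be sketched as follows (a simplified illustration, not the real ipset structures or locking):

struct example_set {
	u32 ref;		/* user references: exchanged by swap */
	u32 ref_netlink;	/* dump references: stay with the object */
};

static void example_swap(struct example_set *a, struct example_set *b)
{
	swap(a->ref, b->ref);	/* kernel's swap() helper */
	/* ref_netlink is deliberately left alone, so a dump in flight
	 * keeps pinning the set it is actually iterating */
}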
Both delete and swap will error out if ref_netlink != 0 on the set. Note: The changes to *_head functions is because previously we would increment ref whenever we called these functions, we don't do that anymore. Reviewed-by: Joshua Hunt Signed-off-by: Vishwanath Pai Signed-off-by: Jozsef Kadlecsik Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/ipset/ip_set.h | 4 ++++ net/netfilter/ipset/ip_set_bitmap_gen.h | 2 +- net/netfilter/ipset/ip_set_core.c | 33 ++++++++++++++++++++++++++++----- net/netfilter/ipset/ip_set_hash_gen.h | 2 +- net/netfilter/ipset/ip_set_list_set.c | 2 +- 5 files changed, 35 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h index 0e1f433cc4b7..f48b8a664b0f 100644 --- a/include/linux/netfilter/ipset/ip_set.h +++ b/include/linux/netfilter/ipset/ip_set.h @@ -234,6 +234,10 @@ struct ip_set { spinlock_t lock; /* References to the set */ u32 ref; + /* References to the set for netlink events like dump, + * ref can be swapped out by ip_set_swap + */ + u32 ref_netlink; /* The core set type */ struct ip_set_type *type; /* The type variant doing the real job */ diff --git a/net/netfilter/ipset/ip_set_bitmap_gen.h b/net/netfilter/ipset/ip_set_bitmap_gen.h index b0bc475f641e..2e8e7e5fb4a6 100644 --- a/net/netfilter/ipset/ip_set_bitmap_gen.h +++ b/net/netfilter/ipset/ip_set_bitmap_gen.h @@ -95,7 +95,7 @@ mtype_head(struct ip_set *set, struct sk_buff *skb) if (!nested) goto nla_put_failure; if (mtype_do_head(skb, map) || - nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref - 1)) || + nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref)) || nla_put_net32(skb, IPSET_ATTR_MEMSIZE, htonl(memsize))) goto nla_put_failure; if (unlikely(ip_set_put_flags(skb, set))) diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index 7e6568cad494..a748b0c2c981 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -497,6 +497,26 @@ __ip_set_put(struct ip_set *set) write_unlock_bh(&ip_set_ref_lock); } +/* set->ref can be swapped out by ip_set_swap, netlink events (like dump) need + * a separate reference counter + */ +static inline void +__ip_set_get_netlink(struct ip_set *set) +{ + write_lock_bh(&ip_set_ref_lock); + set->ref_netlink++; + write_unlock_bh(&ip_set_ref_lock); +} + +static inline void +__ip_set_put_netlink(struct ip_set *set) +{ + write_lock_bh(&ip_set_ref_lock); + BUG_ON(set->ref_netlink == 0); + set->ref_netlink--; + write_unlock_bh(&ip_set_ref_lock); +} + /* Add, del and test set entries from kernel. 
* * The set behind the index must exist and must be referenced @@ -1002,7 +1022,7 @@ static int ip_set_destroy(struct net *net, struct sock *ctnl, if (!attr[IPSET_ATTR_SETNAME]) { for (i = 0; i < inst->ip_set_max; i++) { s = ip_set(inst, i); - if (s && s->ref) { + if (s && (s->ref || s->ref_netlink)) { ret = -IPSET_ERR_BUSY; goto out; } @@ -1024,7 +1044,7 @@ static int ip_set_destroy(struct net *net, struct sock *ctnl, if (!s) { ret = -ENOENT; goto out; - } else if (s->ref) { + } else if (s->ref || s->ref_netlink) { ret = -IPSET_ERR_BUSY; goto out; } @@ -1171,6 +1191,9 @@ static int ip_set_swap(struct net *net, struct sock *ctnl, struct sk_buff *skb, from->family == to->family)) return -IPSET_ERR_TYPE_MISMATCH; + if (from->ref_netlink || to->ref_netlink) + return -EBUSY; + strncpy(from_name, from->name, IPSET_MAXNAMELEN); strncpy(from->name, to->name, IPSET_MAXNAMELEN); strncpy(to->name, from_name, IPSET_MAXNAMELEN); @@ -1206,7 +1229,7 @@ ip_set_dump_done(struct netlink_callback *cb) if (set->variant->uref) set->variant->uref(set, cb, false); pr_debug("release set %s\n", set->name); - __ip_set_put_byindex(inst, index); + __ip_set_put_netlink(set); } return 0; } @@ -1328,7 +1351,7 @@ dump_last: if (!cb->args[IPSET_CB_ARG0]) { /* Start listing: make sure set won't be destroyed */ pr_debug("reference set\n"); - set->ref++; + set->ref_netlink++; } write_unlock_bh(&ip_set_ref_lock); nlh = start_msg(skb, NETLINK_CB(cb->skb).portid, @@ -1396,7 +1419,7 @@ release_refcount: if (set->variant->uref) set->variant->uref(set, cb, false); pr_debug("release set %s\n", set->name); - __ip_set_put_byindex(inst, index); + __ip_set_put_netlink(set); cb->args[IPSET_CB_ARG0] = 0; } out: diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h index e5336ab36d67..d32fd6b036bf 100644 --- a/net/netfilter/ipset/ip_set_hash_gen.h +++ b/net/netfilter/ipset/ip_set_hash_gen.h @@ -1082,7 +1082,7 @@ mtype_head(struct ip_set *set, struct sk_buff *skb) if (nla_put_u32(skb, IPSET_ATTR_MARKMASK, h->markmask)) goto nla_put_failure; #endif - if (nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref - 1)) || + if (nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref)) || nla_put_net32(skb, IPSET_ATTR_MEMSIZE, htonl(memsize))) goto nla_put_failure; if (unlikely(ip_set_put_flags(skb, set))) diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c index 24c6c1962aea..a2a89e4e0a14 100644 --- a/net/netfilter/ipset/ip_set_list_set.c +++ b/net/netfilter/ipset/ip_set_list_set.c @@ -458,7 +458,7 @@ list_set_head(struct ip_set *set, struct sk_buff *skb) if (!nested) goto nla_put_failure; if (nla_put_net32(skb, IPSET_ATTR_SIZE, htonl(map->size)) || - nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref - 1)) || + nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref)) || nla_put_net32(skb, IPSET_ATTR_MEMSIZE, htonl(sizeof(*map) + n * set->dsize))) goto nla_put_failure; -- cgit v1.2.3 From fc0c2028135c7f75fce36b90e44efb8003a9173b Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 8 Mar 2016 10:30:19 -0800 Subject: x86, pmem: use memcpy_mcsafe() for memcpy_from_pmem() Update the definition of memcpy_from_pmem() to return 0 or a negative error code. Implement x86/arch_memcpy_from_pmem() with memcpy_mcsafe(). 
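With this, callers must treat reads from pmem as fallible. A minimal sketch of the resulting pattern (the wrapper name is hypothetical; the pmem driver hunks below do the same in pmem_do_bvec() and pmem_rw_bytes()):

static int example_read_pmem(void *dst, void __pmem *src, size_t n)
{
	int rc = memcpy_from_pmem(dst, src, n);	/* 0 or negative errno */

	if (rc)
		return rc;	/* e.g. -EIO after a recovered machine check */

	/* dst is only trustworthy on the success path */
	return 0;
}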
Cc: Borislav Petkov Cc: Tony Luck Cc: Thomas Gleixner Cc: Andy Lutomirski Cc: Peter Zijlstra Cc: Andrew Morton Cc: Linus Torvalds Acked-by: Ingo Molnar Reviewed-by: Ross Zwisler Signed-off-by: Dan Williams --- arch/x86/include/asm/pmem.h | 9 +++++++++ drivers/nvdimm/pmem.c | 4 ++-- include/linux/pmem.h | 22 ++++++++++++++++------ 3 files changed, 27 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/include/asm/pmem.h b/arch/x86/include/asm/pmem.h index bf8b35d2035a..fbc5e92e1ecc 100644 --- a/arch/x86/include/asm/pmem.h +++ b/arch/x86/include/asm/pmem.h @@ -47,6 +47,15 @@ static inline void arch_memcpy_to_pmem(void __pmem *dst, const void *src, BUG(); } +static inline int arch_memcpy_from_pmem(void *dst, const void __pmem *src, + size_t n) +{ + if (static_cpu_has(X86_FEATURE_MCE_RECOVERY)) + return memcpy_mcsafe(dst, (void __force *) src, n); + memcpy(dst, (void __force *) src, n); + return 0; +} + /** * arch_wmb_pmem - synchronize writes to persistent memory * diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index ca5721c306bb..cc31c6f1f88e 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -99,7 +99,7 @@ static int pmem_do_bvec(struct pmem_device *pmem, struct page *page, if (unlikely(bad_pmem)) rc = -EIO; else { - memcpy_from_pmem(mem + off, pmem_addr, len); + rc = memcpy_from_pmem(mem + off, pmem_addr, len); flush_dcache_page(page); } } else { @@ -295,7 +295,7 @@ static int pmem_rw_bytes(struct nd_namespace_common *ndns, if (unlikely(is_bad_pmem(&pmem->bb, offset / 512, sz_align))) return -EIO; - memcpy_from_pmem(buf, pmem->virt_addr + offset, size); + return memcpy_from_pmem(buf, pmem->virt_addr + offset, size); } else { memcpy_to_pmem(pmem->virt_addr + offset, buf, size); wmb_pmem(); diff --git a/include/linux/pmem.h b/include/linux/pmem.h index 3ec5309e29f3..ac6d872ce067 100644 --- a/include/linux/pmem.h +++ b/include/linux/pmem.h @@ -42,6 +42,13 @@ static inline void arch_memcpy_to_pmem(void __pmem *dst, const void *src, BUG(); } +static inline int arch_memcpy_from_pmem(void *dst, const void __pmem *src, + size_t n) +{ + BUG(); + return -EFAULT; +} + static inline size_t arch_copy_from_iter_pmem(void __pmem *addr, size_t bytes, struct iov_iter *i) { @@ -66,14 +73,17 @@ static inline void arch_invalidate_pmem(void __pmem *addr, size_t size) #endif /* - * Architectures that define ARCH_HAS_PMEM_API must provide - * implementations for arch_memcpy_to_pmem(), arch_wmb_pmem(), - * arch_copy_from_iter_pmem(), arch_clear_pmem(), arch_wb_cache_pmem() - * and arch_has_wmb_pmem(). + * memcpy_from_pmem - read from persistent memory with error handling + * @dst: destination buffer + * @src: source buffer + * @size: transfer length + * + * Returns 0 on success negative error code on failure. */ -static inline void memcpy_from_pmem(void *dst, void __pmem const *src, size_t size) +static inline int memcpy_from_pmem(void *dst, void __pmem const *src, + size_t size) { - memcpy(dst, (void __force const *) src, size); + return arch_memcpy_from_pmem(dst, src, size); } static inline bool arch_has_pmem_api(void) -- cgit v1.2.3 From 5acba71e18833b9d06686b3751598bfa263a3ac3 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 24 Mar 2016 15:37:59 +0100 Subject: locking/atomic: Introduce atomic_fetch_or() This is deemed to replace the type generic fetch_or() which brings a lot of issues such as macro induced block variable aliasing and sloppy types. Not to mention fetch_or() doesn't refer to any namespace, adding even more confusion. 
So let's provide an atomic_t version. Current and next users of fetch_or() are thus encouraged to use atomic_t. Signed-off-by: Frederic Weisbecker Cc: Andrew Morton Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1458830281-4255-2-git-send-email-fweisbec@gmail.com Signed-off-by: Ingo Molnar --- include/linux/atomic.h | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'include/linux') diff --git a/include/linux/atomic.h b/include/linux/atomic.h index df4f369254c0..3d64c0852164 100644 --- a/include/linux/atomic.h +++ b/include/linux/atomic.h @@ -558,6 +558,27 @@ static inline int atomic_dec_if_positive(atomic_t *v) } #endif +/** + * atomic_fetch_or - perform *p |= mask and return old value of *p + * @p: pointer to atomic_t + * @mask: mask to OR on the atomic_t + */ +#ifndef atomic_fetch_or +static inline int atomic_fetch_or(atomic_t *p, int mask) +{ + int old, val = atomic_read(p); + + for (;;) { + old = atomic_cmpxchg(p, val, val | mask); + if (old == val) + break; + val = old; + } + + return old; +} +#endif + /** * fetch_or - perform *ptr |= mask and return old value of *ptr * @ptr: pointer to value -- cgit v1.2.3 From f009a7a767e792d5ab0b46c08d46236ea5271dd9 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 24 Mar 2016 15:38:00 +0100 Subject: timers/nohz: Convert tick dependency mask to atomic_t The tick dependency mask was initially an unsigned long because that is the type clear_bit() operates on and fetch_or() accepts. But now that we have atomic_fetch_or(), we can instead use atomic_andnot() to clear the bit. This consolidates the type of our tick dependency mask, reduces its size in structures, and lets us benefit from possible architecture optimizations on atomic_t operations.
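The idiom this enables (atomically set a bit and act only on the empty-to-nonempty transition) looks roughly like this; a sketch, where example_kick() is a hypothetical stand-in for the real notification, cf. tick_nohz_dep_set_all() in the diff below:

static void example_set_dep(atomic_t *dep, int bit)
{
	int prev = atomic_fetch_or(dep, BIT(bit));

	if (!prev)		/* mask was empty: first dependency set */
		example_kick();	/* hypothetical: kick the affected CPUs */
}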
Suggested-by: Linus Torvalds Signed-off-by: Frederic Weisbecker Cc: Andrew Morton Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1458830281-4255-3-git-send-email-fweisbec@gmail.com Signed-off-by: Ingo Molnar --- include/linux/sched.h | 4 ++-- kernel/time/tick-sched.c | 61 ++++++++++++++++++++++++------------------------ kernel/time/tick-sched.h | 2 +- 3 files changed, 33 insertions(+), 34 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 60bba7e032dc..52c4847b05e2 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -720,7 +720,7 @@ struct signal_struct { struct task_cputime cputime_expires; #ifdef CONFIG_NO_HZ_FULL - unsigned long tick_dep_mask; + atomic_t tick_dep_mask; #endif struct list_head cpu_timers[3]; @@ -1549,7 +1549,7 @@ struct task_struct { #endif #ifdef CONFIG_NO_HZ_FULL - unsigned long tick_dep_mask; + atomic_t tick_dep_mask; #endif unsigned long nvcsw, nivcsw; /* context switch counts */ u64 start_time; /* monotonic time in nsec */ diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 084b79f5917e..58e3310c9b21 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -157,52 +157,50 @@ static void tick_sched_handle(struct tick_sched *ts, struct pt_regs *regs) cpumask_var_t tick_nohz_full_mask; cpumask_var_t housekeeping_mask; bool tick_nohz_full_running; -static unsigned long tick_dep_mask; +static atomic_t tick_dep_mask; -static void trace_tick_dependency(unsigned long dep) +static bool check_tick_dependency(atomic_t *dep) { - if (dep & TICK_DEP_MASK_POSIX_TIMER) { + int val = atomic_read(dep); + + if (val & TICK_DEP_MASK_POSIX_TIMER) { trace_tick_stop(0, TICK_DEP_MASK_POSIX_TIMER); - return; + return true; } - if (dep & TICK_DEP_MASK_PERF_EVENTS) { + if (val & TICK_DEP_MASK_PERF_EVENTS) { trace_tick_stop(0, TICK_DEP_MASK_PERF_EVENTS); - return; + return true; } - if (dep & TICK_DEP_MASK_SCHED) { + if (val & TICK_DEP_MASK_SCHED) { trace_tick_stop(0, TICK_DEP_MASK_SCHED); - return; + return true; } - if (dep & TICK_DEP_MASK_CLOCK_UNSTABLE) + if (val & TICK_DEP_MASK_CLOCK_UNSTABLE) { trace_tick_stop(0, TICK_DEP_MASK_CLOCK_UNSTABLE); + return true; + } + + return false; } static bool can_stop_full_tick(struct tick_sched *ts) { WARN_ON_ONCE(!irqs_disabled()); - if (tick_dep_mask) { - trace_tick_dependency(tick_dep_mask); + if (check_tick_dependency(&tick_dep_mask)) return false; - } - if (ts->tick_dep_mask) { - trace_tick_dependency(ts->tick_dep_mask); + if (check_tick_dependency(&ts->tick_dep_mask)) return false; - } - if (current->tick_dep_mask) { - trace_tick_dependency(current->tick_dep_mask); + if (check_tick_dependency(¤t->tick_dep_mask)) return false; - } - if (current->signal->tick_dep_mask) { - trace_tick_dependency(current->signal->tick_dep_mask); + if (check_tick_dependency(¤t->signal->tick_dep_mask)) return false; - } return true; } @@ -259,12 +257,12 @@ static void tick_nohz_full_kick_all(void) preempt_enable(); } -static void tick_nohz_dep_set_all(unsigned long *dep, +static void tick_nohz_dep_set_all(atomic_t *dep, enum tick_dep_bits bit) { - unsigned long prev; + int prev; - prev = fetch_or(dep, BIT_MASK(bit)); + prev = atomic_fetch_or(dep, BIT(bit)); if (!prev) tick_nohz_full_kick_all(); } @@ -280,7 +278,7 @@ void tick_nohz_dep_set(enum tick_dep_bits bit) void tick_nohz_dep_clear(enum tick_dep_bits bit) { - clear_bit(bit, &tick_dep_mask); + atomic_andnot(BIT(bit), &tick_dep_mask); } /* @@ -289,12 +287,12 @@ void tick_nohz_dep_clear(enum 
tick_dep_bits bit) */ void tick_nohz_dep_set_cpu(int cpu, enum tick_dep_bits bit) { - unsigned long prev; + int prev; struct tick_sched *ts; ts = per_cpu_ptr(&tick_cpu_sched, cpu); - prev = fetch_or(&ts->tick_dep_mask, BIT_MASK(bit)); + prev = atomic_fetch_or(&ts->tick_dep_mask, BIT(bit)); if (!prev) { preempt_disable(); /* Perf needs local kick that is NMI safe */ @@ -313,7 +311,7 @@ void tick_nohz_dep_clear_cpu(int cpu, enum tick_dep_bits bit) { struct tick_sched *ts = per_cpu_ptr(&tick_cpu_sched, cpu); - clear_bit(bit, &ts->tick_dep_mask); + atomic_andnot(BIT(bit), &ts->tick_dep_mask); } /* @@ -331,7 +329,7 @@ void tick_nohz_dep_set_task(struct task_struct *tsk, enum tick_dep_bits bit) void tick_nohz_dep_clear_task(struct task_struct *tsk, enum tick_dep_bits bit) { - clear_bit(bit, &tsk->tick_dep_mask); + atomic_andnot(BIT(bit), &tsk->tick_dep_mask); } /* @@ -345,7 +343,7 @@ void tick_nohz_dep_set_signal(struct signal_struct *sig, enum tick_dep_bits bit) void tick_nohz_dep_clear_signal(struct signal_struct *sig, enum tick_dep_bits bit) { - clear_bit(bit, &sig->tick_dep_mask); + atomic_andnot(BIT(bit), &sig->tick_dep_mask); } /* @@ -366,7 +364,8 @@ void __tick_nohz_task_switch(void) ts = this_cpu_ptr(&tick_cpu_sched); if (ts->tick_stopped) { - if (current->tick_dep_mask || current->signal->tick_dep_mask) + if (atomic_read(¤t->tick_dep_mask) || + atomic_read(¤t->signal->tick_dep_mask)) tick_nohz_full_kick(); } out: diff --git a/kernel/time/tick-sched.h b/kernel/time/tick-sched.h index eb4e32566a83..bf38226e5c17 100644 --- a/kernel/time/tick-sched.h +++ b/kernel/time/tick-sched.h @@ -60,7 +60,7 @@ struct tick_sched { u64 next_timer; ktime_t idle_expires; int do_timer_last; - unsigned long tick_dep_mask; + atomic_t tick_dep_mask; }; extern struct tick_sched *tick_get_tick_sched(int cpu); -- cgit v1.2.3 From 5529578a27288d11d4d15635c258c6dde0f0fb10 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 24 Mar 2016 15:38:01 +0100 Subject: locking/atomic, sched: Unexport fetch_or() This patch functionally reverts: 5fd7a09cfb8c ("atomic: Export fetch_or()") During the merge Linus observed that the generic version of fetch_or() was messy: " This makes the ugly "fetch_or()" macro that the scheduler used internally a new generic helper, and does a bad job at it. " e23604edac2a Merge branch 'timers-nohz-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip Now that we have introduced atomic_fetch_or(), fetch_or() is only used by the scheduler in order to deal with thread_info flags which type can vary across architectures. Lets confine fetch_or() back to the scheduler so that we encourage future users to use the more robust and well typed atomic_t version instead. While at it, fetch_or() gets robustified, pasting improvements from a previous patch by Ingo Molnar that avoids needless expression re-evaluations in the loop. 
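The re-evaluation hazard is easiest to see with an argument expression that has side effects. In the old generic macro, (ptr) and (mask) were expanded verbatim inside the cmpxchg() loop, so they were re-evaluated on every failed attempt; the reworked scheduler-private macro (shown in the diff below) snapshots them once into typeof() locals first. A contrived sketch, with noisy_mask() a hypothetical function that has side effects:

static unsigned long example_once(unsigned long *word)
{
	/* old macro: noisy_mask() would be re-run on every cmpxchg retry */
	/* new macro: _mask is captured once before the loop begins       */
	return fetch_or(word, noisy_mask());
}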
Reported-by: Linus Torvalds Signed-off-by: Frederic Weisbecker Cc: Andrew Morton Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1458830281-4255-4-git-send-email-fweisbec@gmail.com Signed-off-by: Ingo Molnar --- include/linux/atomic.h | 21 --------------------- kernel/sched/core.c | 18 ++++++++++++++++++ 2 files changed, 18 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/include/linux/atomic.h b/include/linux/atomic.h index 3d64c0852164..506c3531832e 100644 --- a/include/linux/atomic.h +++ b/include/linux/atomic.h @@ -579,27 +579,6 @@ static inline int atomic_fetch_or(atomic_t *p, int mask) } #endif -/** - * fetch_or - perform *ptr |= mask and return old value of *ptr - * @ptr: pointer to value - * @mask: mask to OR on the value - * - * cmpxchg based fetch_or, macro so it works for different integer types - */ -#ifndef fetch_or -#define fetch_or(ptr, mask) \ -({ typeof(*(ptr)) __old, __val = *(ptr); \ - for (;;) { \ - __old = cmpxchg((ptr), __val, __val | (mask)); \ - if (__old == __val) \ - break; \ - __val = __old; \ - } \ - __old; \ -}) -#endif - - #ifdef CONFIG_GENERIC_ATOMIC64 #include #endif diff --git a/kernel/sched/core.c b/kernel/sched/core.c index d8465eeab8b3..8b489fcac37b 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -321,6 +321,24 @@ static inline void init_hrtick(void) } #endif /* CONFIG_SCHED_HRTICK */ +/* + * cmpxchg based fetch_or, macro so it works for different integer types + */ +#define fetch_or(ptr, mask) \ + ({ \ + typeof(ptr) _ptr = (ptr); \ + typeof(mask) _mask = (mask); \ + typeof(*_ptr) _old, _val = *_ptr; \ + \ + for (;;) { \ + _old = cmpxchg(_ptr, _val, _val | _mask); \ + if (_old == _val) \ + break; \ + _val = _old; \ + } \ + _old; \ +}) + #if defined(CONFIG_SMP) && defined(TIF_POLLING_NRFLAG) /* * Atomically set TIF_NEED_RESCHED and test for TIF_POLLING_NRFLAG, -- cgit v1.2.3 From 5a5abb1fa3b05dd6aa821525832644c1e7d2905f Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 31 Mar 2016 02:13:18 +0200 Subject: tun, bpf: fix suspicious RCU usage in tun_{attach, detach}_filter Sasha Levin reported a suspicious rcu_dereference_protected() warning found while fuzzing with trinity that is similar to this one: [ 52.765684] net/core/filter.c:2262 suspicious rcu_dereference_protected() usage! [ 52.765688] other info that might help us debug this: [ 52.765695] rcu_scheduler_active = 1, debug_locks = 1 [ 52.765701] 1 lock held by a.out/1525: [ 52.765704] #0: (rtnl_mutex){+.+.+.}, at: [] rtnl_lock+0x17/0x20 [ 52.765721] stack backtrace: [ 52.765728] CPU: 1 PID: 1525 Comm: a.out Not tainted 4.5.0+ #264 [...] [ 52.765768] Call Trace: [ 52.765775] [] dump_stack+0x85/0xc8 [ 52.765784] [] lockdep_rcu_suspicious+0xd5/0x110 [ 52.765792] [] sk_detach_filter+0x82/0x90 [ 52.765801] [] tun_detach_filter+0x35/0x90 [tun] [ 52.765810] [] __tun_chr_ioctl+0x354/0x1130 [tun] [ 52.765818] [] ? selinux_file_ioctl+0x130/0x210 [ 52.765827] [] tun_chr_ioctl+0x13/0x20 [tun] [ 52.765834] [] do_vfs_ioctl+0x96/0x690 [ 52.765843] [] ? security_file_ioctl+0x43/0x60 [ 52.765850] [] SyS_ioctl+0x79/0x90 [ 52.765858] [] do_syscall_64+0x62/0x140 [ 52.765866] [] entry_SYSCALL64_slow_path+0x25/0x25 Same can be triggered with PROVE_RCU (+ PROVE_RCU_REPEATEDLY) enabled from tun_attach_filter() when user space calls ioctl(tun_fd, TUN{ATTACH, DETACH}FILTER, ...) for adding/removing a BPF filter on tap devices. 
Since the fix in f91ff5b9ff52 ("net: sk_{detach|attach}_filter() rcu fixes") sk_attach_filter()/sk_detach_filter() now dereferences the filter with rcu_dereference_protected(), checking whether socket lock is held in control path. Since its introduction in 994051625981 ("tun: socket filter support"), tap filters are managed under RTNL lock from __tun_chr_ioctl(). Thus the sock_owned_by_user(sk) doesn't apply in this specific case and therefore triggers the false positive. Extend the BPF API with __sk_attach_filter()/__sk_detach_filter() pair that is used by tap filters and pass in lockdep_rtnl_is_held() for the rcu_dereference_protected() checks instead. Reported-by: Sasha Levin Signed-off-by: Daniel Borkmann Signed-off-by: David S. Miller --- drivers/net/tun.c | 8 +++++--- include/linux/filter.h | 4 ++++ net/core/filter.c | 33 +++++++++++++++++++++------------ 3 files changed, 30 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/tun.c b/drivers/net/tun.c index afdf950617c3..510e90a6bb26 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -622,7 +622,8 @@ static int tun_attach(struct tun_struct *tun, struct file *file, bool skip_filte /* Re-attach the filter to persist device */ if (!skip_filter && (tun->filter_attached == true)) { - err = sk_attach_filter(&tun->fprog, tfile->socket.sk); + err = __sk_attach_filter(&tun->fprog, tfile->socket.sk, + lockdep_rtnl_is_held()); if (!err) goto out; } @@ -1822,7 +1823,7 @@ static void tun_detach_filter(struct tun_struct *tun, int n) for (i = 0; i < n; i++) { tfile = rtnl_dereference(tun->tfiles[i]); - sk_detach_filter(tfile->socket.sk); + __sk_detach_filter(tfile->socket.sk, lockdep_rtnl_is_held()); } tun->filter_attached = false; @@ -1835,7 +1836,8 @@ static int tun_attach_filter(struct tun_struct *tun) for (i = 0; i < tun->numqueues; i++) { tfile = rtnl_dereference(tun->tfiles[i]); - ret = sk_attach_filter(&tun->fprog, tfile->socket.sk); + ret = __sk_attach_filter(&tun->fprog, tfile->socket.sk, + lockdep_rtnl_is_held()); if (ret) { tun_detach_filter(tun, i); return ret; diff --git a/include/linux/filter.h b/include/linux/filter.h index 43aa1f8855c7..a51a5361695f 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -465,10 +465,14 @@ int bpf_prog_create_from_user(struct bpf_prog **pfp, struct sock_fprog *fprog, void bpf_prog_destroy(struct bpf_prog *fp); int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk); +int __sk_attach_filter(struct sock_fprog *fprog, struct sock *sk, + bool locked); int sk_attach_bpf(u32 ufd, struct sock *sk); int sk_reuseport_attach_filter(struct sock_fprog *fprog, struct sock *sk); int sk_reuseport_attach_bpf(u32 ufd, struct sock *sk); int sk_detach_filter(struct sock *sk); +int __sk_detach_filter(struct sock *sk, bool locked); + int sk_get_filter(struct sock *sk, struct sock_filter __user *filter, unsigned int len); diff --git a/net/core/filter.c b/net/core/filter.c index 4b81b71171b4..ca7f832b2980 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -1149,7 +1149,8 @@ void bpf_prog_destroy(struct bpf_prog *fp) } EXPORT_SYMBOL_GPL(bpf_prog_destroy); -static int __sk_attach_prog(struct bpf_prog *prog, struct sock *sk) +static int __sk_attach_prog(struct bpf_prog *prog, struct sock *sk, + bool locked) { struct sk_filter *fp, *old_fp; @@ -1165,10 +1166,8 @@ static int __sk_attach_prog(struct bpf_prog *prog, struct sock *sk) return -ENOMEM; } - old_fp = rcu_dereference_protected(sk->sk_filter, - sock_owned_by_user(sk)); + old_fp = 
rcu_dereference_protected(sk->sk_filter, locked); rcu_assign_pointer(sk->sk_filter, fp); - if (old_fp) sk_filter_uncharge(sk, old_fp); @@ -1247,7 +1246,8 @@ struct bpf_prog *__get_filter(struct sock_fprog *fprog, struct sock *sk) * occurs or there is insufficient memory for the filter a negative * errno code is returned. On success the return is zero. */ -int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk) +int __sk_attach_filter(struct sock_fprog *fprog, struct sock *sk, + bool locked) { struct bpf_prog *prog = __get_filter(fprog, sk); int err; @@ -1255,7 +1255,7 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk) if (IS_ERR(prog)) return PTR_ERR(prog); - err = __sk_attach_prog(prog, sk); + err = __sk_attach_prog(prog, sk, locked); if (err < 0) { __bpf_prog_release(prog); return err; @@ -1263,7 +1263,12 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk) return 0; } -EXPORT_SYMBOL_GPL(sk_attach_filter); +EXPORT_SYMBOL_GPL(__sk_attach_filter); + +int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk) +{ + return __sk_attach_filter(fprog, sk, sock_owned_by_user(sk)); +} int sk_reuseport_attach_filter(struct sock_fprog *fprog, struct sock *sk) { @@ -1309,7 +1314,7 @@ int sk_attach_bpf(u32 ufd, struct sock *sk) if (IS_ERR(prog)) return PTR_ERR(prog); - err = __sk_attach_prog(prog, sk); + err = __sk_attach_prog(prog, sk, sock_owned_by_user(sk)); if (err < 0) { bpf_prog_put(prog); return err; @@ -2250,7 +2255,7 @@ static int __init register_sk_filter_ops(void) } late_initcall(register_sk_filter_ops); -int sk_detach_filter(struct sock *sk) +int __sk_detach_filter(struct sock *sk, bool locked) { int ret = -ENOENT; struct sk_filter *filter; @@ -2258,8 +2263,7 @@ int sk_detach_filter(struct sock *sk) if (sock_flag(sk, SOCK_FILTER_LOCKED)) return -EPERM; - filter = rcu_dereference_protected(sk->sk_filter, - sock_owned_by_user(sk)); + filter = rcu_dereference_protected(sk->sk_filter, locked); if (filter) { RCU_INIT_POINTER(sk->sk_filter, NULL); sk_filter_uncharge(sk, filter); @@ -2268,7 +2272,12 @@ int sk_detach_filter(struct sock *sk) return ret; } -EXPORT_SYMBOL_GPL(sk_detach_filter); +EXPORT_SYMBOL_GPL(__sk_detach_filter); + +int sk_detach_filter(struct sock *sk) +{ + return __sk_detach_filter(sk, sock_owned_by_user(sk)); +} int sk_get_filter(struct sock *sk, struct sock_filter __user *ubuf, unsigned int len) -- cgit v1.2.3 From d7e944c8ddc0983640a9a32868fb217485d12ca2 Mon Sep 17 00:00:00 2001 From: Giuseppe CAVALLARO Date: Fri, 1 Apr 2016 09:07:15 +0200 Subject: Revert "stmmac: Fix 'eth0: No PHY found' regression" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit 88f8b1bb41c6208f81b6a480244533ded7b59493. due to problems on GeekBox and Banana Pi M1 board when connected to a real transceiver instead of a switch via fixed-link. Signed-off-by: Giuseppe Cavallaro Cc: Gabriel Fernandez Cc: Andreas Färber Cc: Frank Schäfer Cc: Dinh Nguyen Cc: David S. Miller Signed-off-by: David S. 
Miller --- drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c | 11 ++++++++++- drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c | 9 +-------- include/linux/stmmac.h | 1 - 3 files changed, 11 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c index ea76129dafc2..af09ced53bd8 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c @@ -199,12 +199,21 @@ int stmmac_mdio_register(struct net_device *ndev) struct stmmac_priv *priv = netdev_priv(ndev); struct stmmac_mdio_bus_data *mdio_bus_data = priv->plat->mdio_bus_data; int addr, found; - struct device_node *mdio_node = priv->plat->mdio_node; + struct device_node *mdio_node = NULL; + struct device_node *child_node = NULL; if (!mdio_bus_data) return 0; if (IS_ENABLED(CONFIG_OF)) { + for_each_child_of_node(priv->device->of_node, child_node) { + if (of_device_is_compatible(child_node, + "snps,dwmac-mdio")) { + mdio_node = child_node; + break; + } + } + if (mdio_node) { netdev_dbg(ndev, "FOUND MDIO subnode\n"); } else { diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c index dcbd2a1601e8..9cf181f839fd 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c @@ -146,7 +146,6 @@ stmmac_probe_config_dt(struct platform_device *pdev, const char **mac) struct device_node *np = pdev->dev.of_node; struct plat_stmmacenet_data *plat; struct stmmac_dma_cfg *dma_cfg; - struct device_node *child_node = NULL; plat = devm_kzalloc(&pdev->dev, sizeof(*plat), GFP_KERNEL); if (!plat) @@ -177,19 +176,13 @@ stmmac_probe_config_dt(struct platform_device *pdev, const char **mac) plat->phy_node = of_node_get(np); } - for_each_child_of_node(np, child_node) - if (of_device_is_compatible(child_node, "snps,dwmac-mdio")) { - plat->mdio_node = child_node; - break; - } - /* "snps,phy-addr" is not a standard property. Mark it as deprecated * and warn of its use. Remove this when phy node support is added. */ if (of_property_read_u32(np, "snps,phy-addr", &plat->phy_addr) == 0) dev_warn(&pdev->dev, "snps,phy-addr property is deprecated\n"); - if ((plat->phy_node && !of_phy_is_fixed_link(np)) || !plat->mdio_node) + if ((plat->phy_node && !of_phy_is_fixed_link(np)) || plat->phy_bus_name) plat->mdio_bus_data = NULL; else plat->mdio_bus_data = diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index 4bcf5a61aada..6e53fa8942a4 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -114,7 +114,6 @@ struct plat_stmmacenet_data { int interface; struct stmmac_mdio_bus_data *mdio_bus_data; struct device_node *phy_node; - struct device_node *mdio_node; struct stmmac_dma_cfg *dma_cfg; int clk_csr; int has_gmac; -- cgit v1.2.3 From a7657f128c279ae5796ab2ca7d04a7819f4259f0 Mon Sep 17 00:00:00 2001 From: Giuseppe CAVALLARO Date: Fri, 1 Apr 2016 09:07:16 +0200 Subject: stmmac: fix MDIO settings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Initially the phy_bus_name was added to manipulate the driver name but it was recently just used to manage the fixed-link and then to take some decision at run-time. So the patch uses the is_pseudo_fixed_link and removes the phy_bus_name variable not necessary anymore. The driver can manage the mdio registration by using phy-handle, dwmac-mdio and own parameter e.g. 
snps,phy-addr. This patch takes care about all these possible configurations and fixes the mdio registration in case of there is a real transceiver or a switch (that needs to be managed by using fixed-link). Signed-off-by: Giuseppe Cavallaro Reviewed-by: Andreas Färber Tested-by: Frank Schäfer Cc: Gabriel Fernandez Cc: Dinh Nguyen Cc: David S. Miller Cc: Phil Reid Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 16 ++--- drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c | 19 +---- .../net/ethernet/stmicro/stmmac/stmmac_platform.c | 84 +++++++++++++++++----- include/linux/stmmac.h | 2 +- 4 files changed, 73 insertions(+), 48 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 4c5ce9848ca9..78464fa7fe1f 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -278,7 +278,6 @@ static void stmmac_eee_ctrl_timer(unsigned long arg) */ bool stmmac_eee_init(struct stmmac_priv *priv) { - char *phy_bus_name = priv->plat->phy_bus_name; unsigned long flags; bool ret = false; @@ -290,7 +289,7 @@ bool stmmac_eee_init(struct stmmac_priv *priv) goto out; /* Never init EEE in case of a switch is attached */ - if (phy_bus_name && (!strcmp(phy_bus_name, "fixed"))) + if (priv->phydev->is_pseudo_fixed_link) goto out; /* MAC core supports the EEE feature. */ @@ -827,12 +826,8 @@ static int stmmac_init_phy(struct net_device *dev) phydev = of_phy_connect(dev, priv->plat->phy_node, &stmmac_adjust_link, 0, interface); } else { - if (priv->plat->phy_bus_name) - snprintf(bus_id, MII_BUS_ID_SIZE, "%s-%x", - priv->plat->phy_bus_name, priv->plat->bus_id); - else - snprintf(bus_id, MII_BUS_ID_SIZE, "stmmac-%x", - priv->plat->bus_id); + snprintf(bus_id, MII_BUS_ID_SIZE, "stmmac-%x", + priv->plat->bus_id); snprintf(phy_id_fmt, MII_BUS_ID_SIZE + 3, PHY_ID_FMT, bus_id, priv->plat->phy_addr); @@ -871,9 +866,8 @@ static int stmmac_init_phy(struct net_device *dev) } /* If attached to a switch, there is no reason to poll phy handler */ - if (priv->plat->phy_bus_name) - if (!strcmp(priv->plat->phy_bus_name, "fixed")) - phydev->irq = PHY_IGNORE_INTERRUPT; + if (phydev->is_pseudo_fixed_link) + phydev->irq = PHY_IGNORE_INTERRUPT; pr_debug("stmmac_init_phy: %s: attached to PHY (UID 0x%x)" " Link = %d\n", dev->name, phydev->phy_id, phydev->link); diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c index af09ced53bd8..06704ca6f9ca 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c @@ -198,29 +198,12 @@ int stmmac_mdio_register(struct net_device *ndev) struct mii_bus *new_bus; struct stmmac_priv *priv = netdev_priv(ndev); struct stmmac_mdio_bus_data *mdio_bus_data = priv->plat->mdio_bus_data; + struct device_node *mdio_node = priv->plat->mdio_node; int addr, found; - struct device_node *mdio_node = NULL; - struct device_node *child_node = NULL; if (!mdio_bus_data) return 0; - if (IS_ENABLED(CONFIG_OF)) { - for_each_child_of_node(priv->device->of_node, child_node) { - if (of_device_is_compatible(child_node, - "snps,dwmac-mdio")) { - mdio_node = child_node; - break; - } - } - - if (mdio_node) { - netdev_dbg(ndev, "FOUND MDIO subnode\n"); - } else { - netdev_warn(ndev, "No MDIO subnode found\n"); - } - } - new_bus = mdiobus_alloc(); if (new_bus == NULL) return -ENOMEM; diff --git 
a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c index 9cf181f839fd..cf37ea558ecc 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c @@ -131,6 +131,69 @@ static struct stmmac_axi *stmmac_axi_setup(struct platform_device *pdev) return axi; } +/** + * stmmac_dt_phy - parse device-tree driver parameters to allocate PHY resources + * @plat: driver data platform structure + * @np: device tree node + * @dev: device pointer + * Description: + * The mdio bus will be allocated in case of a phy transceiver is on board; + * it will be NULL if the fixed-link is configured. + * If there is the "snps,dwmac-mdio" sub-node the mdio will be allocated + * in any case (for DSA, mdio must be registered even if fixed-link). + * The table below sums the supported configurations: + * ------------------------------- + * snps,phy-addr | Y + * ------------------------------- + * phy-handle | Y + * ------------------------------- + * fixed-link | N + * ------------------------------- + * snps,dwmac-mdio | + * even if | Y + * fixed-link | + * ------------------------------- + * + * It returns 0 in case of success otherwise -ENODEV. + */ +static int stmmac_dt_phy(struct plat_stmmacenet_data *plat, + struct device_node *np, struct device *dev) +{ + bool mdio = true; + + /* If phy-handle property is passed from DT, use it as the PHY */ + plat->phy_node = of_parse_phandle(np, "phy-handle", 0); + if (plat->phy_node) + dev_dbg(dev, "Found phy-handle subnode\n"); + + /* If phy-handle is not specified, check if we have a fixed-phy */ + if (!plat->phy_node && of_phy_is_fixed_link(np)) { + if ((of_phy_register_fixed_link(np) < 0)) + return -ENODEV; + + dev_dbg(dev, "Found fixed-link subnode\n"); + plat->phy_node = of_node_get(np); + mdio = false; + } + + /* If snps,dwmac-mdio is passed from DT, always register the MDIO */ + for_each_child_of_node(np, plat->mdio_node) { + if (of_device_is_compatible(plat->mdio_node, "snps,dwmac-mdio")) + break; + } + + if (plat->mdio_node) { + dev_dbg(dev, "Found MDIO subnode\n"); + mdio = true; + } + + if (mdio) + plat->mdio_bus_data = + devm_kzalloc(dev, sizeof(struct stmmac_mdio_bus_data), + GFP_KERNEL); + return 0; +} + /** * stmmac_probe_config_dt - parse device-tree driver parameters * @pdev: platform_device structure @@ -165,30 +228,15 @@ stmmac_probe_config_dt(struct platform_device *pdev, const char **mac) /* Default to phy auto-detection */ plat->phy_addr = -1; - /* If we find a phy-handle property, use it as the PHY */ - plat->phy_node = of_parse_phandle(np, "phy-handle", 0); - - /* If phy-handle is not specified, check if we have a fixed-phy */ - if (!plat->phy_node && of_phy_is_fixed_link(np)) { - if ((of_phy_register_fixed_link(np) < 0)) - return ERR_PTR(-ENODEV); - - plat->phy_node = of_node_get(np); - } - /* "snps,phy-addr" is not a standard property. Mark it as deprecated * and warn of its use. Remove this when phy node support is added. 
*/ if (of_property_read_u32(np, "snps,phy-addr", &plat->phy_addr) == 0) dev_warn(&pdev->dev, "snps,phy-addr property is deprecated\n"); - if ((plat->phy_node && !of_phy_is_fixed_link(np)) || plat->phy_bus_name) - plat->mdio_bus_data = NULL; - else - plat->mdio_bus_data = - devm_kzalloc(&pdev->dev, - sizeof(struct stmmac_mdio_bus_data), - GFP_KERNEL); + /* To Configure PHY by using all device-tree supported properties */ + if (stmmac_dt_phy(plat, np, &pdev->dev)) + return ERR_PTR(-ENODEV); of_property_read_u32(np, "tx-fifo-depth", &plat->tx_fifo_size); diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index 6e53fa8942a4..e6bc30a42a74 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -108,12 +108,12 @@ struct stmmac_axi { }; struct plat_stmmacenet_data { - char *phy_bus_name; int bus_id; int phy_addr; int interface; struct stmmac_mdio_bus_data *mdio_bus_data; struct device_node *phy_node; + struct device_node *mdio_node; struct stmmac_dma_cfg *dma_cfg; int clk_csr; int has_gmac; -- cgit v1.2.3 From 969e8d7e47f93ef693028667480558de8f70523f Mon Sep 17 00:00:00 2001 From: Chen Gang Date: Fri, 1 Apr 2016 14:31:17 -0700 Subject: include/linux/huge_mm.h: return NULL instead of false for pmd_trans_huge_lock() The return value of pmd_trans_huge_lock() is a pointer, not a boolean value, so use NULL instead of false as the return value. Signed-off-by: Chen Gang Acked-by: Kirill A. Shutemov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/huge_mm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index 79b0ef6aaa14..7008623e24b1 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -127,7 +127,7 @@ static inline spinlock_t *pmd_trans_huge_lock(pmd_t *pmd, if (pmd_trans_huge(*pmd) || pmd_devmap(*pmd)) return __pmd_trans_huge_lock(pmd, vma); else - return false; + return NULL; } static inline int hpage_nr_pages(struct page *page) { -- cgit v1.2.3 From 09cbfeaf1a5a67bfb3201e0c83c810cecb2efa5a Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Fri, 1 Apr 2016 15:29:47 +0300 Subject: mm, fs: get rid of PAGE_CACHE_* and page_cache_{get,release} macros The PAGE_CACHE_{SIZE,SHIFT,MASK,ALIGN} macros were introduced a *long* time ago with the promise that one day it would be possible to implement the page cache with bigger chunks than PAGE_SIZE. This promise never materialized, and likely never will. We have many places where PAGE_CACHE_SIZE is assumed to be equal to PAGE_SIZE, and it's a constant source of confusion about whether PAGE_CACHE_* or PAGE_* constants should be used in a particular case, especially on the border between fs and mm. Globally switching to PAGE_CACHE_SIZE != PAGE_SIZE would cause too much breakage to be doable. Let's stop pretending that pages in the page cache are special. They are not. The changes are pretty straightforward: - E << (PAGE_CACHE_SHIFT - PAGE_SHIFT) -> E; - E >> (PAGE_CACHE_SHIFT - PAGE_SHIFT) -> E; - PAGE_CACHE_{SIZE,SHIFT,MASK,ALIGN} -> PAGE_{SIZE,SHIFT,MASK,ALIGN}; - page_cache_get() -> get_page(); - page_cache_release() -> put_page(); This patch contains automated changes generated with coccinelle using the script below. For some reason, coccinelle doesn't patch header files; I've called spatch for them manually. The only adjustment after coccinelle is a revert of the changes to the PAGE_CACHE_ALIGN definition: we are going to drop it later. There are a few places in the code that coccinelle didn't reach. I'll fix them manually in a separate patch.
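Concretely, a typical hunk produced by the conversion reads like this (an illustrative composite, not taken from any specific file):

-	index = pos >> PAGE_CACHE_SHIFT;
-	offset = pos & ~PAGE_CACHE_MASK;
-	page_cache_get(page);
+	index = pos >> PAGE_SHIFT;
+	offset = pos & ~PAGE_MASK;
+	get_page(page);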
Comments and documentation also will be addressed with the separate patch. virtual patch @@ expression E; @@ - E << (PAGE_CACHE_SHIFT - PAGE_SHIFT) + E @@ expression E; @@ - E >> (PAGE_CACHE_SHIFT - PAGE_SHIFT) + E @@ @@ - PAGE_CACHE_SHIFT + PAGE_SHIFT @@ @@ - PAGE_CACHE_SIZE + PAGE_SIZE @@ @@ - PAGE_CACHE_MASK + PAGE_MASK @@ expression E; @@ - PAGE_CACHE_ALIGN(E) + PAGE_ALIGN(E) @@ expression E; @@ - page_cache_get(E) + get_page(E) @@ expression E; @@ - page_cache_release(E) + put_page(E) Signed-off-by: Kirill A. Shutemov Acked-by: Michal Hocko Signed-off-by: Linus Torvalds --- arch/arc/mm/cache.c | 2 +- arch/arm/mm/flush.c | 4 +- arch/parisc/kernel/cache.c | 2 +- arch/powerpc/platforms/cell/spufs/inode.c | 4 +- arch/s390/hypfs/inode.c | 4 +- block/bio.c | 8 +- block/blk-core.c | 2 +- block/blk-settings.c | 12 +- block/blk-sysfs.c | 8 +- block/cfq-iosched.c | 2 +- block/compat_ioctl.c | 4 +- block/ioctl.c | 4 +- block/partition-generic.c | 8 +- drivers/block/aoe/aoeblk.c | 2 +- drivers/block/brd.c | 2 +- drivers/block/drbd/drbd_nl.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 2 +- drivers/gpu/drm/armada/armada_gem.c | 4 +- drivers/gpu/drm/drm_gem.c | 4 +- drivers/gpu/drm/i915/i915_gem.c | 8 +- drivers/gpu/drm/i915/i915_gem_userptr.c | 2 +- drivers/gpu/drm/radeon/radeon_ttm.c | 2 +- drivers/gpu/drm/ttm/ttm_tt.c | 4 +- drivers/gpu/drm/via/via_dmablit.c | 2 +- drivers/md/bitmap.c | 2 +- drivers/media/v4l2-core/videobuf-dma-sg.c | 2 +- drivers/misc/ibmasm/ibmasmfs.c | 4 +- drivers/misc/vmw_vmci/vmci_queue_pair.c | 2 +- drivers/mmc/core/host.c | 6 +- drivers/mmc/host/sh_mmcif.c | 2 +- drivers/mmc/host/tmio_mmc_dma.c | 4 +- drivers/mmc/host/tmio_mmc_pio.c | 2 +- drivers/mmc/host/usdhi6rol0.c | 2 +- drivers/mtd/devices/block2mtd.c | 6 +- drivers/mtd/nand/nandsim.c | 6 +- drivers/nvdimm/btt.c | 2 +- drivers/nvdimm/pmem.c | 2 +- drivers/oprofile/oprofilefs.c | 4 +- drivers/scsi/sd.c | 2 +- drivers/scsi/st.c | 4 +- .../lustre/include/linux/libcfs/libcfs_private.h | 2 +- .../lustre/include/linux/libcfs/linux/linux-mem.h | 4 +- .../lustre/lnet/klnds/socklnd/socklnd_lib.c | 2 +- drivers/staging/lustre/lnet/libcfs/debug.c | 2 +- drivers/staging/lustre/lnet/libcfs/tracefile.c | 16 +- drivers/staging/lustre/lnet/libcfs/tracefile.h | 6 +- drivers/staging/lustre/lnet/lnet/lib-md.c | 2 +- drivers/staging/lustre/lnet/lnet/lib-move.c | 6 +- drivers/staging/lustre/lnet/lnet/lib-socket.c | 4 +- drivers/staging/lustre/lnet/lnet/router.c | 6 +- drivers/staging/lustre/lnet/selftest/brw_test.c | 20 +- drivers/staging/lustre/lnet/selftest/conctl.c | 4 +- drivers/staging/lustre/lnet/selftest/conrpc.c | 10 +- drivers/staging/lustre/lnet/selftest/framework.c | 2 +- drivers/staging/lustre/lnet/selftest/rpc.c | 2 +- drivers/staging/lustre/lnet/selftest/selftest.h | 2 +- .../lustre/include/linux/lustre_patchless_compat.h | 2 +- .../lustre/lustre/include/lustre/lustre_idl.h | 2 +- drivers/staging/lustre/lustre/include/lustre_mdc.h | 4 +- drivers/staging/lustre/lustre/include/lustre_net.h | 6 +- drivers/staging/lustre/lustre/include/obd.h | 2 +- .../staging/lustre/lustre/include/obd_support.h | 2 +- drivers/staging/lustre/lustre/lclient/lcommon_cl.c | 4 +- drivers/staging/lustre/lustre/ldlm/ldlm_lib.c | 12 +- drivers/staging/lustre/lustre/ldlm/ldlm_pool.c | 2 +- drivers/staging/lustre/lustre/ldlm/ldlm_request.c | 2 +- drivers/staging/lustre/lustre/llite/dir.c | 18 +- .../staging/lustre/lustre/llite/llite_internal.h | 8 +- drivers/staging/lustre/lustre/llite/llite_lib.c | 8 +- 
drivers/staging/lustre/lustre/llite/llite_mmap.c | 8 +- drivers/staging/lustre/lustre/llite/lloop.c | 12 +- drivers/staging/lustre/lustre/llite/lproc_llite.c | 18 +- drivers/staging/lustre/lustre/llite/rw.c | 22 +- drivers/staging/lustre/lustre/llite/rw26.c | 28 +-- drivers/staging/lustre/lustre/llite/vvp_io.c | 8 +- drivers/staging/lustre/lustre/llite/vvp_page.c | 8 +- drivers/staging/lustre/lustre/lmv/lmv_obd.c | 4 +- drivers/staging/lustre/lustre/mdc/mdc_request.c | 6 +- drivers/staging/lustre/lustre/mgc/mgc_request.c | 22 +- drivers/staging/lustre/lustre/obdclass/cl_page.c | 6 +- drivers/staging/lustre/lustre/obdclass/class_obd.c | 6 +- .../lustre/lustre/obdclass/linux/linux-obdo.c | 4 +- .../lustre/lustre/obdclass/linux/linux-sysctl.c | 6 +- drivers/staging/lustre/lustre/obdclass/lu_object.c | 6 +- .../staging/lustre/lustre/obdecho/echo_client.c | 30 +-- drivers/staging/lustre/lustre/osc/lproc_osc.c | 16 +- drivers/staging/lustre/lustre/osc/osc_cache.c | 42 ++-- drivers/staging/lustre/lustre/osc/osc_page.c | 6 +- drivers/staging/lustre/lustre/osc/osc_request.c | 26 +- drivers/staging/lustre/lustre/ptlrpc/client.c | 6 +- drivers/staging/lustre/lustre/ptlrpc/import.c | 2 +- .../staging/lustre/lustre/ptlrpc/lproc_ptlrpc.c | 4 +- drivers/staging/lustre/lustre/ptlrpc/recover.c | 2 +- drivers/staging/lustre/lustre/ptlrpc/sec_bulk.c | 2 +- drivers/usb/gadget/function/f_fs.c | 4 +- drivers/usb/gadget/legacy/inode.c | 4 +- drivers/usb/storage/scsiglue.c | 2 +- drivers/video/fbdev/pvr2fb.c | 2 +- fs/9p/vfs_addr.c | 18 +- fs/9p/vfs_file.c | 4 +- fs/9p/vfs_super.c | 2 +- fs/affs/file.c | 26 +- fs/afs/dir.c | 2 +- fs/afs/file.c | 4 +- fs/afs/mntpt.c | 6 +- fs/afs/super.c | 4 +- fs/afs/write.c | 26 +- fs/binfmt_elf.c | 2 +- fs/binfmt_elf_fdpic.c | 2 +- fs/block_dev.c | 4 +- fs/btrfs/check-integrity.c | 60 ++--- fs/btrfs/compression.c | 84 +++---- fs/btrfs/disk-io.c | 14 +- fs/btrfs/extent-tree.c | 4 +- fs/btrfs/extent_io.c | 262 ++++++++++----------- fs/btrfs/extent_io.h | 6 +- fs/btrfs/file-item.c | 4 +- fs/btrfs/file.c | 40 ++-- fs/btrfs/free-space-cache.c | 30 +-- fs/btrfs/inode-map.c | 10 +- fs/btrfs/inode.c | 104 ++++---- fs/btrfs/ioctl.c | 84 +++---- fs/btrfs/lzo.c | 32 +-- fs/btrfs/raid56.c | 28 +-- fs/btrfs/reada.c | 30 +-- fs/btrfs/relocation.c | 16 +- fs/btrfs/scrub.c | 24 +- fs/btrfs/send.c | 16 +- fs/btrfs/tests/extent-io-tests.c | 42 ++-- fs/btrfs/tests/free-space-tests.c | 2 +- fs/btrfs/volumes.c | 14 +- fs/btrfs/zlib.c | 38 +-- fs/buffer.c | 100 ++++---- fs/cachefiles/rdwr.c | 38 +-- fs/ceph/addr.c | 114 ++++----- fs/ceph/caps.c | 2 +- fs/ceph/dir.c | 4 +- fs/ceph/file.c | 32 +-- fs/ceph/inode.c | 6 +- fs/ceph/mds_client.c | 2 +- fs/ceph/mds_client.h | 2 +- fs/ceph/super.c | 8 +- fs/cifs/cifsfs.c | 2 +- fs/cifs/cifssmb.c | 16 +- fs/cifs/connect.c | 2 +- fs/cifs/file.c | 94 ++++---- fs/cifs/inode.c | 10 +- fs/configfs/mount.c | 4 +- fs/cramfs/inode.c | 30 +-- fs/crypto/crypto.c | 8 +- fs/dax.c | 30 +-- fs/direct-io.c | 26 +- fs/dlm/lowcomms.c | 8 +- fs/ecryptfs/crypto.c | 22 +- fs/ecryptfs/inode.c | 4 +- fs/ecryptfs/keystore.c | 2 +- fs/ecryptfs/main.c | 8 +- fs/ecryptfs/mmap.c | 44 ++-- fs/ecryptfs/read_write.c | 14 +- fs/efivarfs/super.c | 4 +- fs/exofs/dir.c | 30 +-- fs/exofs/inode.c | 34 +-- fs/exofs/namei.c | 4 +- fs/ext2/dir.c | 32 +-- fs/ext2/namei.c | 6 +- fs/ext4/crypto.c | 8 +- fs/ext4/dir.c | 4 +- fs/ext4/file.c | 4 +- fs/ext4/inline.c | 18 +- fs/ext4/inode.c | 116 ++++----- fs/ext4/mballoc.c | 36 +-- fs/ext4/move_extent.c | 16 +- fs/ext4/page-io.c | 4 +- fs/ext4/readpage.c 
| 10 +- fs/ext4/super.c | 4 +- fs/ext4/symlink.c | 4 +- fs/f2fs/data.c | 52 ++-- fs/f2fs/debug.c | 6 +- fs/f2fs/dir.c | 4 +- fs/f2fs/f2fs.h | 2 +- fs/f2fs/file.c | 74 +++--- fs/f2fs/inline.c | 10 +- fs/f2fs/namei.c | 4 +- fs/f2fs/node.c | 10 +- fs/f2fs/recovery.c | 2 +- fs/f2fs/segment.c | 16 +- fs/f2fs/super.c | 4 +- fs/freevxfs/vxfs_immed.c | 4 +- fs/freevxfs/vxfs_lookup.c | 12 +- fs/freevxfs/vxfs_subr.c | 2 +- fs/fs-writeback.c | 2 +- fs/fscache/page.c | 10 +- fs/fuse/dev.c | 26 +- fs/fuse/file.c | 72 +++--- fs/fuse/inode.c | 16 +- fs/gfs2/aops.c | 44 ++-- fs/gfs2/bmap.c | 12 +- fs/gfs2/file.c | 16 +- fs/gfs2/meta_io.c | 4 +- fs/gfs2/quota.c | 14 +- fs/gfs2/rgrp.c | 5 +- fs/hfs/bnode.c | 12 +- fs/hfs/btree.c | 20 +- fs/hfs/inode.c | 8 +- fs/hfsplus/bitmap.c | 2 +- fs/hfsplus/bnode.c | 90 +++---- fs/hfsplus/btree.c | 22 +- fs/hfsplus/inode.c | 8 +- fs/hfsplus/super.c | 2 +- fs/hfsplus/xattr.c | 6 +- fs/hostfs/hostfs_kern.c | 18 +- fs/hugetlbfs/inode.c | 8 +- fs/isofs/compress.c | 36 +-- fs/isofs/inode.c | 2 +- fs/jbd2/commit.c | 4 +- fs/jbd2/journal.c | 2 +- fs/jbd2/transaction.c | 4 +- fs/jffs2/debug.c | 8 +- fs/jffs2/file.c | 23 +- fs/jffs2/fs.c | 8 +- fs/jffs2/gc.c | 8 +- fs/jffs2/nodelist.c | 8 +- fs/jffs2/write.c | 7 +- fs/jfs/jfs_metapage.c | 42 ++-- fs/jfs/jfs_metapage.h | 4 +- fs/jfs/super.c | 2 +- fs/kernfs/mount.c | 4 +- fs/libfs.c | 24 +- fs/logfs/dev_bdev.c | 2 +- fs/logfs/dev_mtd.c | 10 +- fs/logfs/dir.c | 12 +- fs/logfs/file.c | 26 +- fs/logfs/readwrite.c | 20 +- fs/logfs/segment.c | 28 +-- fs/logfs/super.c | 16 +- fs/minix/dir.c | 18 +- fs/minix/namei.c | 4 +- fs/mpage.c | 20 +- fs/ncpfs/dir.c | 10 +- fs/ncpfs/ncplib_kernel.h | 2 +- fs/nfs/blocklayout/blocklayout.c | 24 +- fs/nfs/blocklayout/blocklayout.h | 4 +- fs/nfs/client.c | 8 +- fs/nfs/dir.c | 4 +- fs/nfs/direct.c | 8 +- fs/nfs/file.c | 20 +- fs/nfs/internal.h | 6 +- fs/nfs/nfs4xdr.c | 2 +- fs/nfs/objlayout/objio_osd.c | 2 +- fs/nfs/pagelist.c | 6 +- fs/nfs/pnfs.c | 6 +- fs/nfs/read.c | 16 +- fs/nfs/write.c | 4 +- fs/nilfs2/bmap.c | 2 +- fs/nilfs2/btnode.c | 10 +- fs/nilfs2/dir.c | 32 +-- fs/nilfs2/gcinode.c | 2 +- fs/nilfs2/inode.c | 4 +- fs/nilfs2/mdt.c | 14 +- fs/nilfs2/namei.c | 4 +- fs/nilfs2/page.c | 18 +- fs/nilfs2/recovery.c | 4 +- fs/nilfs2/segment.c | 2 +- fs/ntfs/aops.c | 48 ++-- fs/ntfs/aops.h | 2 +- fs/ntfs/attrib.c | 28 +-- fs/ntfs/bitmap.c | 10 +- fs/ntfs/compress.c | 56 ++--- fs/ntfs/dir.c | 40 ++-- fs/ntfs/file.c | 54 ++--- fs/ntfs/index.c | 12 +- fs/ntfs/inode.c | 8 +- fs/ntfs/lcnalloc.c | 6 +- fs/ntfs/logfile.c | 16 +- fs/ntfs/mft.c | 34 +-- fs/ntfs/ntfs.h | 2 +- fs/ntfs/super.c | 58 ++--- fs/ocfs2/alloc.c | 28 +-- fs/ocfs2/aops.c | 48 ++-- fs/ocfs2/cluster/heartbeat.c | 10 +- fs/ocfs2/dlmfs/dlmfs.c | 4 +- fs/ocfs2/file.c | 14 +- fs/ocfs2/mmap.c | 6 +- fs/ocfs2/ocfs2.h | 20 +- fs/ocfs2/refcounttree.c | 22 +- fs/ocfs2/super.c | 4 +- fs/orangefs/inode.c | 10 +- fs/orangefs/orangefs-bufmap.c | 4 +- fs/orangefs/orangefs-utils.c | 2 +- fs/pipe.c | 6 +- fs/proc/task_mmu.c | 2 +- fs/proc/vmcore.c | 4 +- fs/pstore/inode.c | 4 +- fs/qnx6/dir.c | 16 +- fs/qnx6/inode.c | 4 +- fs/qnx6/qnx6.h | 2 +- fs/ramfs/inode.c | 4 +- fs/reiserfs/file.c | 4 +- fs/reiserfs/inode.c | 44 ++-- fs/reiserfs/ioctl.c | 4 +- fs/reiserfs/journal.c | 4 +- fs/reiserfs/stree.c | 4 +- fs/reiserfs/tail_conversion.c | 4 +- fs/reiserfs/xattr.c | 18 +- fs/splice.c | 32 +-- fs/squashfs/block.c | 4 +- fs/squashfs/cache.c | 14 +- fs/squashfs/decompressor.c | 2 +- fs/squashfs/file.c | 22 +- fs/squashfs/file_direct.c | 22 +- 
fs/squashfs/lz4_wrapper.c | 8 +- fs/squashfs/lzo_wrapper.c | 8 +- fs/squashfs/page_actor.c | 4 +- fs/squashfs/page_actor.h | 2 +- fs/squashfs/super.c | 2 +- fs/squashfs/symlink.c | 6 +- fs/squashfs/xz_wrapper.c | 4 +- fs/squashfs/zlib_wrapper.c | 4 +- fs/sync.c | 4 +- fs/sysv/dir.c | 18 +- fs/sysv/namei.c | 4 +- fs/ubifs/file.c | 52 ++-- fs/ubifs/super.c | 4 +- fs/ubifs/ubifs.h | 4 +- fs/udf/file.c | 6 +- fs/udf/inode.c | 4 +- fs/ufs/balloc.c | 6 +- fs/ufs/dir.c | 32 +-- fs/ufs/inode.c | 4 +- fs/ufs/namei.c | 6 +- fs/ufs/util.c | 4 +- fs/ufs/util.h | 2 +- fs/xfs/libxfs/xfs_bmap.c | 4 +- fs/xfs/xfs_aops.c | 18 +- fs/xfs/xfs_bmap_util.c | 4 +- fs/xfs/xfs_file.c | 12 +- fs/xfs/xfs_linux.h | 2 +- fs/xfs/xfs_mount.c | 2 +- fs/xfs/xfs_mount.h | 4 +- fs/xfs/xfs_pnfs.c | 4 +- fs/xfs/xfs_super.c | 4 +- include/linux/bio.h | 2 +- include/linux/blkdev.h | 2 +- include/linux/buffer_head.h | 4 +- include/linux/ceph/libceph.h | 4 +- include/linux/f2fs_fs.h | 4 +- include/linux/fs.h | 4 +- include/linux/nfs_page.h | 2 +- include/linux/pagemap.h | 14 +- include/linux/swap.h | 2 +- ipc/mqueue.c | 4 +- kernel/events/uprobes.c | 8 +- mm/fadvise.c | 8 +- mm/filemap.c | 126 +++++----- mm/hugetlb.c | 8 +- mm/madvise.c | 6 +- mm/memory-failure.c | 2 +- mm/memory.c | 54 ++--- mm/mincore.c | 4 +- mm/nommu.c | 2 +- mm/page-writeback.c | 12 +- mm/page_io.c | 2 +- mm/readahead.c | 20 +- mm/rmap.c | 2 +- mm/shmem.c | 130 +++++----- mm/swap.c | 12 +- mm/swap_state.c | 12 +- mm/swapfile.c | 12 +- mm/truncate.c | 40 ++-- mm/userfaultfd.c | 4 +- mm/zswap.c | 4 +- net/ceph/messenger.c | 6 +- net/ceph/pagelist.c | 4 +- net/ceph/pagevec.c | 30 +-- net/sunrpc/auth_gss/auth_gss.c | 8 +- net/sunrpc/auth_gss/gss_krb5_crypto.c | 2 +- net/sunrpc/auth_gss/gss_krb5_wrap.c | 4 +- net/sunrpc/cache.c | 4 +- net/sunrpc/rpc_pipe.c | 4 +- net/sunrpc/socklib.c | 6 +- net/sunrpc/xdr.c | 48 ++-- 381 files changed, 2722 insertions(+), 2721 deletions(-) (limited to 'include/linux') diff --git a/arch/arc/mm/cache.c b/arch/arc/mm/cache.c index d7709e3930a3..9e5eddbb856f 100644 --- a/arch/arc/mm/cache.c +++ b/arch/arc/mm/cache.c @@ -628,7 +628,7 @@ void flush_dcache_page(struct page *page) /* kernel reading from page with U-mapping */ phys_addr_t paddr = (unsigned long)page_address(page); - unsigned long vaddr = page->index << PAGE_CACHE_SHIFT; + unsigned long vaddr = page->index << PAGE_SHIFT; if (addr_not_cache_congruent(paddr, vaddr)) __flush_dcache_page(paddr, vaddr); diff --git a/arch/arm/mm/flush.c b/arch/arm/mm/flush.c index d0ba3551d49a..3cced8455727 100644 --- a/arch/arm/mm/flush.c +++ b/arch/arm/mm/flush.c @@ -235,7 +235,7 @@ void __flush_dcache_page(struct address_space *mapping, struct page *page) */ if (mapping && cache_is_vipt_aliasing()) flush_pfn_alias(page_to_pfn(page), - page->index << PAGE_CACHE_SHIFT); + page->index << PAGE_SHIFT); } static void __flush_dcache_aliases(struct address_space *mapping, struct page *page) @@ -250,7 +250,7 @@ static void __flush_dcache_aliases(struct address_space *mapping, struct page *p * data in the current VM view associated with this page. * - aliasing VIPT: we only need to find one mapping of this page. 
*/ - pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT); + pgoff = page->index; flush_dcache_mmap_lock(mapping); vma_interval_tree_foreach(mpnt, &mapping->i_mmap, pgoff, pgoff) { diff --git a/arch/parisc/kernel/cache.c b/arch/parisc/kernel/cache.c index 91c2a39cd5aa..67001277256c 100644 --- a/arch/parisc/kernel/cache.c +++ b/arch/parisc/kernel/cache.c @@ -319,7 +319,7 @@ void flush_dcache_page(struct page *page) if (!mapping) return; - pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT); + pgoff = page->index; /* We have carefully arranged in arch_get_unmapped_area() that * *any* mappings of a file are always congruently mapped (whether diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c index dfa863876778..6ca5f0525e57 100644 --- a/arch/powerpc/platforms/cell/spufs/inode.c +++ b/arch/powerpc/platforms/cell/spufs/inode.c @@ -732,8 +732,8 @@ spufs_fill_super(struct super_block *sb, void *data, int silent) return -ENOMEM; sb->s_maxbytes = MAX_LFS_FILESIZE; - sb->s_blocksize = PAGE_CACHE_SIZE; - sb->s_blocksize_bits = PAGE_CACHE_SHIFT; + sb->s_blocksize = PAGE_SIZE; + sb->s_blocksize_bits = PAGE_SHIFT; sb->s_magic = SPUFS_MAGIC; sb->s_op = &s_ops; sb->s_fs_info = info; diff --git a/arch/s390/hypfs/inode.c b/arch/s390/hypfs/inode.c index 0f3da2cb2bd6..255c7eec4481 100644 --- a/arch/s390/hypfs/inode.c +++ b/arch/s390/hypfs/inode.c @@ -278,8 +278,8 @@ static int hypfs_fill_super(struct super_block *sb, void *data, int silent) sbi->uid = current_uid(); sbi->gid = current_gid(); sb->s_fs_info = sbi; - sb->s_blocksize = PAGE_CACHE_SIZE; - sb->s_blocksize_bits = PAGE_CACHE_SHIFT; + sb->s_blocksize = PAGE_SIZE; + sb->s_blocksize_bits = PAGE_SHIFT; sb->s_magic = HYPFS_MAGIC; sb->s_op = &hypfs_s_ops; if (hypfs_parse_options(data, sb)) diff --git a/block/bio.c b/block/bio.c index f124a0a624fc..168531517694 100644 --- a/block/bio.c +++ b/block/bio.c @@ -1339,7 +1339,7 @@ struct bio *bio_map_user_iov(struct request_queue *q, * release the pages we didn't map into the bio, if any */ while (j < page_limit) - page_cache_release(pages[j++]); + put_page(pages[j++]); } kfree(pages); @@ -1365,7 +1365,7 @@ struct bio *bio_map_user_iov(struct request_queue *q, for (j = 0; j < nr_pages; j++) { if (!pages[j]) break; - page_cache_release(pages[j]); + put_page(pages[j]); } out: kfree(pages); @@ -1385,7 +1385,7 @@ static void __bio_unmap_user(struct bio *bio) if (bio_data_dir(bio) == READ) set_page_dirty_lock(bvec->bv_page); - page_cache_release(bvec->bv_page); + put_page(bvec->bv_page); } bio_put(bio); @@ -1658,7 +1658,7 @@ void bio_check_pages_dirty(struct bio *bio) struct page *page = bvec->bv_page; if (PageDirty(page) || PageCompound(page)) { - page_cache_release(page); + put_page(page); bvec->bv_page = NULL; } else { nr_clean_pages++; diff --git a/block/blk-core.c b/block/blk-core.c index 827f8badd143..b60537b2c35b 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -706,7 +706,7 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id) goto fail_id; q->backing_dev_info.ra_pages = - (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE; + (VM_MAX_READAHEAD * 1024) / PAGE_SIZE; q->backing_dev_info.capabilities = BDI_CAP_CGROUP_WRITEBACK; q->backing_dev_info.name = "block"; q->node = node_id; diff --git a/block/blk-settings.c b/block/blk-settings.c index c7bb666aafd1..331e4eee0dda 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -239,8 +239,8 @@ void blk_queue_max_hw_sectors(struct request_queue *q, unsigned int max_hw_secto struct 
queue_limits *limits = &q->limits; unsigned int max_sectors; - if ((max_hw_sectors << 9) < PAGE_CACHE_SIZE) { - max_hw_sectors = 1 << (PAGE_CACHE_SHIFT - 9); + if ((max_hw_sectors << 9) < PAGE_SIZE) { + max_hw_sectors = 1 << (PAGE_SHIFT - 9); printk(KERN_INFO "%s: set to minimum %d\n", __func__, max_hw_sectors); } @@ -329,8 +329,8 @@ EXPORT_SYMBOL(blk_queue_max_segments); **/ void blk_queue_max_segment_size(struct request_queue *q, unsigned int max_size) { - if (max_size < PAGE_CACHE_SIZE) { - max_size = PAGE_CACHE_SIZE; + if (max_size < PAGE_SIZE) { + max_size = PAGE_SIZE; printk(KERN_INFO "%s: set to minimum %d\n", __func__, max_size); } @@ -760,8 +760,8 @@ EXPORT_SYMBOL_GPL(blk_queue_dma_drain); **/ void blk_queue_segment_boundary(struct request_queue *q, unsigned long mask) { - if (mask < PAGE_CACHE_SIZE - 1) { - mask = PAGE_CACHE_SIZE - 1; + if (mask < PAGE_SIZE - 1) { + mask = PAGE_SIZE - 1; printk(KERN_INFO "%s: set to minimum %lx\n", __func__, mask); } diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index dd93763057ce..995b58d46ed1 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -76,7 +76,7 @@ queue_requests_store(struct request_queue *q, const char *page, size_t count) static ssize_t queue_ra_show(struct request_queue *q, char *page) { unsigned long ra_kb = q->backing_dev_info.ra_pages << - (PAGE_CACHE_SHIFT - 10); + (PAGE_SHIFT - 10); return queue_var_show(ra_kb, (page)); } @@ -90,7 +90,7 @@ queue_ra_store(struct request_queue *q, const char *page, size_t count) if (ret < 0) return ret; - q->backing_dev_info.ra_pages = ra_kb >> (PAGE_CACHE_SHIFT - 10); + q->backing_dev_info.ra_pages = ra_kb >> (PAGE_SHIFT - 10); return ret; } @@ -117,7 +117,7 @@ static ssize_t queue_max_segment_size_show(struct request_queue *q, char *page) if (blk_queue_cluster(q)) return queue_var_show(queue_max_segment_size(q), (page)); - return queue_var_show(PAGE_CACHE_SIZE, (page)); + return queue_var_show(PAGE_SIZE, (page)); } static ssize_t queue_logical_block_size_show(struct request_queue *q, char *page) @@ -198,7 +198,7 @@ queue_max_sectors_store(struct request_queue *q, const char *page, size_t count) { unsigned long max_sectors_kb, max_hw_sectors_kb = queue_max_hw_sectors(q) >> 1, - page_kb = 1 << (PAGE_CACHE_SHIFT - 10); + page_kb = 1 << (PAGE_SHIFT - 10); ssize_t ret = queue_var_store(&max_sectors_kb, page, count); if (ret < 0) diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index e3c591dd8f19..4a349787bc62 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -4075,7 +4075,7 @@ cfq_rq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq, * idle timer unplug to continue working. */ if (cfq_cfqq_wait_request(cfqq)) { - if (blk_rq_bytes(rq) > PAGE_CACHE_SIZE || + if (blk_rq_bytes(rq) > PAGE_SIZE || cfqd->busy_queues > 1) { cfq_del_timer(cfqd, cfqq); cfq_clear_cfqq_wait_request(cfqq); diff --git a/block/compat_ioctl.c b/block/compat_ioctl.c index f678c733df40..556826ac7cb4 100644 --- a/block/compat_ioctl.c +++ b/block/compat_ioctl.c @@ -710,7 +710,7 @@ long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg) return -EINVAL; bdi = blk_get_backing_dev_info(bdev); return compat_put_long(arg, - (bdi->ra_pages * PAGE_CACHE_SIZE) / 512); + (bdi->ra_pages * PAGE_SIZE) / 512); case BLKROGET: /* compatible */ return compat_put_int(arg, bdev_read_only(bdev) != 0); case BLKBSZGET_32: /* get the logical block size (cf. 
BLKSSZGET) */ @@ -729,7 +729,7 @@ long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg) if (!capable(CAP_SYS_ADMIN)) return -EACCES; bdi = blk_get_backing_dev_info(bdev); - bdi->ra_pages = (arg * 512) / PAGE_CACHE_SIZE; + bdi->ra_pages = (arg * 512) / PAGE_SIZE; return 0; case BLKGETSIZE: size = i_size_read(bdev->bd_inode); diff --git a/block/ioctl.c b/block/ioctl.c index d8996bbd7f12..4ff1f92f89ca 100644 --- a/block/ioctl.c +++ b/block/ioctl.c @@ -550,7 +550,7 @@ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd, if (!arg) return -EINVAL; bdi = blk_get_backing_dev_info(bdev); - return put_long(arg, (bdi->ra_pages * PAGE_CACHE_SIZE) / 512); + return put_long(arg, (bdi->ra_pages * PAGE_SIZE) / 512); case BLKROGET: return put_int(arg, bdev_read_only(bdev) != 0); case BLKBSZGET: /* get block device soft block size (cf. BLKSSZGET) */ @@ -578,7 +578,7 @@ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd, if(!capable(CAP_SYS_ADMIN)) return -EACCES; bdi = blk_get_backing_dev_info(bdev); - bdi->ra_pages = (arg * 512) / PAGE_CACHE_SIZE; + bdi->ra_pages = (arg * 512) / PAGE_SIZE; return 0; case BLKBSZSET: return blkdev_bszset(bdev, mode, argp); diff --git a/block/partition-generic.c b/block/partition-generic.c index 5d8701941054..2c6ae2aed2c4 100644 --- a/block/partition-generic.c +++ b/block/partition-generic.c @@ -566,8 +566,8 @@ static struct page *read_pagecache_sector(struct block_device *bdev, sector_t n) { struct address_space *mapping = bdev->bd_inode->i_mapping; - return read_mapping_page(mapping, (pgoff_t)(n >> (PAGE_CACHE_SHIFT-9)), - NULL); + return read_mapping_page(mapping, (pgoff_t)(n >> (PAGE_SHIFT-9)), + NULL); } unsigned char *read_dev_sector(struct block_device *bdev, sector_t n, Sector *p) @@ -584,9 +584,9 @@ unsigned char *read_dev_sector(struct block_device *bdev, sector_t n, Sector *p) if (PageError(page)) goto fail; p->v = page; - return (unsigned char *)page_address(page) + ((n & ((1 << (PAGE_CACHE_SHIFT - 9)) - 1)) << 9); + return (unsigned char *)page_address(page) + ((n & ((1 << (PAGE_SHIFT - 9)) - 1)) << 9); fail: - page_cache_release(page); + put_page(page); } p->v = NULL; return NULL; diff --git a/drivers/block/aoe/aoeblk.c b/drivers/block/aoe/aoeblk.c index dd73e1ff1759..ec9d8610b25f 100644 --- a/drivers/block/aoe/aoeblk.c +++ b/drivers/block/aoe/aoeblk.c @@ -397,7 +397,7 @@ aoeblk_gdalloc(void *vp) WARN_ON(d->flags & DEVFL_UP); blk_queue_max_hw_sectors(q, BLK_DEF_MAX_SECTORS); q->backing_dev_info.name = "aoe"; - q->backing_dev_info.ra_pages = READ_AHEAD / PAGE_CACHE_SIZE; + q->backing_dev_info.ra_pages = READ_AHEAD / PAGE_SIZE; d->bufpool = mp; d->blkq = gd->queue = q; q->queuedata = d; diff --git a/drivers/block/brd.c b/drivers/block/brd.c index f7ecc287d733..51a071e32221 100644 --- a/drivers/block/brd.c +++ b/drivers/block/brd.c @@ -374,7 +374,7 @@ static int brd_rw_page(struct block_device *bdev, sector_t sector, struct page *page, int rw) { struct brd_device *brd = bdev->bd_disk->private_data; - int err = brd_do_bvec(brd, page, PAGE_CACHE_SIZE, 0, rw, sector); + int err = brd_do_bvec(brd, page, PAGE_SIZE, 0, rw, sector); page_endio(page, rw & WRITE, err); return err; } diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 226eb0c9f0fb..1fd1dccebb6b 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -1178,7 +1178,7 @@ static void drbd_setup_queue_param(struct drbd_device *device, struct drbd_backi blk_queue_max_hw_sectors(q, max_hw_sectors); /* 
This is the workaround for "bio would need to, but cannot, be split" */ blk_queue_max_segments(q, max_segments ? max_segments : BLK_MAX_SEGMENTS); - blk_queue_segment_boundary(q, PAGE_CACHE_SIZE-1); + blk_queue_segment_boundary(q, PAGE_SIZE-1); if (b) { struct drbd_connection *connection = first_peer_device(device)->connection; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index ab34190859a8..c018b021707c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -616,7 +616,7 @@ static void amdgpu_ttm_tt_unpin_userptr(struct ttm_tt *ttm) set_page_dirty(page); mark_page_accessed(page); - page_cache_release(page); + put_page(page); } sg_free_table(ttm->sg); diff --git a/drivers/gpu/drm/armada/armada_gem.c b/drivers/gpu/drm/armada/armada_gem.c index 6e731db31aa4..aca7f9cc6109 100644 --- a/drivers/gpu/drm/armada/armada_gem.c +++ b/drivers/gpu/drm/armada/armada_gem.c @@ -481,7 +481,7 @@ armada_gem_prime_map_dma_buf(struct dma_buf_attachment *attach, release: for_each_sg(sgt->sgl, sg, num, i) - page_cache_release(sg_page(sg)); + put_page(sg_page(sg)); free_table: sg_free_table(sgt); free_sgt: @@ -502,7 +502,7 @@ static void armada_gem_prime_unmap_dma_buf(struct dma_buf_attachment *attach, if (dobj->obj.filp) { struct scatterlist *sg; for_each_sg(sgt->sgl, sg, sgt->nents, i) - page_cache_release(sg_page(sg)); + put_page(sg_page(sg)); } sg_free_table(sgt); diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c index 2e8c77e71e1f..da0c5320789f 100644 --- a/drivers/gpu/drm/drm_gem.c +++ b/drivers/gpu/drm/drm_gem.c @@ -534,7 +534,7 @@ struct page **drm_gem_get_pages(struct drm_gem_object *obj) fail: while (i--) - page_cache_release(pages[i]); + put_page(pages[i]); drm_free_large(pages); return ERR_CAST(p); @@ -569,7 +569,7 @@ void drm_gem_put_pages(struct drm_gem_object *obj, struct page **pages, mark_page_accessed(pages[i]); /* Undo the reference we took when populating the table */ - page_cache_release(pages[i]); + put_page(pages[i]); } drm_free_large(pages); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 3d31d3ac589e..dabc08987b5e 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -177,7 +177,7 @@ i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj) drm_clflush_virt_range(vaddr, PAGE_SIZE); kunmap_atomic(src); - page_cache_release(page); + put_page(page); vaddr += PAGE_SIZE; } @@ -243,7 +243,7 @@ i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj) set_page_dirty(page); if (obj->madv == I915_MADV_WILLNEED) mark_page_accessed(page); - page_cache_release(page); + put_page(page); vaddr += PAGE_SIZE; } obj->dirty = 0; @@ -2206,7 +2206,7 @@ i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj) if (obj->madv == I915_MADV_WILLNEED) mark_page_accessed(page); - page_cache_release(page); + put_page(page); } obj->dirty = 0; @@ -2346,7 +2346,7 @@ i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj) err_pages: sg_mark_end(sg); for_each_sg_page(st->sgl, &sg_iter, st->nents, 0) - page_cache_release(sg_page_iter_page(&sg_iter)); + put_page(sg_page_iter_page(&sg_iter)); sg_free_table(st); kfree(st); diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c index 6be40f3ba2c7..18ba8139e922 100644 --- a/drivers/gpu/drm/i915/i915_gem_userptr.c +++ b/drivers/gpu/drm/i915/i915_gem_userptr.c @@ -683,7 +683,7 @@ i915_gem_userptr_put_pages(struct drm_i915_gem_object 
*obj) set_page_dirty(page); mark_page_accessed(page); - page_cache_release(page); + put_page(page); } obj->dirty = 0; diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c index 6d8c32377c6f..0deb7f047be0 100644 --- a/drivers/gpu/drm/radeon/radeon_ttm.c +++ b/drivers/gpu/drm/radeon/radeon_ttm.c @@ -609,7 +609,7 @@ static void radeon_ttm_tt_unpin_userptr(struct ttm_tt *ttm) set_page_dirty(page); mark_page_accessed(page); - page_cache_release(page); + put_page(page); } sg_free_table(ttm->sg); diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c index 4e19d0f9cc30..077ae9b2865d 100644 --- a/drivers/gpu/drm/ttm/ttm_tt.c +++ b/drivers/gpu/drm/ttm/ttm_tt.c @@ -311,7 +311,7 @@ int ttm_tt_swapin(struct ttm_tt *ttm) goto out_err; copy_highpage(to_page, from_page); - page_cache_release(from_page); + put_page(from_page); } if (!(ttm->page_flags & TTM_PAGE_FLAG_PERSISTENT_SWAP)) @@ -361,7 +361,7 @@ int ttm_tt_swapout(struct ttm_tt *ttm, struct file *persistent_swap_storage) copy_highpage(to_page, from_page); set_page_dirty(to_page); mark_page_accessed(to_page); - page_cache_release(to_page); + put_page(to_page); } ttm_tt_unpopulate(ttm); diff --git a/drivers/gpu/drm/via/via_dmablit.c b/drivers/gpu/drm/via/via_dmablit.c index e797dfc07ae3..7e2a12c4fed2 100644 --- a/drivers/gpu/drm/via/via_dmablit.c +++ b/drivers/gpu/drm/via/via_dmablit.c @@ -188,7 +188,7 @@ via_free_sg_info(struct pci_dev *pdev, drm_via_sg_info_t *vsg) if (NULL != (page = vsg->pages[i])) { if (!PageReserved(page) && (DMA_FROM_DEVICE == vsg->direction)) SetPageDirty(page); - page_cache_release(page); + put_page(page); } } case dr_via_pages_alloc: diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c index 7df6b4f1548a..bef71751aade 100644 --- a/drivers/md/bitmap.c +++ b/drivers/md/bitmap.c @@ -322,7 +322,7 @@ __clear_page_buffers(struct page *page) { ClearPagePrivate(page); set_page_private(page, 0); - page_cache_release(page); + put_page(page); } static void free_buffers(struct page *page) { diff --git a/drivers/media/v4l2-core/videobuf-dma-sg.c b/drivers/media/v4l2-core/videobuf-dma-sg.c index df4c052c6bd6..f300f060b3f3 100644 --- a/drivers/media/v4l2-core/videobuf-dma-sg.c +++ b/drivers/media/v4l2-core/videobuf-dma-sg.c @@ -349,7 +349,7 @@ int videobuf_dma_free(struct videobuf_dmabuf *dma) if (dma->pages) { for (i = 0; i < dma->nr_pages; i++) - page_cache_release(dma->pages[i]); + put_page(dma->pages[i]); kfree(dma->pages); dma->pages = NULL; } diff --git a/drivers/misc/ibmasm/ibmasmfs.c b/drivers/misc/ibmasm/ibmasmfs.c index e8b933111e0d..9c677f3f3c26 100644 --- a/drivers/misc/ibmasm/ibmasmfs.c +++ b/drivers/misc/ibmasm/ibmasmfs.c @@ -116,8 +116,8 @@ static int ibmasmfs_fill_super (struct super_block *sb, void *data, int silent) { struct inode *root; - sb->s_blocksize = PAGE_CACHE_SIZE; - sb->s_blocksize_bits = PAGE_CACHE_SHIFT; + sb->s_blocksize = PAGE_SIZE; + sb->s_blocksize_bits = PAGE_SHIFT; sb->s_magic = IBMASMFS_MAGIC; sb->s_op = &ibmasmfs_s_ops; sb->s_time_gran = 1; diff --git a/drivers/misc/vmw_vmci/vmci_queue_pair.c b/drivers/misc/vmw_vmci/vmci_queue_pair.c index f42d9c4e4561..f84a4275ca29 100644 --- a/drivers/misc/vmw_vmci/vmci_queue_pair.c +++ b/drivers/misc/vmw_vmci/vmci_queue_pair.c @@ -728,7 +728,7 @@ static void qp_release_pages(struct page **pages, if (dirty) set_page_dirty(pages[i]); - page_cache_release(pages[i]); + put_page(pages[i]); pages[i] = NULL; } } diff --git a/drivers/mmc/core/host.c b/drivers/mmc/core/host.c index 1d94607611d8..6e4c55a4aab5 100644 --- 
a/drivers/mmc/core/host.c +++ b/drivers/mmc/core/host.c @@ -356,11 +356,11 @@ struct mmc_host *mmc_alloc_host(int extra, struct device *dev) * They have to set these according to their abilities. */ host->max_segs = 1; - host->max_seg_size = PAGE_CACHE_SIZE; + host->max_seg_size = PAGE_SIZE; - host->max_req_size = PAGE_CACHE_SIZE; + host->max_req_size = PAGE_SIZE; host->max_blk_size = 512; - host->max_blk_count = PAGE_CACHE_SIZE / 512; + host->max_blk_count = PAGE_SIZE / 512; return host; } diff --git a/drivers/mmc/host/sh_mmcif.c b/drivers/mmc/host/sh_mmcif.c index 8d870ce9f944..d9a655f47d41 100644 --- a/drivers/mmc/host/sh_mmcif.c +++ b/drivers/mmc/host/sh_mmcif.c @@ -1513,7 +1513,7 @@ static int sh_mmcif_probe(struct platform_device *pdev) mmc->caps |= pd->caps; mmc->max_segs = 32; mmc->max_blk_size = 512; - mmc->max_req_size = PAGE_CACHE_SIZE * mmc->max_segs; + mmc->max_req_size = PAGE_SIZE * mmc->max_segs; mmc->max_blk_count = mmc->max_req_size / mmc->max_blk_size; mmc->max_seg_size = mmc->max_req_size; diff --git a/drivers/mmc/host/tmio_mmc_dma.c b/drivers/mmc/host/tmio_mmc_dma.c index 675435873823..7fb0c034dcb6 100644 --- a/drivers/mmc/host/tmio_mmc_dma.c +++ b/drivers/mmc/host/tmio_mmc_dma.c @@ -63,7 +63,7 @@ static void tmio_mmc_start_dma_rx(struct tmio_mmc_host *host) } } - if ((!aligned && (host->sg_len > 1 || sg->length > PAGE_CACHE_SIZE || + if ((!aligned && (host->sg_len > 1 || sg->length > PAGE_SIZE || (align & PAGE_MASK))) || !multiple) { ret = -EINVAL; goto pio; @@ -133,7 +133,7 @@ static void tmio_mmc_start_dma_tx(struct tmio_mmc_host *host) } } - if ((!aligned && (host->sg_len > 1 || sg->length > PAGE_CACHE_SIZE || + if ((!aligned && (host->sg_len > 1 || sg->length > PAGE_SIZE || (align & PAGE_MASK))) || !multiple) { ret = -EINVAL; goto pio; diff --git a/drivers/mmc/host/tmio_mmc_pio.c b/drivers/mmc/host/tmio_mmc_pio.c index 03f6e74c1906..0521b4662748 100644 --- a/drivers/mmc/host/tmio_mmc_pio.c +++ b/drivers/mmc/host/tmio_mmc_pio.c @@ -1125,7 +1125,7 @@ int tmio_mmc_host_probe(struct tmio_mmc_host *_host, mmc->caps2 |= pdata->capabilities2; mmc->max_segs = 32; mmc->max_blk_size = 512; - mmc->max_blk_count = (PAGE_CACHE_SIZE / mmc->max_blk_size) * + mmc->max_blk_count = (PAGE_SIZE / mmc->max_blk_size) * mmc->max_segs; mmc->max_req_size = mmc->max_blk_size * mmc->max_blk_count; mmc->max_seg_size = mmc->max_req_size; diff --git a/drivers/mmc/host/usdhi6rol0.c b/drivers/mmc/host/usdhi6rol0.c index b2752fe711f2..807c06e203c3 100644 --- a/drivers/mmc/host/usdhi6rol0.c +++ b/drivers/mmc/host/usdhi6rol0.c @@ -1789,7 +1789,7 @@ static int usdhi6_probe(struct platform_device *pdev) /* Set .max_segs to some random number. Feel free to adjust. 
*/ mmc->max_segs = 32; mmc->max_blk_size = 512; - mmc->max_req_size = PAGE_CACHE_SIZE * mmc->max_segs; + mmc->max_req_size = PAGE_SIZE * mmc->max_segs; mmc->max_blk_count = mmc->max_req_size / mmc->max_blk_size; /* * Setting .max_seg_size to 1 page would simplify our page-mapping code, diff --git a/drivers/mtd/devices/block2mtd.c b/drivers/mtd/devices/block2mtd.c index e2c0057737e6..7c887f111a7d 100644 --- a/drivers/mtd/devices/block2mtd.c +++ b/drivers/mtd/devices/block2mtd.c @@ -75,7 +75,7 @@ static int _block2mtd_erase(struct block2mtd_dev *dev, loff_t to, size_t len) break; } - page_cache_release(page); + put_page(page); pages--; index++; } @@ -124,7 +124,7 @@ static int block2mtd_read(struct mtd_info *mtd, loff_t from, size_t len, return PTR_ERR(page); memcpy(buf, page_address(page) + offset, cpylen); - page_cache_release(page); + put_page(page); if (retlen) *retlen += cpylen; @@ -164,7 +164,7 @@ static int _block2mtd_write(struct block2mtd_dev *dev, const u_char *buf, unlock_page(page); balance_dirty_pages_ratelimited(mapping); } - page_cache_release(page); + put_page(page); if (retlen) *retlen += cpylen; diff --git a/drivers/mtd/nand/nandsim.c b/drivers/mtd/nand/nandsim.c index 1fd519503bb1..a58169a28741 100644 --- a/drivers/mtd/nand/nandsim.c +++ b/drivers/mtd/nand/nandsim.c @@ -1339,7 +1339,7 @@ static void put_pages(struct nandsim *ns) int i; for (i = 0; i < ns->held_cnt; i++) - page_cache_release(ns->held_pages[i]); + put_page(ns->held_pages[i]); } /* Get page cache pages in advance to provide NOFS memory allocation */ @@ -1349,8 +1349,8 @@ static int get_pages(struct nandsim *ns, struct file *file, size_t count, loff_t struct page *page; struct address_space *mapping = file->f_mapping; - start_index = pos >> PAGE_CACHE_SHIFT; - end_index = (pos + count - 1) >> PAGE_CACHE_SHIFT; + start_index = pos >> PAGE_SHIFT; + end_index = (pos + count - 1) >> PAGE_SHIFT; if (end_index - start_index + 1 > NS_MAX_HELD_PAGES) return -EINVAL; ns->held_cnt = 0; diff --git a/drivers/nvdimm/btt.c b/drivers/nvdimm/btt.c index c32cbb593600..f068b6513cd2 100644 --- a/drivers/nvdimm/btt.c +++ b/drivers/nvdimm/btt.c @@ -1204,7 +1204,7 @@ static int btt_rw_page(struct block_device *bdev, sector_t sector, { struct btt *btt = bdev->bd_disk->private_data; - btt_do_bvec(btt, NULL, page, PAGE_CACHE_SIZE, 0, rw, sector); + btt_do_bvec(btt, NULL, page, PAGE_SIZE, 0, rw, sector); page_endio(page, rw & WRITE, 0); return 0; } diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index cc31c6f1f88e..12c86fa80c5f 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -151,7 +151,7 @@ static int pmem_rw_page(struct block_device *bdev, sector_t sector, struct pmem_device *pmem = bdev->bd_disk->private_data; int rc; - rc = pmem_do_bvec(pmem, page, PAGE_CACHE_SIZE, 0, rw, sector); + rc = pmem_do_bvec(pmem, page, PAGE_SIZE, 0, rw, sector); if (rw & WRITE) wmb_pmem(); diff --git a/drivers/oprofile/oprofilefs.c b/drivers/oprofile/oprofilefs.c index b48ac6300c79..a0e5260bd006 100644 --- a/drivers/oprofile/oprofilefs.c +++ b/drivers/oprofile/oprofilefs.c @@ -239,8 +239,8 @@ static int oprofilefs_fill_super(struct super_block *sb, void *data, int silent) { struct inode *root_inode; - sb->s_blocksize = PAGE_CACHE_SIZE; - sb->s_blocksize_bits = PAGE_CACHE_SHIFT; + sb->s_blocksize = PAGE_SIZE; + sb->s_blocksize_bits = PAGE_SHIFT; sb->s_magic = OPROFILEFS_MAGIC; sb->s_op = &s_ops; sb->s_time_gran = 1; diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 5a5457ac9cdb..1bd0753f678a 100644 --- 
a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -2891,7 +2891,7 @@ static int sd_revalidate_disk(struct gendisk *disk) if (sdkp->opt_xfer_blocks && sdkp->opt_xfer_blocks <= dev_max && sdkp->opt_xfer_blocks <= SD_DEF_XFER_BLOCKS && - sdkp->opt_xfer_blocks * sdp->sector_size >= PAGE_CACHE_SIZE) + sdkp->opt_xfer_blocks * sdp->sector_size >= PAGE_SIZE) rw_max = q->limits.io_opt = sdkp->opt_xfer_blocks * sdp->sector_size; else diff --git a/drivers/scsi/st.c b/drivers/scsi/st.c index 71c5138ddf94..dbf1882cfbac 100644 --- a/drivers/scsi/st.c +++ b/drivers/scsi/st.c @@ -4941,7 +4941,7 @@ static int sgl_map_user_pages(struct st_buffer *STbp, out_unmap: if (res > 0) { for (j=0; j < res; j++) - page_cache_release(pages[j]); + put_page(pages[j]); res = 0; } kfree(pages); @@ -4963,7 +4963,7 @@ static int sgl_unmap_user_pages(struct st_buffer *STbp, /* FIXME: cache flush missing for rw==READ * FIXME: call the correct reference counting function */ - page_cache_release(page); + put_page(page); } kfree(STbp->mapped_pages); STbp->mapped_pages = NULL; diff --git a/drivers/staging/lustre/include/linux/libcfs/libcfs_private.h b/drivers/staging/lustre/include/linux/libcfs/libcfs_private.h index dab486261154..13335437c69c 100644 --- a/drivers/staging/lustre/include/linux/libcfs/libcfs_private.h +++ b/drivers/staging/lustre/include/linux/libcfs/libcfs_private.h @@ -88,7 +88,7 @@ do { \ } while (0) #ifndef LIBCFS_VMALLOC_SIZE -#define LIBCFS_VMALLOC_SIZE (2 << PAGE_CACHE_SHIFT) /* 2 pages */ +#define LIBCFS_VMALLOC_SIZE (2 << PAGE_SHIFT) /* 2 pages */ #endif #define LIBCFS_ALLOC_PRE(size, mask) \ diff --git a/drivers/staging/lustre/include/linux/libcfs/linux/linux-mem.h b/drivers/staging/lustre/include/linux/libcfs/linux/linux-mem.h index 0f2fd79e5ec8..837eb22749c3 100644 --- a/drivers/staging/lustre/include/linux/libcfs/linux/linux-mem.h +++ b/drivers/staging/lustre/include/linux/libcfs/linux/linux-mem.h @@ -57,7 +57,7 @@ #include "../libcfs_cpu.h" #endif -#define CFS_PAGE_MASK (~((__u64)PAGE_CACHE_SIZE-1)) +#define CFS_PAGE_MASK (~((__u64)PAGE_SIZE-1)) #define page_index(p) ((p)->index) #define memory_pressure_get() (current->flags & PF_MEMALLOC) @@ -67,7 +67,7 @@ #if BITS_PER_LONG == 32 /* limit to lowmem on 32-bit systems */ #define NUM_CACHEPAGES \ - min(totalram_pages, 1UL << (30 - PAGE_CACHE_SHIFT) * 3 / 4) + min(totalram_pages, 1UL << (30 - PAGE_SHIFT) * 3 / 4) #else #define NUM_CACHEPAGES totalram_pages #endif diff --git a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_lib.c b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_lib.c index 3e1f24e77f64..d4ce06d0aeeb 100644 --- a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_lib.c +++ b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_lib.c @@ -291,7 +291,7 @@ ksocknal_lib_kiov_vmap(lnet_kiov_t *kiov, int niov, for (nob = i = 0; i < niov; i++) { if ((kiov[i].kiov_offset && i > 0) || - (kiov[i].kiov_offset + kiov[i].kiov_len != PAGE_CACHE_SIZE && i < niov - 1)) + (kiov[i].kiov_offset + kiov[i].kiov_len != PAGE_SIZE && i < niov - 1)) return NULL; pages[i] = kiov[i].kiov_page; diff --git a/drivers/staging/lustre/lnet/libcfs/debug.c b/drivers/staging/lustre/lnet/libcfs/debug.c index c90e5102fe06..c3d628bac5b8 100644 --- a/drivers/staging/lustre/lnet/libcfs/debug.c +++ b/drivers/staging/lustre/lnet/libcfs/debug.c @@ -517,7 +517,7 @@ int libcfs_debug_init(unsigned long bufsize) max = TCD_MAX_PAGES; } else { max = max / num_possible_cpus(); - max <<= (20 - PAGE_CACHE_SHIFT); + max <<= (20 - PAGE_SHIFT); } rc = cfs_tracefile_init(max); diff --git 
a/drivers/staging/lustre/lnet/libcfs/tracefile.c b/drivers/staging/lustre/lnet/libcfs/tracefile.c index ec3bc04bd89f..244eb89eef68 100644 --- a/drivers/staging/lustre/lnet/libcfs/tracefile.c +++ b/drivers/staging/lustre/lnet/libcfs/tracefile.c @@ -182,7 +182,7 @@ cfs_trace_get_tage_try(struct cfs_trace_cpu_data *tcd, unsigned long len) if (tcd->tcd_cur_pages > 0) { __LASSERT(!list_empty(&tcd->tcd_pages)); tage = cfs_tage_from_list(tcd->tcd_pages.prev); - if (tage->used + len <= PAGE_CACHE_SIZE) + if (tage->used + len <= PAGE_SIZE) return tage; } @@ -260,7 +260,7 @@ static struct cfs_trace_page *cfs_trace_get_tage(struct cfs_trace_cpu_data *tcd, * from here: this will lead to infinite recursion. */ - if (len > PAGE_CACHE_SIZE) { + if (len > PAGE_SIZE) { pr_err("cowardly refusing to write %lu bytes in a page\n", len); return NULL; } @@ -349,7 +349,7 @@ int libcfs_debug_vmsg2(struct libcfs_debug_msg_data *msgdata, for (i = 0; i < 2; i++) { tage = cfs_trace_get_tage(tcd, needed + known_size + 1); if (!tage) { - if (needed + known_size > PAGE_CACHE_SIZE) + if (needed + known_size > PAGE_SIZE) mask |= D_ERROR; cfs_trace_put_tcd(tcd); @@ -360,7 +360,7 @@ int libcfs_debug_vmsg2(struct libcfs_debug_msg_data *msgdata, string_buf = (char *)page_address(tage->page) + tage->used + known_size; - max_nob = PAGE_CACHE_SIZE - tage->used - known_size; + max_nob = PAGE_SIZE - tage->used - known_size; if (max_nob <= 0) { printk(KERN_EMERG "negative max_nob: %d\n", max_nob); @@ -424,7 +424,7 @@ int libcfs_debug_vmsg2(struct libcfs_debug_msg_data *msgdata, __LASSERT(debug_buf == string_buf); tage->used += needed; - __LASSERT(tage->used <= PAGE_CACHE_SIZE); + __LASSERT(tage->used <= PAGE_SIZE); console: if ((mask & libcfs_printk) == 0) { @@ -835,7 +835,7 @@ EXPORT_SYMBOL(cfs_trace_copyout_string); int cfs_trace_allocate_string_buffer(char **str, int nob) { - if (nob > 2 * PAGE_CACHE_SIZE) /* string must be "sensible" */ + if (nob > 2 * PAGE_SIZE) /* string must be "sensible" */ return -EINVAL; *str = kmalloc(nob, GFP_KERNEL | __GFP_ZERO); @@ -951,7 +951,7 @@ int cfs_trace_set_debug_mb(int mb) } mb /= num_possible_cpus(); - pages = mb << (20 - PAGE_CACHE_SHIFT); + pages = mb << (20 - PAGE_SHIFT); cfs_tracefile_write_lock(); @@ -977,7 +977,7 @@ int cfs_trace_get_debug_mb(void) cfs_tracefile_read_unlock(); - return (total_pages >> (20 - PAGE_CACHE_SHIFT)) + 1; + return (total_pages >> (20 - PAGE_SHIFT)) + 1; } static int tracefiled(void *arg) diff --git a/drivers/staging/lustre/lnet/libcfs/tracefile.h b/drivers/staging/lustre/lnet/libcfs/tracefile.h index 4c77f9044dd3..ac84e7f4c859 100644 --- a/drivers/staging/lustre/lnet/libcfs/tracefile.h +++ b/drivers/staging/lustre/lnet/libcfs/tracefile.h @@ -87,7 +87,7 @@ void libcfs_unregister_panic_notifier(void); extern int libcfs_panic_in_progress; int cfs_trace_max_debug_mb(void); -#define TCD_MAX_PAGES (5 << (20 - PAGE_CACHE_SHIFT)) +#define TCD_MAX_PAGES (5 << (20 - PAGE_SHIFT)) #define TCD_STOCK_PAGES (TCD_MAX_PAGES) #define CFS_TRACEFILE_SIZE (500 << 20) @@ -96,7 +96,7 @@ int cfs_trace_max_debug_mb(void); /* * Private declare for tracefile */ -#define TCD_MAX_PAGES (5 << (20 - PAGE_CACHE_SHIFT)) +#define TCD_MAX_PAGES (5 << (20 - PAGE_SHIFT)) #define TCD_STOCK_PAGES (TCD_MAX_PAGES) #define CFS_TRACEFILE_SIZE (500 << 20) @@ -257,7 +257,7 @@ do { \ do { \ __LASSERT(tage); \ __LASSERT(tage->page); \ - __LASSERT(tage->used <= PAGE_CACHE_SIZE); \ + __LASSERT(tage->used <= PAGE_SIZE); \ __LASSERT(page_count(tage->page) > 0); \ } while (0) diff --git 
a/drivers/staging/lustre/lnet/lnet/lib-md.c b/drivers/staging/lustre/lnet/lnet/lib-md.c index c74514f99f90..75d31217bf92 100644 --- a/drivers/staging/lustre/lnet/lnet/lib-md.c +++ b/drivers/staging/lustre/lnet/lnet/lib-md.c @@ -139,7 +139,7 @@ lnet_md_build(lnet_libmd_t *lmd, lnet_md_t *umd, int unlink) for (i = 0; i < (int)niov; i++) { /* We take the page pointer on trust */ if (lmd->md_iov.kiov[i].kiov_offset + - lmd->md_iov.kiov[i].kiov_len > PAGE_CACHE_SIZE) + lmd->md_iov.kiov[i].kiov_len > PAGE_SIZE) return -EINVAL; /* invalid length */ total_length += lmd->md_iov.kiov[i].kiov_len; diff --git a/drivers/staging/lustre/lnet/lnet/lib-move.c b/drivers/staging/lustre/lnet/lnet/lib-move.c index 0009a8de77d5..f19aa9320e34 100644 --- a/drivers/staging/lustre/lnet/lnet/lib-move.c +++ b/drivers/staging/lustre/lnet/lnet/lib-move.c @@ -549,12 +549,12 @@ lnet_extract_kiov(int dst_niov, lnet_kiov_t *dst, if (len <= frag_len) { dst->kiov_len = len; LASSERT(dst->kiov_offset + dst->kiov_len - <= PAGE_CACHE_SIZE); + <= PAGE_SIZE); return niov; } dst->kiov_len = frag_len; - LASSERT(dst->kiov_offset + dst->kiov_len <= PAGE_CACHE_SIZE); + LASSERT(dst->kiov_offset + dst->kiov_len <= PAGE_SIZE); len -= frag_len; dst++; @@ -887,7 +887,7 @@ lnet_msg2bufpool(lnet_msg_t *msg) rbp = &the_lnet.ln_rtrpools[cpt][0]; LASSERT(msg->msg_len <= LNET_MTU); - while (msg->msg_len > (unsigned int)rbp->rbp_npages * PAGE_CACHE_SIZE) { + while (msg->msg_len > (unsigned int)rbp->rbp_npages * PAGE_SIZE) { rbp++; LASSERT(rbp < &the_lnet.ln_rtrpools[cpt][LNET_NRBPOOLS]); } diff --git a/drivers/staging/lustre/lnet/lnet/lib-socket.c b/drivers/staging/lustre/lnet/lnet/lib-socket.c index cc0c2753dd63..891fd59401d7 100644 --- a/drivers/staging/lustre/lnet/lnet/lib-socket.c +++ b/drivers/staging/lustre/lnet/lnet/lib-socket.c @@ -166,9 +166,9 @@ lnet_ipif_enumerate(char ***namesp) nalloc = 16; /* first guess at max interfaces */ toobig = 0; for (;;) { - if (nalloc * sizeof(*ifr) > PAGE_CACHE_SIZE) { + if (nalloc * sizeof(*ifr) > PAGE_SIZE) { toobig = 1; - nalloc = PAGE_CACHE_SIZE / sizeof(*ifr); + nalloc = PAGE_SIZE / sizeof(*ifr); CWARN("Too many interfaces: only enumerating first %d\n", nalloc); } diff --git a/drivers/staging/lustre/lnet/lnet/router.c b/drivers/staging/lustre/lnet/lnet/router.c index 61459cf9d58f..b01dc424c514 100644 --- a/drivers/staging/lustre/lnet/lnet/router.c +++ b/drivers/staging/lustre/lnet/lnet/router.c @@ -27,8 +27,8 @@ #define LNET_NRB_SMALL_PAGES 1 #define LNET_NRB_LARGE_MIN 256 /* min value for each CPT */ #define LNET_NRB_LARGE (LNET_NRB_LARGE_MIN * 4) -#define LNET_NRB_LARGE_PAGES ((LNET_MTU + PAGE_CACHE_SIZE - 1) >> \ - PAGE_CACHE_SHIFT) +#define LNET_NRB_LARGE_PAGES ((LNET_MTU + PAGE_SIZE - 1) >> \ + PAGE_SHIFT) static char *forwarding = ""; module_param(forwarding, charp, 0444); @@ -1338,7 +1338,7 @@ lnet_new_rtrbuf(lnet_rtrbufpool_t *rbp, int cpt) return NULL; } - rb->rb_kiov[i].kiov_len = PAGE_CACHE_SIZE; + rb->rb_kiov[i].kiov_len = PAGE_SIZE; rb->rb_kiov[i].kiov_offset = 0; rb->rb_kiov[i].kiov_page = page; } diff --git a/drivers/staging/lustre/lnet/selftest/brw_test.c b/drivers/staging/lustre/lnet/selftest/brw_test.c index eebc92412061..dcb6e506f592 100644 --- a/drivers/staging/lustre/lnet/selftest/brw_test.c +++ b/drivers/staging/lustre/lnet/selftest/brw_test.c @@ -90,7 +90,7 @@ brw_client_init(sfw_test_instance_t *tsi) * NB: this is not going to work for variable page size, * but we have to keep it for compatibility */ - len = npg * PAGE_CACHE_SIZE; + len = npg * PAGE_SIZE; } else { 
test_bulk_req_v1_t *breq = &tsi->tsi_u.bulk_v1; @@ -104,7 +104,7 @@ brw_client_init(sfw_test_instance_t *tsi) opc = breq->blk_opc; flags = breq->blk_flags; len = breq->blk_len; - npg = (len + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; + npg = (len + PAGE_SIZE - 1) >> PAGE_SHIFT; } if (npg > LNET_MAX_IOV || npg <= 0) @@ -167,13 +167,13 @@ brw_fill_page(struct page *pg, int pattern, __u64 magic) if (pattern == LST_BRW_CHECK_SIMPLE) { memcpy(addr, &magic, BRW_MSIZE); - addr += PAGE_CACHE_SIZE - BRW_MSIZE; + addr += PAGE_SIZE - BRW_MSIZE; memcpy(addr, &magic, BRW_MSIZE); return; } if (pattern == LST_BRW_CHECK_FULL) { - for (i = 0; i < PAGE_CACHE_SIZE / BRW_MSIZE; i++) + for (i = 0; i < PAGE_SIZE / BRW_MSIZE; i++) memcpy(addr + i * BRW_MSIZE, &magic, BRW_MSIZE); return; } @@ -198,7 +198,7 @@ brw_check_page(struct page *pg, int pattern, __u64 magic) if (data != magic) goto bad_data; - addr += PAGE_CACHE_SIZE - BRW_MSIZE; + addr += PAGE_SIZE - BRW_MSIZE; data = *((__u64 *)addr); if (data != magic) goto bad_data; @@ -207,7 +207,7 @@ brw_check_page(struct page *pg, int pattern, __u64 magic) } if (pattern == LST_BRW_CHECK_FULL) { - for (i = 0; i < PAGE_CACHE_SIZE / BRW_MSIZE; i++) { + for (i = 0; i < PAGE_SIZE / BRW_MSIZE; i++) { data = *(((__u64 *)addr) + i); if (data != magic) goto bad_data; @@ -278,7 +278,7 @@ brw_client_prep_rpc(sfw_test_unit_t *tsu, opc = breq->blk_opc; flags = breq->blk_flags; npg = breq->blk_npg; - len = npg * PAGE_CACHE_SIZE; + len = npg * PAGE_SIZE; } else { test_bulk_req_v1_t *breq = &tsi->tsi_u.bulk_v1; @@ -292,7 +292,7 @@ brw_client_prep_rpc(sfw_test_unit_t *tsu, opc = breq->blk_opc; flags = breq->blk_flags; len = breq->blk_len; - npg = (len + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; + npg = (len + PAGE_SIZE - 1) >> PAGE_SHIFT; } rc = sfw_create_test_rpc(tsu, dest, sn->sn_features, npg, len, &rpc); @@ -463,10 +463,10 @@ brw_server_handle(struct srpc_server_rpc *rpc) reply->brw_status = EINVAL; return 0; } - npg = reqst->brw_len >> PAGE_CACHE_SHIFT; + npg = reqst->brw_len >> PAGE_SHIFT; } else { - npg = (reqst->brw_len + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; + npg = (reqst->brw_len + PAGE_SIZE - 1) >> PAGE_SHIFT; } replymsg->msg_ses_feats = reqstmsg->msg_ses_feats; diff --git a/drivers/staging/lustre/lnet/selftest/conctl.c b/drivers/staging/lustre/lnet/selftest/conctl.c index 5c7cb72eac9a..79ee6c0bf7c1 100644 --- a/drivers/staging/lustre/lnet/selftest/conctl.c +++ b/drivers/staging/lustre/lnet/selftest/conctl.c @@ -743,7 +743,7 @@ static int lst_test_add_ioctl(lstio_test_args_t *args) if (args->lstio_tes_param && (args->lstio_tes_param_len <= 0 || args->lstio_tes_param_len > - PAGE_CACHE_SIZE - sizeof(lstcon_test_t))) + PAGE_SIZE - sizeof(lstcon_test_t))) return -EINVAL; LIBCFS_ALLOC(batch_name, args->lstio_tes_bat_nmlen + 1); @@ -819,7 +819,7 @@ lstcon_ioctl_entry(unsigned int cmd, struct libcfs_ioctl_hdr *hdr) opc = data->ioc_u32[0]; - if (data->ioc_plen1 > PAGE_CACHE_SIZE) + if (data->ioc_plen1 > PAGE_SIZE) return -EINVAL; LIBCFS_ALLOC(buf, data->ioc_plen1); diff --git a/drivers/staging/lustre/lnet/selftest/conrpc.c b/drivers/staging/lustre/lnet/selftest/conrpc.c index bcd78888f9cc..35a227d0c657 100644 --- a/drivers/staging/lustre/lnet/selftest/conrpc.c +++ b/drivers/staging/lustre/lnet/selftest/conrpc.c @@ -786,8 +786,8 @@ lstcon_bulkrpc_v0_prep(lst_test_bulk_param_t *param, srpc_test_reqst_t *req) test_bulk_req_t *brq = &req->tsr_u.bulk_v0; brq->blk_opc = param->blk_opc; - brq->blk_npg = (param->blk_size + PAGE_CACHE_SIZE - 1) / - PAGE_CACHE_SIZE; + brq->blk_npg 
= (param->blk_size + PAGE_SIZE - 1) / + PAGE_SIZE; brq->blk_flags = param->blk_flags; return 0; @@ -822,7 +822,7 @@ lstcon_testrpc_prep(lstcon_node_t *nd, int transop, unsigned feats, if (transop == LST_TRANS_TSBCLIADD) { npg = sfw_id_pages(test->tes_span); nob = !(feats & LST_FEAT_BULK_LEN) ? - npg * PAGE_CACHE_SIZE : + npg * PAGE_SIZE : sizeof(lnet_process_id_packed_t) * test->tes_span; } @@ -851,8 +851,8 @@ lstcon_testrpc_prep(lstcon_node_t *nd, int transop, unsigned feats, LASSERT(nob > 0); len = !(feats & LST_FEAT_BULK_LEN) ? - PAGE_CACHE_SIZE : - min_t(int, nob, PAGE_CACHE_SIZE); + PAGE_SIZE : + min_t(int, nob, PAGE_SIZE); nob -= len; bulk->bk_iovs[i].kiov_offset = 0; diff --git a/drivers/staging/lustre/lnet/selftest/framework.c b/drivers/staging/lustre/lnet/selftest/framework.c index 926c3970c498..e2c532399366 100644 --- a/drivers/staging/lustre/lnet/selftest/framework.c +++ b/drivers/staging/lustre/lnet/selftest/framework.c @@ -1161,7 +1161,7 @@ sfw_add_test(struct srpc_server_rpc *rpc) int len; if (!(sn->sn_features & LST_FEAT_BULK_LEN)) { - len = npg * PAGE_CACHE_SIZE; + len = npg * PAGE_SIZE; } else { len = sizeof(lnet_process_id_packed_t) * diff --git a/drivers/staging/lustre/lnet/selftest/rpc.c b/drivers/staging/lustre/lnet/selftest/rpc.c index 69be7d6f48fa..7d7748d96332 100644 --- a/drivers/staging/lustre/lnet/selftest/rpc.c +++ b/drivers/staging/lustre/lnet/selftest/rpc.c @@ -90,7 +90,7 @@ void srpc_set_counters(const srpc_counters_t *cnt) static int srpc_add_bulk_page(srpc_bulk_t *bk, struct page *pg, int i, int nob) { - nob = min_t(int, nob, PAGE_CACHE_SIZE); + nob = min_t(int, nob, PAGE_SIZE); LASSERT(nob > 0); LASSERT(i >= 0 && i < bk->bk_niov); diff --git a/drivers/staging/lustre/lnet/selftest/selftest.h b/drivers/staging/lustre/lnet/selftest/selftest.h index 288522d4d7b9..5321ddec9580 100644 --- a/drivers/staging/lustre/lnet/selftest/selftest.h +++ b/drivers/staging/lustre/lnet/selftest/selftest.h @@ -393,7 +393,7 @@ typedef struct sfw_test_instance { /* XXX: trailing (PAGE_CACHE_SIZE % sizeof(lnet_process_id_t)) bytes at * the end of pages are not used */ #define SFW_MAX_CONCUR LST_MAX_CONCUR -#define SFW_ID_PER_PAGE (PAGE_CACHE_SIZE / sizeof(lnet_process_id_packed_t)) +#define SFW_ID_PER_PAGE (PAGE_SIZE / sizeof(lnet_process_id_packed_t)) #define SFW_MAX_NDESTS (LNET_MAX_IOV * SFW_ID_PER_PAGE) #define sfw_id_pages(n) (((n) + SFW_ID_PER_PAGE - 1) / SFW_ID_PER_PAGE) diff --git a/drivers/staging/lustre/lustre/include/linux/lustre_patchless_compat.h b/drivers/staging/lustre/lustre/include/linux/lustre_patchless_compat.h index 33e0b99e1fb4..c6c7f54637fb 100644 --- a/drivers/staging/lustre/lustre/include/linux/lustre_patchless_compat.h +++ b/drivers/staging/lustre/lustre/include/linux/lustre_patchless_compat.h @@ -52,7 +52,7 @@ truncate_complete_page(struct address_space *mapping, struct page *page) return; if (PagePrivate(page)) - page->mapping->a_ops->invalidatepage(page, 0, PAGE_CACHE_SIZE); + page->mapping->a_ops->invalidatepage(page, 0, PAGE_SIZE); cancel_dirty_page(page); ClearPageMappedToDisk(page); diff --git a/drivers/staging/lustre/lustre/include/lustre/lustre_idl.h b/drivers/staging/lustre/lustre/include/lustre/lustre_idl.h index da8bc6eadd13..1e2ebe5cc998 100644 --- a/drivers/staging/lustre/lustre/include/lustre/lustre_idl.h +++ b/drivers/staging/lustre/lustre/include/lustre/lustre_idl.h @@ -1031,7 +1031,7 @@ static inline int lu_dirent_size(struct lu_dirent *ent) #define LU_PAGE_SIZE (1UL << LU_PAGE_SHIFT) #define LU_PAGE_MASK (~(LU_PAGE_SIZE - 1)) -#define 
LU_PAGE_COUNT (1 << (PAGE_CACHE_SHIFT - LU_PAGE_SHIFT)) +#define LU_PAGE_COUNT (1 << (PAGE_SHIFT - LU_PAGE_SHIFT)) /** @} lu_dir */ diff --git a/drivers/staging/lustre/lustre/include/lustre_mdc.h b/drivers/staging/lustre/lustre/include/lustre_mdc.h index df94f9f3bef2..af77eb359c43 100644 --- a/drivers/staging/lustre/lustre/include/lustre_mdc.h +++ b/drivers/staging/lustre/lustre/include/lustre_mdc.h @@ -155,12 +155,12 @@ static inline void mdc_update_max_ea_from_body(struct obd_export *exp, if (cli->cl_max_mds_easize < body->max_mdsize) { cli->cl_max_mds_easize = body->max_mdsize; cli->cl_default_mds_easize = - min_t(__u32, body->max_mdsize, PAGE_CACHE_SIZE); + min_t(__u32, body->max_mdsize, PAGE_SIZE); } if (cli->cl_max_mds_cookiesize < body->max_cookiesize) { cli->cl_max_mds_cookiesize = body->max_cookiesize; cli->cl_default_mds_cookiesize = - min_t(__u32, body->max_cookiesize, PAGE_CACHE_SIZE); + min_t(__u32, body->max_cookiesize, PAGE_SIZE); } } } diff --git a/drivers/staging/lustre/lustre/include/lustre_net.h b/drivers/staging/lustre/lustre/include/lustre_net.h index 4fa1a18b7d15..a5e9095fbf36 100644 --- a/drivers/staging/lustre/lustre/include/lustre_net.h +++ b/drivers/staging/lustre/lustre/include/lustre_net.h @@ -99,13 +99,13 @@ */ #define PTLRPC_MAX_BRW_BITS (LNET_MTU_BITS + PTLRPC_BULK_OPS_BITS) #define PTLRPC_MAX_BRW_SIZE (1 << PTLRPC_MAX_BRW_BITS) -#define PTLRPC_MAX_BRW_PAGES (PTLRPC_MAX_BRW_SIZE >> PAGE_CACHE_SHIFT) +#define PTLRPC_MAX_BRW_PAGES (PTLRPC_MAX_BRW_SIZE >> PAGE_SHIFT) #define ONE_MB_BRW_SIZE (1 << LNET_MTU_BITS) #define MD_MAX_BRW_SIZE (1 << LNET_MTU_BITS) -#define MD_MAX_BRW_PAGES (MD_MAX_BRW_SIZE >> PAGE_CACHE_SHIFT) +#define MD_MAX_BRW_PAGES (MD_MAX_BRW_SIZE >> PAGE_SHIFT) #define DT_MAX_BRW_SIZE PTLRPC_MAX_BRW_SIZE -#define DT_MAX_BRW_PAGES (DT_MAX_BRW_SIZE >> PAGE_CACHE_SHIFT) +#define DT_MAX_BRW_PAGES (DT_MAX_BRW_SIZE >> PAGE_SHIFT) #define OFD_MAX_BRW_SIZE (1 << LNET_MTU_BITS) /* When PAGE_SIZE is a constant, we can check our arithmetic here with cpp! 
*/ diff --git a/drivers/staging/lustre/lustre/include/obd.h b/drivers/staging/lustre/lustre/include/obd.h index 4a0f2e8b19f6..f4167db65b5d 100644 --- a/drivers/staging/lustre/lustre/include/obd.h +++ b/drivers/staging/lustre/lustre/include/obd.h @@ -1318,7 +1318,7 @@ bad_format: static inline int cli_brw_size(struct obd_device *obd) { - return obd->u.cli.cl_max_pages_per_rpc << PAGE_CACHE_SHIFT; + return obd->u.cli.cl_max_pages_per_rpc << PAGE_SHIFT; } #endif /* __OBD_H */ diff --git a/drivers/staging/lustre/lustre/include/obd_support.h b/drivers/staging/lustre/lustre/include/obd_support.h index 225262fa67b6..f8ee3a3254ba 100644 --- a/drivers/staging/lustre/lustre/include/obd_support.h +++ b/drivers/staging/lustre/lustre/include/obd_support.h @@ -500,7 +500,7 @@ extern char obd_jobid_var[]; #ifdef POISON_BULK #define POISON_PAGE(page, val) do { \ - memset(kmap(page), val, PAGE_CACHE_SIZE); \ + memset(kmap(page), val, PAGE_SIZE); \ kunmap(page); \ } while (0) #else diff --git a/drivers/staging/lustre/lustre/lclient/lcommon_cl.c b/drivers/staging/lustre/lustre/lclient/lcommon_cl.c index aced41ab93a1..96141d17d07f 100644 --- a/drivers/staging/lustre/lustre/lclient/lcommon_cl.c +++ b/drivers/staging/lustre/lustre/lclient/lcommon_cl.c @@ -758,9 +758,9 @@ int ccc_prep_size(const struct lu_env *env, struct cl_object *obj, * --bug 17336 */ loff_t size = cl_isize_read(inode); - loff_t cur_index = start >> PAGE_CACHE_SHIFT; + loff_t cur_index = start >> PAGE_SHIFT; loff_t size_index = (size - 1) >> - PAGE_CACHE_SHIFT; + PAGE_SHIFT; if ((size == 0 && cur_index != 0) || size_index < cur_index) diff --git a/drivers/staging/lustre/lustre/ldlm/ldlm_lib.c b/drivers/staging/lustre/lustre/ldlm/ldlm_lib.c index b586d5a88d00..7dd7df59aa1f 100644 --- a/drivers/staging/lustre/lustre/ldlm/ldlm_lib.c +++ b/drivers/staging/lustre/lustre/ldlm/ldlm_lib.c @@ -307,8 +307,8 @@ int client_obd_setup(struct obd_device *obddev, struct lustre_cfg *lcfg) cli->cl_avail_grant = 0; /* FIXME: Should limit this for the sum of all cl_dirty_max. */ cli->cl_dirty_max = OSC_MAX_DIRTY_DEFAULT * 1024 * 1024; - if (cli->cl_dirty_max >> PAGE_CACHE_SHIFT > totalram_pages / 8) - cli->cl_dirty_max = totalram_pages << (PAGE_CACHE_SHIFT - 3); + if (cli->cl_dirty_max >> PAGE_SHIFT > totalram_pages / 8) + cli->cl_dirty_max = totalram_pages << (PAGE_SHIFT - 3); INIT_LIST_HEAD(&cli->cl_cache_waiters); INIT_LIST_HEAD(&cli->cl_loi_ready_list); INIT_LIST_HEAD(&cli->cl_loi_hp_ready_list); @@ -353,15 +353,15 @@ int client_obd_setup(struct obd_device *obddev, struct lustre_cfg *lcfg) * In the future this should likely be increased. 
LU-1431 */ cli->cl_max_pages_per_rpc = min_t(int, PTLRPC_MAX_BRW_PAGES, - LNET_MTU >> PAGE_CACHE_SHIFT); + LNET_MTU >> PAGE_SHIFT); if (!strcmp(name, LUSTRE_MDC_NAME)) { cli->cl_max_rpcs_in_flight = MDC_MAX_RIF_DEFAULT; - } else if (totalram_pages >> (20 - PAGE_CACHE_SHIFT) <= 128 /* MB */) { + } else if (totalram_pages >> (20 - PAGE_SHIFT) <= 128 /* MB */) { cli->cl_max_rpcs_in_flight = 2; - } else if (totalram_pages >> (20 - PAGE_CACHE_SHIFT) <= 256 /* MB */) { + } else if (totalram_pages >> (20 - PAGE_SHIFT) <= 256 /* MB */) { cli->cl_max_rpcs_in_flight = 3; - } else if (totalram_pages >> (20 - PAGE_CACHE_SHIFT) <= 512 /* MB */) { + } else if (totalram_pages >> (20 - PAGE_SHIFT) <= 512 /* MB */) { cli->cl_max_rpcs_in_flight = 4; } else { cli->cl_max_rpcs_in_flight = OSC_MAX_RIF_DEFAULT; diff --git a/drivers/staging/lustre/lustre/ldlm/ldlm_pool.c b/drivers/staging/lustre/lustre/ldlm/ldlm_pool.c index 3e937b050203..b913ba9cf97c 100644 --- a/drivers/staging/lustre/lustre/ldlm/ldlm_pool.c +++ b/drivers/staging/lustre/lustre/ldlm/ldlm_pool.c @@ -107,7 +107,7 @@ /* * 50 ldlm locks for 1MB of RAM. */ -#define LDLM_POOL_HOST_L ((NUM_CACHEPAGES >> (20 - PAGE_CACHE_SHIFT)) * 50) +#define LDLM_POOL_HOST_L ((NUM_CACHEPAGES >> (20 - PAGE_SHIFT)) * 50) /* * Maximal possible grant step plan in %. diff --git a/drivers/staging/lustre/lustre/ldlm/ldlm_request.c b/drivers/staging/lustre/lustre/ldlm/ldlm_request.c index c7904a96f9af..74e193e52cd6 100644 --- a/drivers/staging/lustre/lustre/ldlm/ldlm_request.c +++ b/drivers/staging/lustre/lustre/ldlm/ldlm_request.c @@ -546,7 +546,7 @@ static inline int ldlm_req_handles_avail(int req_size, int off) { int avail; - avail = min_t(int, LDLM_MAXREQSIZE, PAGE_CACHE_SIZE - 512) - req_size; + avail = min_t(int, LDLM_MAXREQSIZE, PAGE_SIZE - 512) - req_size; if (likely(avail >= 0)) avail /= (int)sizeof(struct lustre_handle); else diff --git a/drivers/staging/lustre/lustre/llite/dir.c b/drivers/staging/lustre/lustre/llite/dir.c index 4e0a3e583330..a7c02e07fd75 100644 --- a/drivers/staging/lustre/lustre/llite/dir.c +++ b/drivers/staging/lustre/lustre/llite/dir.c @@ -153,7 +153,7 @@ static int ll_dir_filler(void *_hash, struct page *page0) struct page **page_pool; struct page *page; struct lu_dirpage *dp; - int max_pages = ll_i2sbi(inode)->ll_md_brw_size >> PAGE_CACHE_SHIFT; + int max_pages = ll_i2sbi(inode)->ll_md_brw_size >> PAGE_SHIFT; int nrdpgs = 0; /* number of pages read actually */ int npages; int i; @@ -193,8 +193,8 @@ static int ll_dir_filler(void *_hash, struct page *page0) if (body->valid & OBD_MD_FLSIZE) cl_isize_write(inode, body->size); - nrdpgs = (request->rq_bulk->bd_nob_transferred+PAGE_CACHE_SIZE-1) - >> PAGE_CACHE_SHIFT; + nrdpgs = (request->rq_bulk->bd_nob_transferred+PAGE_SIZE-1) + >> PAGE_SHIFT; SetPageUptodate(page0); } unlock_page(page0); @@ -209,7 +209,7 @@ static int ll_dir_filler(void *_hash, struct page *page0) page = page_pool[i]; if (rc < 0 || i >= nrdpgs) { - page_cache_release(page); + put_page(page); continue; } @@ -230,7 +230,7 @@ static int ll_dir_filler(void *_hash, struct page *page0) CDEBUG(D_VFSTRACE, "page %lu add to page cache failed: %d\n", offset, ret); } - page_cache_release(page); + put_page(page); } if (page_pool != &page0) @@ -247,7 +247,7 @@ void ll_release_page(struct page *page, int remove) truncate_complete_page(page->mapping, page); unlock_page(page); } - page_cache_release(page); + put_page(page); } /* @@ -273,7 +273,7 @@ static struct page *ll_dir_page_locate(struct inode *dir, __u64 *hash, if (found > 0 && 
!radix_tree_exceptional_entry(page)) { struct lu_dirpage *dp; - page_cache_get(page); + get_page(page); spin_unlock_irq(&mapping->tree_lock); /* * In contrast to find_lock_page() we are sure that directory @@ -313,7 +313,7 @@ static struct page *ll_dir_page_locate(struct inode *dir, __u64 *hash, page = NULL; } } else { - page_cache_release(page); + put_page(page); page = ERR_PTR(-EIO); } @@ -1507,7 +1507,7 @@ skip_lmm: st.st_gid = body->gid; st.st_rdev = body->rdev; st.st_size = body->size; - st.st_blksize = PAGE_CACHE_SIZE; + st.st_blksize = PAGE_SIZE; st.st_blocks = body->blocks; st.st_atime = body->atime; st.st_mtime = body->mtime; diff --git a/drivers/staging/lustre/lustre/llite/llite_internal.h b/drivers/staging/lustre/lustre/llite/llite_internal.h index 3e1572cb457b..e3c0f1dd4d31 100644 --- a/drivers/staging/lustre/lustre/llite/llite_internal.h +++ b/drivers/staging/lustre/lustre/llite/llite_internal.h @@ -310,10 +310,10 @@ static inline struct ll_inode_info *ll_i2info(struct inode *inode) /* default to about 40meg of readahead on a given system. That much tied * up in 512k readahead requests serviced at 40ms each is about 1GB/s. */ -#define SBI_DEFAULT_READAHEAD_MAX (40UL << (20 - PAGE_CACHE_SHIFT)) +#define SBI_DEFAULT_READAHEAD_MAX (40UL << (20 - PAGE_SHIFT)) /* default to read-ahead full files smaller than 2MB on the second read */ -#define SBI_DEFAULT_READAHEAD_WHOLE_MAX (2UL << (20 - PAGE_CACHE_SHIFT)) +#define SBI_DEFAULT_READAHEAD_WHOLE_MAX (2UL << (20 - PAGE_SHIFT)) enum ra_stat { RA_STAT_HIT = 0, @@ -975,13 +975,13 @@ struct vm_area_struct *our_vma(struct mm_struct *mm, unsigned long addr, static inline void ll_invalidate_page(struct page *vmpage) { struct address_space *mapping = vmpage->mapping; - loff_t offset = vmpage->index << PAGE_CACHE_SHIFT; + loff_t offset = vmpage->index << PAGE_SHIFT; LASSERT(PageLocked(vmpage)); if (!mapping) return; - ll_teardown_mmaps(mapping, offset, offset + PAGE_CACHE_SIZE); + ll_teardown_mmaps(mapping, offset, offset + PAGE_SIZE); truncate_complete_page(mapping, vmpage); } diff --git a/drivers/staging/lustre/lustre/llite/llite_lib.c b/drivers/staging/lustre/lustre/llite/llite_lib.c index 6d6bb33e3655..b57a992688a8 100644 --- a/drivers/staging/lustre/lustre/llite/llite_lib.c +++ b/drivers/staging/lustre/lustre/llite/llite_lib.c @@ -85,7 +85,7 @@ static struct ll_sb_info *ll_init_sbi(struct super_block *sb) si_meminfo(&si); pages = si.totalram - si.totalhigh; - if (pages >> (20 - PAGE_CACHE_SHIFT) < 512) + if (pages >> (20 - PAGE_SHIFT) < 512) lru_page_max = pages / 2; else lru_page_max = (pages / 4) * 3; @@ -272,12 +272,12 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt, valid != CLIENT_CONNECT_MDT_REQD) { char *buf; - buf = kzalloc(PAGE_CACHE_SIZE, GFP_KERNEL); + buf = kzalloc(PAGE_SIZE, GFP_KERNEL); if (!buf) { err = -ENOMEM; goto out_md_fid; } - obd_connect_flags2str(buf, PAGE_CACHE_SIZE, + obd_connect_flags2str(buf, PAGE_SIZE, valid ^ CLIENT_CONNECT_MDT_REQD, ","); LCONSOLE_ERROR_MSG(0x170, "Server %s does not support feature(s) needed for correct operation of this client (%s). 
Please upgrade server or downgrade client.\n", sbi->ll_md_exp->exp_obd->obd_name, buf); @@ -335,7 +335,7 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt, if (data->ocd_connect_flags & OBD_CONNECT_BRW_SIZE) sbi->ll_md_brw_size = data->ocd_brw_size; else - sbi->ll_md_brw_size = PAGE_CACHE_SIZE; + sbi->ll_md_brw_size = PAGE_SIZE; if (data->ocd_connect_flags & OBD_CONNECT_LAYOUTLOCK) { LCONSOLE_INFO("Layout lock feature supported.\n"); diff --git a/drivers/staging/lustre/lustre/llite/llite_mmap.c b/drivers/staging/lustre/lustre/llite/llite_mmap.c index 69445a9f2011..5b484e62ffd0 100644 --- a/drivers/staging/lustre/lustre/llite/llite_mmap.c +++ b/drivers/staging/lustre/lustre/llite/llite_mmap.c @@ -58,7 +58,7 @@ void policy_from_vma(ldlm_policy_data_t *policy, size_t count) { policy->l_extent.start = ((addr - vma->vm_start) & CFS_PAGE_MASK) + - (vma->vm_pgoff << PAGE_CACHE_SHIFT); + (vma->vm_pgoff << PAGE_SHIFT); policy->l_extent.end = (policy->l_extent.start + count - 1) | ~CFS_PAGE_MASK; } @@ -321,7 +321,7 @@ static int ll_fault0(struct vm_area_struct *vma, struct vm_fault *vmf) vmpage = vio->u.fault.ft_vmpage; if (result != 0 && vmpage) { - page_cache_release(vmpage); + put_page(vmpage); vmf->page = NULL; } } @@ -360,7 +360,7 @@ restart: lock_page(vmpage); if (unlikely(!vmpage->mapping)) { /* unlucky */ unlock_page(vmpage); - page_cache_release(vmpage); + put_page(vmpage); vmf->page = NULL; if (!printed && ++count > 16) { @@ -457,7 +457,7 @@ int ll_teardown_mmaps(struct address_space *mapping, __u64 first, __u64 last) LASSERTF(last > first, "last %llu first %llu\n", last, first); if (mapping_mapped(mapping)) { rc = 0; - unmap_mapping_range(mapping, first + PAGE_CACHE_SIZE - 1, + unmap_mapping_range(mapping, first + PAGE_SIZE - 1, last - first + 1, 0); } diff --git a/drivers/staging/lustre/lustre/llite/lloop.c b/drivers/staging/lustre/lustre/llite/lloop.c index b725fc16cf49..f169c0db63b4 100644 --- a/drivers/staging/lustre/lustre/llite/lloop.c +++ b/drivers/staging/lustre/lustre/llite/lloop.c @@ -218,7 +218,7 @@ static int do_bio_lustrebacked(struct lloop_device *lo, struct bio *head) offset = (pgoff_t)(bio->bi_iter.bi_sector << 9) + lo->lo_offset; bio_for_each_segment(bvec, bio, iter) { BUG_ON(bvec.bv_offset != 0); - BUG_ON(bvec.bv_len != PAGE_CACHE_SIZE); + BUG_ON(bvec.bv_len != PAGE_SIZE); pages[page_count] = bvec.bv_page; offsets[page_count] = offset; @@ -232,7 +232,7 @@ static int do_bio_lustrebacked(struct lloop_device *lo, struct bio *head) (rw == WRITE) ? 
LPROC_LL_BRW_WRITE : LPROC_LL_BRW_READ, page_count); - pvec->ldp_size = page_count << PAGE_CACHE_SHIFT; + pvec->ldp_size = page_count << PAGE_SHIFT; pvec->ldp_nr = page_count; /* FIXME: in ll_direct_rw_pages, it has to allocate many cl_page{}s to @@ -507,7 +507,7 @@ static int loop_set_fd(struct lloop_device *lo, struct file *unused, set_device_ro(bdev, (lo_flags & LO_FLAGS_READ_ONLY) != 0); - lo->lo_blocksize = PAGE_CACHE_SIZE; + lo->lo_blocksize = PAGE_SIZE; lo->lo_device = bdev; lo->lo_flags = lo_flags; lo->lo_backing_file = file; @@ -525,11 +525,11 @@ static int loop_set_fd(struct lloop_device *lo, struct file *unused, lo->lo_queue->queuedata = lo; /* queue parameters */ - CLASSERT(PAGE_CACHE_SIZE < (1 << (sizeof(unsigned short) * 8))); + CLASSERT(PAGE_SIZE < (1 << (sizeof(unsigned short) * 8))); blk_queue_logical_block_size(lo->lo_queue, - (unsigned short)PAGE_CACHE_SIZE); + (unsigned short)PAGE_SIZE); blk_queue_max_hw_sectors(lo->lo_queue, - LLOOP_MAX_SEGMENTS << (PAGE_CACHE_SHIFT - 9)); + LLOOP_MAX_SEGMENTS << (PAGE_SHIFT - 9)); blk_queue_max_segments(lo->lo_queue, LLOOP_MAX_SEGMENTS); set_capacity(disks[lo->lo_number], size); diff --git a/drivers/staging/lustre/lustre/llite/lproc_llite.c b/drivers/staging/lustre/lustre/llite/lproc_llite.c index 45941a6600fe..27ab1261400e 100644 --- a/drivers/staging/lustre/lustre/llite/lproc_llite.c +++ b/drivers/staging/lustre/lustre/llite/lproc_llite.c @@ -233,7 +233,7 @@ static ssize_t max_read_ahead_mb_show(struct kobject *kobj, pages_number = sbi->ll_ra_info.ra_max_pages; spin_unlock(&sbi->ll_lock); - mult = 1 << (20 - PAGE_CACHE_SHIFT); + mult = 1 << (20 - PAGE_SHIFT); return lprocfs_read_frac_helper(buf, PAGE_SIZE, pages_number, mult); } @@ -251,12 +251,12 @@ static ssize_t max_read_ahead_mb_store(struct kobject *kobj, if (rc) return rc; - pages_number *= 1 << (20 - PAGE_CACHE_SHIFT); /* MB -> pages */ + pages_number *= 1 << (20 - PAGE_SHIFT); /* MB -> pages */ if (pages_number > totalram_pages / 2) { CERROR("can't set file readahead more than %lu MB\n", - totalram_pages >> (20 - PAGE_CACHE_SHIFT + 1)); /*1/2 of RAM*/ + totalram_pages >> (20 - PAGE_SHIFT + 1)); /*1/2 of RAM*/ return -ERANGE; } @@ -281,7 +281,7 @@ static ssize_t max_read_ahead_per_file_mb_show(struct kobject *kobj, pages_number = sbi->ll_ra_info.ra_max_pages_per_file; spin_unlock(&sbi->ll_lock); - mult = 1 << (20 - PAGE_CACHE_SHIFT); + mult = 1 << (20 - PAGE_SHIFT); return lprocfs_read_frac_helper(buf, PAGE_SIZE, pages_number, mult); } @@ -326,7 +326,7 @@ static ssize_t max_read_ahead_whole_mb_show(struct kobject *kobj, pages_number = sbi->ll_ra_info.ra_max_read_ahead_whole_pages; spin_unlock(&sbi->ll_lock); - mult = 1 << (20 - PAGE_CACHE_SHIFT); + mult = 1 << (20 - PAGE_SHIFT); return lprocfs_read_frac_helper(buf, PAGE_SIZE, pages_number, mult); } @@ -349,7 +349,7 @@ static ssize_t max_read_ahead_whole_mb_store(struct kobject *kobj, */ if (pages_number > sbi->ll_ra_info.ra_max_pages_per_file) { CERROR("can't set max_read_ahead_whole_mb more than max_read_ahead_per_file_mb: %lu\n", - sbi->ll_ra_info.ra_max_pages_per_file >> (20 - PAGE_CACHE_SHIFT)); + sbi->ll_ra_info.ra_max_pages_per_file >> (20 - PAGE_SHIFT)); return -ERANGE; } @@ -366,7 +366,7 @@ static int ll_max_cached_mb_seq_show(struct seq_file *m, void *v) struct super_block *sb = m->private; struct ll_sb_info *sbi = ll_s2sbi(sb); struct cl_client_cache *cache = &sbi->ll_cache; - int shift = 20 - PAGE_CACHE_SHIFT; + int shift = 20 - PAGE_SHIFT; int max_cached_mb; int unused_mb; @@ -405,7 +405,7 @@ static ssize_t 
ll_max_cached_mb_seq_write(struct file *file, return -EFAULT; kernbuf[count] = 0; - mult = 1 << (20 - PAGE_CACHE_SHIFT); + mult = 1 << (20 - PAGE_SHIFT); buffer += lprocfs_find_named_value(kernbuf, "max_cached_mb:", &count) - kernbuf; rc = lprocfs_write_frac_helper(buffer, count, &pages_number, mult); @@ -415,7 +415,7 @@ static ssize_t ll_max_cached_mb_seq_write(struct file *file, if (pages_number < 0 || pages_number > totalram_pages) { CERROR("%s: can't set max cache more than %lu MB\n", ll_get_fsname(sb, NULL, 0), - totalram_pages >> (20 - PAGE_CACHE_SHIFT)); + totalram_pages >> (20 - PAGE_SHIFT)); return -ERANGE; } diff --git a/drivers/staging/lustre/lustre/llite/rw.c b/drivers/staging/lustre/lustre/llite/rw.c index 34614acf3f8e..4c7250ab54e6 100644 --- a/drivers/staging/lustre/lustre/llite/rw.c +++ b/drivers/staging/lustre/lustre/llite/rw.c @@ -146,10 +146,10 @@ static struct ll_cl_context *ll_cl_init(struct file *file, */ io->ci_lockreq = CILR_NEVER; - pos = vmpage->index << PAGE_CACHE_SHIFT; + pos = vmpage->index << PAGE_SHIFT; /* Create a temp IO to serve write. */ - result = cl_io_rw_init(env, io, CIT_WRITE, pos, PAGE_CACHE_SIZE); + result = cl_io_rw_init(env, io, CIT_WRITE, pos, PAGE_SIZE); if (result == 0) { cio->cui_fd = LUSTRE_FPRIVATE(file); cio->cui_iter = NULL; @@ -498,7 +498,7 @@ static int ll_read_ahead_page(const struct lu_env *env, struct cl_io *io, } if (rc != 1) unlock_page(vmpage); - page_cache_release(vmpage); + put_page(vmpage); } else { which = RA_STAT_FAILED_GRAB_PAGE; msg = "g_c_p_n failed"; @@ -527,7 +527,7 @@ static int ll_read_ahead_page(const struct lu_env *env, struct cl_io *io, * and max_read_ahead_per_file_mb otherwise the readahead budget can be used * up quickly which will affect read performance significantly. See LU-2816 */ -#define RAS_INCREASE_STEP(inode) (ONE_MB_BRW_SIZE >> PAGE_CACHE_SHIFT) +#define RAS_INCREASE_STEP(inode) (ONE_MB_BRW_SIZE >> PAGE_SHIFT) static inline int stride_io_mode(struct ll_readahead_state *ras) { @@ -739,7 +739,7 @@ int ll_readahead(const struct lu_env *env, struct cl_io *io, end = rpc_boundary; /* Truncate RA window to end of file */ - end = min(end, (unsigned long)((kms - 1) >> PAGE_CACHE_SHIFT)); + end = min(end, (unsigned long)((kms - 1) >> PAGE_SHIFT)); ras->ras_next_readahead = max(end, end + 1); RAS_CDEBUG(ras); @@ -776,7 +776,7 @@ int ll_readahead(const struct lu_env *env, struct cl_io *io, if (reserved != 0) ll_ra_count_put(ll_i2sbi(inode), reserved); - if (ra_end == end + 1 && ra_end == (kms >> PAGE_CACHE_SHIFT)) + if (ra_end == end + 1 && ra_end == (kms >> PAGE_SHIFT)) ll_ra_stats_inc(mapping, RA_STAT_EOF); /* if we didn't get to the end of the region we reserved from @@ -985,8 +985,8 @@ void ras_update(struct ll_sb_info *sbi, struct inode *inode, if (ras->ras_requests == 2 && !ras->ras_request_index) { __u64 kms_pages; - kms_pages = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> - PAGE_CACHE_SHIFT; + kms_pages = (i_size_read(inode) + PAGE_SIZE - 1) >> + PAGE_SHIFT; CDEBUG(D_READA, "kmsp %llu mwp %lu mp %lu\n", kms_pages, ra->ra_max_read_ahead_whole_pages, ra->ra_max_pages_per_file); @@ -1173,7 +1173,7 @@ int ll_writepage(struct page *vmpage, struct writeback_control *wbc) * PageWriteback or clean the page. */ result = cl_sync_file_range(inode, offset, - offset + PAGE_CACHE_SIZE - 1, + offset + PAGE_SIZE - 1, CL_FSYNC_LOCAL, 1); if (result > 0) { /* actually we may have written more than one page. 
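A note on the arithmetic these lproc hunks keep touching: the factor 1 << (20 - PAGE_SHIFT) is simply pages-per-megabyte, 2^20 bytes per MiB divided by 2^PAGE_SHIFT bytes per page, so only the macro's name changes in this patch, never its value. A minimal userspace sketch of the round trip, with PAGE_SHIFT fixed at 12 (4 KiB pages) purely for illustration:

#include <stdio.h>

#define PAGE_SHIFT 12                        /* assumed: 4 KiB pages */
#define PAGES_PER_MB (1UL << (20 - PAGE_SHIFT))

int main(void)
{
        unsigned long mb = 64;
        unsigned long pages = mb * PAGES_PER_MB;           /* MB -> pages */
        unsigned long back = pages >> (20 - PAGE_SHIFT);   /* pages -> MB */

        printf("%lu MB = %lu pages = %lu MB\n", mb, pages, back);
        return 0;
}

The reverse shift only recovers whole megabytes, which is presumably why these handlers go through lprocfs_read_frac_helper when printing fractional values.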
@@ -1211,7 +1211,7 @@ int ll_writepages(struct address_space *mapping, struct writeback_control *wbc) int ignore_layout = 0; if (wbc->range_cyclic) { - start = mapping->writeback_index << PAGE_CACHE_SHIFT; + start = mapping->writeback_index << PAGE_SHIFT; end = OBD_OBJECT_EOF; } else { start = wbc->range_start; @@ -1241,7 +1241,7 @@ int ll_writepages(struct address_space *mapping, struct writeback_control *wbc) if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0)) { if (end == OBD_OBJECT_EOF) end = i_size_read(inode); - mapping->writeback_index = (end >> PAGE_CACHE_SHIFT) + 1; + mapping->writeback_index = (end >> PAGE_SHIFT) + 1; } return result; } diff --git a/drivers/staging/lustre/lustre/llite/rw26.c b/drivers/staging/lustre/lustre/llite/rw26.c index 7a5db67bc680..69aa15e8e3ef 100644 --- a/drivers/staging/lustre/lustre/llite/rw26.c +++ b/drivers/staging/lustre/lustre/llite/rw26.c @@ -87,7 +87,7 @@ static void ll_invalidatepage(struct page *vmpage, unsigned int offset, * below because they are run with page locked and all our io is * happening with locked page too */ - if (offset == 0 && length == PAGE_CACHE_SIZE) { + if (offset == 0 && length == PAGE_SIZE) { env = cl_env_get(&refcheck); if (!IS_ERR(env)) { inode = vmpage->mapping->host; @@ -193,8 +193,8 @@ static inline int ll_get_user_pages(int rw, unsigned long user_addr, return -EFBIG; } - *max_pages = (user_addr + size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; - *max_pages -= user_addr >> PAGE_CACHE_SHIFT; + *max_pages = (user_addr + size + PAGE_SIZE - 1) >> PAGE_SHIFT; + *max_pages -= user_addr >> PAGE_SHIFT; *pages = libcfs_kvzalloc(*max_pages * sizeof(**pages), GFP_NOFS); if (*pages) { @@ -217,7 +217,7 @@ static void ll_free_user_pages(struct page **pages, int npages, int do_dirty) for (i = 0; i < npages; i++) { if (do_dirty) set_page_dirty_lock(pages[i]); - page_cache_release(pages[i]); + put_page(pages[i]); } kvfree(pages); } @@ -357,7 +357,7 @@ static ssize_t ll_direct_IO_26_seg(const struct lu_env *env, struct cl_io *io, * up to 22MB for 128kB kmalloc and up to 682MB for 4MB kmalloc. */ #define MAX_DIO_SIZE ((KMALLOC_MAX_SIZE / sizeof(struct brw_page) * \ - PAGE_CACHE_SIZE) & ~(DT_MAX_BRW_SIZE - 1)) + PAGE_SIZE) & ~(DT_MAX_BRW_SIZE - 1)) static ssize_t ll_direct_IO_26(struct kiocb *iocb, struct iov_iter *iter, loff_t file_offset) { @@ -382,8 +382,8 @@ static ssize_t ll_direct_IO_26(struct kiocb *iocb, struct iov_iter *iter, CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), size=%zd (max %lu), offset=%lld=%llx, pages %zd (max %lu)\n", inode->i_ino, inode->i_generation, inode, count, MAX_DIO_SIZE, - file_offset, file_offset, count >> PAGE_CACHE_SHIFT, - MAX_DIO_SIZE >> PAGE_CACHE_SHIFT); + file_offset, file_offset, count >> PAGE_SHIFT, + MAX_DIO_SIZE >> PAGE_SHIFT); /* Check that all user buffers are aligned as well */ if (iov_iter_alignment(iter) & ~CFS_PAGE_MASK) @@ -432,8 +432,8 @@ static ssize_t ll_direct_IO_26(struct kiocb *iocb, struct iov_iter *iter, * page worth of page pointers = 4MB on i386. 
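ll_get_user_pages above sizes its page array by rounding the buffer end up to a page boundary and the buffer start down. The same two shifts in a standalone sketch (fixed 4 KiB page size, illustrative only):

#include <stdio.h>

#define PAGE_SHIFT 12
#define PAGE_SIZE  (1UL << PAGE_SHIFT)

/* Number of pages spanned by the user buffer [addr, addr + size). */
static unsigned long pages_spanned(unsigned long addr, unsigned long size)
{
        unsigned long end_pages = (addr + size + PAGE_SIZE - 1) >> PAGE_SHIFT;
        return end_pages - (addr >> PAGE_SHIFT);
}

int main(void)
{
        /* A 10-byte buffer straddling a page boundary touches 2 pages. */
        printf("%lu\n", pages_spanned(PAGE_SIZE - 5, 10));  /* 2 */
        printf("%lu\n", pages_spanned(0, PAGE_SIZE));       /* 1 */
        return 0;
}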
*/ if (result == -ENOMEM && - size > (PAGE_CACHE_SIZE / sizeof(*pages)) * - PAGE_CACHE_SIZE) { + size > (PAGE_SIZE / sizeof(*pages)) * + PAGE_SIZE) { size = ((((size / 2) - 1) | ~CFS_PAGE_MASK) + 1) & CFS_PAGE_MASK; @@ -474,10 +474,10 @@ static int ll_write_begin(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned flags, struct page **pagep, void **fsdata) { - pgoff_t index = pos >> PAGE_CACHE_SHIFT; + pgoff_t index = pos >> PAGE_SHIFT; struct page *page; int rc; - unsigned from = pos & (PAGE_CACHE_SIZE - 1); + unsigned from = pos & (PAGE_SIZE - 1); page = grab_cache_page_write_begin(mapping, index, flags); if (!page) @@ -488,7 +488,7 @@ static int ll_write_begin(struct file *file, struct address_space *mapping, rc = ll_prepare_write(file, page, from, from + len); if (rc) { unlock_page(page); - page_cache_release(page); + put_page(page); } return rc; } @@ -497,12 +497,12 @@ static int ll_write_end(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned copied, struct page *page, void *fsdata) { - unsigned from = pos & (PAGE_CACHE_SIZE - 1); + unsigned from = pos & (PAGE_SIZE - 1); int rc; rc = ll_commit_write(file, page, from, from + copied); unlock_page(page); - page_cache_release(page); + put_page(page); return rc ?: copied; } diff --git a/drivers/staging/lustre/lustre/llite/vvp_io.c b/drivers/staging/lustre/lustre/llite/vvp_io.c index fb0c26ee7ff3..75d4df71cab8 100644 --- a/drivers/staging/lustre/lustre/llite/vvp_io.c +++ b/drivers/staging/lustre/lustre/llite/vvp_io.c @@ -514,7 +514,7 @@ static int vvp_io_read_start(const struct lu_env *env, /* * XXX: explicit PAGE_CACHE_SIZE */ - bead->lrr_count = cl_index(obj, tot + PAGE_CACHE_SIZE - 1); + bead->lrr_count = cl_index(obj, tot + PAGE_SIZE - 1); ll_ra_read_in(file, bead); } @@ -959,7 +959,7 @@ static int vvp_io_prepare_write(const struct lu_env *env, * We're completely overwriting an existing page, so _don't_ * set it up to date until commit_write */ - if (from == 0 && to == PAGE_CACHE_SIZE) { + if (from == 0 && to == PAGE_SIZE) { CL_PAGE_HEADER(D_PAGE, env, pg, "full page write\n"); POISON_PAGE(page, 0x11); } else @@ -1022,7 +1022,7 @@ static int vvp_io_commit_write(const struct lu_env *env, set_page_dirty(vmpage); vvp_write_pending(cl2ccc(obj), cp); } else if (result == -EDQUOT) { - pgoff_t last_index = i_size_read(inode) >> PAGE_CACHE_SHIFT; + pgoff_t last_index = i_size_read(inode) >> PAGE_SHIFT; bool need_clip = true; /* @@ -1040,7 +1040,7 @@ static int vvp_io_commit_write(const struct lu_env *env, * being. */ if (last_index > pg->cp_index) { - to = PAGE_CACHE_SIZE; + to = PAGE_SIZE; need_clip = false; } else if (last_index == pg->cp_index) { int size_to = i_size_read(inode) & ~CFS_PAGE_MASK; diff --git a/drivers/staging/lustre/lustre/llite/vvp_page.c b/drivers/staging/lustre/lustre/llite/vvp_page.c index 850bae734075..33ca3eb34965 100644 --- a/drivers/staging/lustre/lustre/llite/vvp_page.c +++ b/drivers/staging/lustre/lustre/llite/vvp_page.c @@ -57,7 +57,7 @@ static void vvp_page_fini_common(struct ccc_page *cp) struct page *vmpage = cp->cpg_page; LASSERT(vmpage); - page_cache_release(vmpage); + put_page(vmpage); } static void vvp_page_fini(const struct lu_env *env, @@ -164,12 +164,12 @@ static int vvp_page_unmap(const struct lu_env *env, LASSERT(vmpage); LASSERT(PageLocked(vmpage)); - offset = vmpage->index << PAGE_CACHE_SHIFT; + offset = vmpage->index << PAGE_SHIFT; /* * XXX is it safe to call this with the page lock held? 
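The ll_write_begin/ll_write_end hunks above split a file position into a page index and an in-page offset; after the conversion that is pos >> PAGE_SHIFT and pos & (PAGE_SIZE - 1). A sketch of the decomposition and its inverse (PAGE_SHIFT again assumed to be 12):

#include <stdio.h>

#define PAGE_SHIFT 12
#define PAGE_SIZE  (1UL << PAGE_SHIFT)

int main(void)
{
        unsigned long long pos = 123456789;           /* arbitrary file position */
        unsigned long index = pos >> PAGE_SHIFT;      /* which page              */
        unsigned int from = pos & (PAGE_SIZE - 1);    /* offset inside the page  */

        /* Recombining the two parts must give the position back. */
        printf("index=%lu from=%u ok=%d\n", index, from,
               ((unsigned long long)index << PAGE_SHIFT) + from == pos);
        return 0;
}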
*/ - ll_teardown_mmaps(vmpage->mapping, offset, offset + PAGE_CACHE_SIZE); + ll_teardown_mmaps(vmpage->mapping, offset, offset + PAGE_SIZE); return 0; } @@ -537,7 +537,7 @@ int vvp_page_init(const struct lu_env *env, struct cl_object *obj, CLOBINVRNT(env, obj, ccc_object_invariant(obj)); cpg->cpg_page = vmpage; - page_cache_get(vmpage); + get_page(vmpage); INIT_LIST_HEAD(&cpg->cpg_pending_linkage); if (page->cp_type == CPT_CACHEABLE) { diff --git a/drivers/staging/lustre/lustre/lmv/lmv_obd.c b/drivers/staging/lustre/lustre/lmv/lmv_obd.c index 0f776cf8a5aa..ce7e8b70dd44 100644 --- a/drivers/staging/lustre/lustre/lmv/lmv_obd.c +++ b/drivers/staging/lustre/lustre/lmv/lmv_obd.c @@ -2129,8 +2129,8 @@ static int lmv_readpage(struct obd_export *exp, struct md_op_data *op_data, if (rc != 0) return rc; - ncfspgs = ((*request)->rq_bulk->bd_nob_transferred + PAGE_CACHE_SIZE - 1) - >> PAGE_CACHE_SHIFT; + ncfspgs = ((*request)->rq_bulk->bd_nob_transferred + PAGE_SIZE - 1) + >> PAGE_SHIFT; nlupgs = (*request)->rq_bulk->bd_nob_transferred >> LU_PAGE_SHIFT; LASSERT(!((*request)->rq_bulk->bd_nob_transferred & ~LU_PAGE_MASK)); LASSERT(ncfspgs > 0 && ncfspgs <= op_data->op_npages); diff --git a/drivers/staging/lustre/lustre/mdc/mdc_request.c b/drivers/staging/lustre/lustre/mdc/mdc_request.c index 55dd8ef9525b..b91d3ff18b02 100644 --- a/drivers/staging/lustre/lustre/mdc/mdc_request.c +++ b/drivers/staging/lustre/lustre/mdc/mdc_request.c @@ -1002,10 +1002,10 @@ restart_bulk: /* NB req now owns desc and will free it when it gets freed */ for (i = 0; i < op_data->op_npages; i++) - ptlrpc_prep_bulk_page_pin(desc, pages[i], 0, PAGE_CACHE_SIZE); + ptlrpc_prep_bulk_page_pin(desc, pages[i], 0, PAGE_SIZE); mdc_readdir_pack(req, op_data->op_offset, - PAGE_CACHE_SIZE * op_data->op_npages, + PAGE_SIZE * op_data->op_npages, &op_data->op_fid1); ptlrpc_request_set_replen(req); @@ -1037,7 +1037,7 @@ restart_bulk: if (req->rq_bulk->bd_nob_transferred & ~LU_PAGE_MASK) { CERROR("Unexpected # bytes transferred: %d (%ld expected)\n", req->rq_bulk->bd_nob_transferred, - PAGE_CACHE_SIZE * op_data->op_npages); + PAGE_SIZE * op_data->op_npages); ptlrpc_req_finished(req); return -EPROTO; } diff --git a/drivers/staging/lustre/lustre/mgc/mgc_request.c b/drivers/staging/lustre/lustre/mgc/mgc_request.c index b7dc87248032..3924b095bfb0 100644 --- a/drivers/staging/lustre/lustre/mgc/mgc_request.c +++ b/drivers/staging/lustre/lustre/mgc/mgc_request.c @@ -1113,7 +1113,7 @@ static int mgc_import_event(struct obd_device *obd, } enum { - CONFIG_READ_NRPAGES_INIT = 1 << (20 - PAGE_CACHE_SHIFT), + CONFIG_READ_NRPAGES_INIT = 1 << (20 - PAGE_SHIFT), CONFIG_READ_NRPAGES = 4 }; @@ -1137,19 +1137,19 @@ static int mgc_apply_recover_logs(struct obd_device *mgc, LASSERT(cfg->cfg_instance); LASSERT(cfg->cfg_sb == cfg->cfg_instance); - inst = kzalloc(PAGE_CACHE_SIZE, GFP_KERNEL); + inst = kzalloc(PAGE_SIZE, GFP_KERNEL); if (!inst) return -ENOMEM; - pos = snprintf(inst, PAGE_CACHE_SIZE, "%p", cfg->cfg_instance); - if (pos >= PAGE_CACHE_SIZE) { + pos = snprintf(inst, PAGE_SIZE, "%p", cfg->cfg_instance); + if (pos >= PAGE_SIZE) { kfree(inst); return -E2BIG; } ++pos; buf = inst + pos; - bufsz = PAGE_CACHE_SIZE - pos; + bufsz = PAGE_SIZE - pos; while (datalen > 0) { int entry_len = sizeof(*entry); @@ -1181,7 +1181,7 @@ static int mgc_apply_recover_logs(struct obd_device *mgc, /* Keep this swab for normal mixed endian handling. 
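Hunks like the vvp_page ones above are pure renames: page_cache_get() and page_cache_release() were defined as one-line aliases for get_page() and put_page(), so the reference-counting behaviour is unchanged. A toy userspace analogue of the get/put pairing (this struct page and its plain int refcount are stand-ins, not the kernel's atomic implementation):

#include <assert.h>
#include <stdio.h>

struct page { int refcount; };          /* toy stand-in */

static void get_page(struct page *p) { p->refcount++; }

static void put_page(struct page *p)
{
        assert(p->refcount > 0);
        if (--p->refcount == 0)
                printf("page freed\n"); /* the kernel frees it here */
}

int main(void)
{
        struct page pg = { .refcount = 1 };  /* reference held by the cache */

        get_page(&pg);   /* e.g. vvp_page_init taking its reference   */
        put_page(&pg);   /* e.g. vvp_page_fini_common dropping it     */
        put_page(&pg);   /* last reference gone: prints "page freed"  */
        return 0;
}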
LU-1644 */ if (mne_swab) lustre_swab_mgs_nidtbl_entry(entry); - if (entry->mne_length > PAGE_CACHE_SIZE) { + if (entry->mne_length > PAGE_SIZE) { CERROR("MNE too large (%u)\n", entry->mne_length); break; } @@ -1371,7 +1371,7 @@ again: } body->mcb_offset = cfg->cfg_last_idx + 1; body->mcb_type = cld->cld_type; - body->mcb_bits = PAGE_CACHE_SHIFT; + body->mcb_bits = PAGE_SHIFT; body->mcb_units = nrpages; /* allocate bulk transfer descriptor */ @@ -1383,7 +1383,7 @@ again: } for (i = 0; i < nrpages; i++) - ptlrpc_prep_bulk_page_pin(desc, pages[i], 0, PAGE_CACHE_SIZE); + ptlrpc_prep_bulk_page_pin(desc, pages[i], 0, PAGE_SIZE); ptlrpc_request_set_replen(req); rc = ptlrpc_queue_wait(req); @@ -1411,7 +1411,7 @@ again: goto out; } - if (ealen > nrpages << PAGE_CACHE_SHIFT) { + if (ealen > nrpages << PAGE_SHIFT) { rc = -EINVAL; goto out; } @@ -1439,7 +1439,7 @@ again: ptr = kmap(pages[i]); rc2 = mgc_apply_recover_logs(obd, cld, res->mcr_offset, ptr, - min_t(int, ealen, PAGE_CACHE_SIZE), + min_t(int, ealen, PAGE_SIZE), mne_swab); kunmap(pages[i]); if (rc2 < 0) { @@ -1448,7 +1448,7 @@ again: break; } - ealen -= PAGE_CACHE_SIZE; + ealen -= PAGE_SIZE; } out: diff --git a/drivers/staging/lustre/lustre/obdclass/cl_page.c b/drivers/staging/lustre/lustre/obdclass/cl_page.c index 231a2f26c693..394580016638 100644 --- a/drivers/staging/lustre/lustre/obdclass/cl_page.c +++ b/drivers/staging/lustre/lustre/obdclass/cl_page.c @@ -1477,7 +1477,7 @@ loff_t cl_offset(const struct cl_object *obj, pgoff_t idx) /* * XXX for now. */ - return (loff_t)idx << PAGE_CACHE_SHIFT; + return (loff_t)idx << PAGE_SHIFT; } EXPORT_SYMBOL(cl_offset); @@ -1489,13 +1489,13 @@ pgoff_t cl_index(const struct cl_object *obj, loff_t offset) /* * XXX for now. */ - return offset >> PAGE_CACHE_SHIFT; + return offset >> PAGE_SHIFT; } EXPORT_SYMBOL(cl_index); int cl_page_size(const struct cl_object *obj) { - return 1 << PAGE_CACHE_SHIFT; + return 1 << PAGE_SHIFT; } EXPORT_SYMBOL(cl_page_size); diff --git a/drivers/staging/lustre/lustre/obdclass/class_obd.c b/drivers/staging/lustre/lustre/obdclass/class_obd.c index 1a938e1376f9..c2cf015962dd 100644 --- a/drivers/staging/lustre/lustre/obdclass/class_obd.c +++ b/drivers/staging/lustre/lustre/obdclass/class_obd.c @@ -461,9 +461,9 @@ static int obd_init_checks(void) CWARN("LPD64 wrong length! strlen(%s)=%d != 2\n", buf, len); ret = -EINVAL; } - if ((u64val & ~CFS_PAGE_MASK) >= PAGE_CACHE_SIZE) { + if ((u64val & ~CFS_PAGE_MASK) >= PAGE_SIZE) { CWARN("mask failed: u64val %llu >= %llu\n", u64val, - (__u64)PAGE_CACHE_SIZE); + (__u64)PAGE_SIZE); ret = -EINVAL; } @@ -509,7 +509,7 @@ static int __init obdclass_init(void) * For clients with less memory, a larger fraction is needed * for other purposes (mostly for BGL). 
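cl_offset() and cl_index() in the cl_page.c hunk above are the two directions of one mapping: offset = index << PAGE_SHIFT and index = offset >> PAGE_SHIFT. The round trip through cl_index only recovers the page-aligned part of an arbitrary offset, as this small sketch shows (4 KiB pages assumed):

#include <stdio.h>

#define PAGE_SHIFT 12

typedef unsigned long pgoff_t;
typedef long long loff_t;

static loff_t cl_offset(pgoff_t idx)   { return (loff_t)idx << PAGE_SHIFT; }
static pgoff_t cl_index(loff_t offset) { return offset >> PAGE_SHIFT; }

int main(void)
{
        loff_t off = 10000;                          /* not page aligned   */
        printf("%lld\n", cl_offset(cl_index(off)));  /* 8192: rounded down */
        return 0;
}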
*/ - if (totalram_pages <= 512 << (20 - PAGE_CACHE_SHIFT)) + if (totalram_pages <= 512 << (20 - PAGE_SHIFT)) obd_max_dirty_pages = totalram_pages / 4; else obd_max_dirty_pages = totalram_pages / 2; diff --git a/drivers/staging/lustre/lustre/obdclass/linux/linux-obdo.c b/drivers/staging/lustre/lustre/obdclass/linux/linux-obdo.c index 9496c09b2b69..4a2baaff3dc1 100644 --- a/drivers/staging/lustre/lustre/obdclass/linux/linux-obdo.c +++ b/drivers/staging/lustre/lustre/obdclass/linux/linux-obdo.c @@ -71,8 +71,8 @@ void obdo_refresh_inode(struct inode *dst, struct obdo *src, u32 valid) if (valid & OBD_MD_FLBLKSZ && src->o_blksize > (1 << dst->i_blkbits)) dst->i_blkbits = ffs(src->o_blksize) - 1; - if (dst->i_blkbits < PAGE_CACHE_SHIFT) - dst->i_blkbits = PAGE_CACHE_SHIFT; + if (dst->i_blkbits < PAGE_SHIFT) + dst->i_blkbits = PAGE_SHIFT; /* allocation of space */ if (valid & OBD_MD_FLBLOCKS && src->o_blocks > dst->i_blocks) diff --git a/drivers/staging/lustre/lustre/obdclass/linux/linux-sysctl.c b/drivers/staging/lustre/lustre/obdclass/linux/linux-sysctl.c index fd333b9e968c..e6bf414a4444 100644 --- a/drivers/staging/lustre/lustre/obdclass/linux/linux-sysctl.c +++ b/drivers/staging/lustre/lustre/obdclass/linux/linux-sysctl.c @@ -100,7 +100,7 @@ static ssize_t max_dirty_mb_show(struct kobject *kobj, struct attribute *attr, char *buf) { return sprintf(buf, "%ul\n", - obd_max_dirty_pages / (1 << (20 - PAGE_CACHE_SHIFT))); + obd_max_dirty_pages / (1 << (20 - PAGE_SHIFT))); } static ssize_t max_dirty_mb_store(struct kobject *kobj, struct attribute *attr, @@ -113,14 +113,14 @@ static ssize_t max_dirty_mb_store(struct kobject *kobj, struct attribute *attr, if (rc) return rc; - val *= 1 << (20 - PAGE_CACHE_SHIFT); /* convert to pages */ + val *= 1 << (20 - PAGE_SHIFT); /* convert to pages */ if (val > ((totalram_pages / 10) * 9)) { /* Somebody wants to assign too much memory to dirty pages */ return -EINVAL; } - if (val < 4 << (20 - PAGE_CACHE_SHIFT)) { + if (val < 4 << (20 - PAGE_SHIFT)) { /* Less than 4 Mb for dirty cache is also bad */ return -EINVAL; } diff --git a/drivers/staging/lustre/lustre/obdclass/lu_object.c b/drivers/staging/lustre/lustre/obdclass/lu_object.c index 65a4746c89ca..978568ada8e9 100644 --- a/drivers/staging/lustre/lustre/obdclass/lu_object.c +++ b/drivers/staging/lustre/lustre/obdclass/lu_object.c @@ -840,8 +840,8 @@ static int lu_htable_order(void) #if BITS_PER_LONG == 32 /* limit hashtable size for lowmem systems to low RAM */ - if (cache_size > 1 << (30 - PAGE_CACHE_SHIFT)) - cache_size = 1 << (30 - PAGE_CACHE_SHIFT) * 3 / 4; + if (cache_size > 1 << (30 - PAGE_SHIFT)) + cache_size = 1 << (30 - PAGE_SHIFT) * 3 / 4; #endif /* clear off unreasonable cache setting. 
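max_dirty_mb_store above shows the usual shape of these MB-denominated tunables: convert megabytes to pages, then reject anything outside a window derived from total RAM. A condensed sketch of that validation (the bounds mirror the hunk; the 2 GiB RAM figure is invented for the example):

#include <stdio.h>

#define PAGE_SHIFT 12
#define MB_TO_PAGES(mb) ((mb) << (20 - PAGE_SHIFT))

static int set_max_dirty_mb(unsigned long val_mb, unsigned long totalram_pages)
{
        unsigned long pages = MB_TO_PAGES(val_mb);

        if (pages > totalram_pages / 10 * 9)   /* over 9/10 of RAM */
                return -1;
        if (pages < MB_TO_PAGES(4UL))          /* under 4 MB       */
                return -1;
        return 0;
}

int main(void)
{
        unsigned long ram = MB_TO_PAGES(2048UL);   /* pretend 2 GiB box */

        printf("%d %d %d\n",
               set_max_dirty_mb(2, ram),     /* -1: too small */
               set_max_dirty_mb(256, ram),   /*  0: accepted  */
               set_max_dirty_mb(2000, ram)); /* -1: too large */
        return 0;
}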
*/ @@ -853,7 +853,7 @@ static int lu_htable_order(void) lu_cache_percent = LU_CACHE_PERCENT_DEFAULT; } cache_size = cache_size / 100 * lu_cache_percent * - (PAGE_CACHE_SIZE / 1024); + (PAGE_SIZE / 1024); for (bits = 1; (1 << bits) < cache_size; ++bits) { ; diff --git a/drivers/staging/lustre/lustre/obdecho/echo_client.c b/drivers/staging/lustre/lustre/obdecho/echo_client.c index 64ffe243f870..1e83669c204d 100644 --- a/drivers/staging/lustre/lustre/obdecho/echo_client.c +++ b/drivers/staging/lustre/lustre/obdecho/echo_client.c @@ -278,7 +278,7 @@ static void echo_page_fini(const struct lu_env *env, struct page *vmpage = ep->ep_vmpage; atomic_dec(&eco->eo_npages); - page_cache_release(vmpage); + put_page(vmpage); } static int echo_page_prep(const struct lu_env *env, @@ -373,7 +373,7 @@ static int echo_page_init(const struct lu_env *env, struct cl_object *obj, struct echo_object *eco = cl2echo_obj(obj); ep->ep_vmpage = vmpage; - page_cache_get(vmpage); + get_page(vmpage); mutex_init(&ep->ep_lock); cl_page_slice_add(page, &ep->ep_cl, obj, &echo_page_ops); atomic_inc(&eco->eo_npages); @@ -1138,7 +1138,7 @@ static int cl_echo_object_brw(struct echo_object *eco, int rw, u64 offset, LASSERT(rc == 0); rc = cl_echo_enqueue0(env, eco, offset, - offset + npages * PAGE_CACHE_SIZE - 1, + offset + npages * PAGE_SIZE - 1, rw == READ ? LCK_PR : LCK_PW, &lh.cookie, CEF_NEVER); if (rc < 0) @@ -1311,11 +1311,11 @@ echo_client_page_debug_setup(struct page *page, int rw, u64 id, int delta; /* no partial pages on the client */ - LASSERT(count == PAGE_CACHE_SIZE); + LASSERT(count == PAGE_SIZE); addr = kmap(page); - for (delta = 0; delta < PAGE_CACHE_SIZE; delta += OBD_ECHO_BLOCK_SIZE) { + for (delta = 0; delta < PAGE_SIZE; delta += OBD_ECHO_BLOCK_SIZE) { if (rw == OBD_BRW_WRITE) { stripe_off = offset + delta; stripe_id = id; @@ -1341,11 +1341,11 @@ static int echo_client_page_debug_check(struct page *page, u64 id, int rc2; /* no partial pages on the client */ - LASSERT(count == PAGE_CACHE_SIZE); + LASSERT(count == PAGE_SIZE); addr = kmap(page); - for (rc = delta = 0; delta < PAGE_CACHE_SIZE; delta += OBD_ECHO_BLOCK_SIZE) { + for (rc = delta = 0; delta < PAGE_SIZE; delta += OBD_ECHO_BLOCK_SIZE) { stripe_off = offset + delta; stripe_id = id; @@ -1391,7 +1391,7 @@ static int echo_client_kbrw(struct echo_device *ed, int rw, struct obdo *oa, return -EINVAL; /* XXX think again with misaligned I/O */ - npages = count >> PAGE_CACHE_SHIFT; + npages = count >> PAGE_SHIFT; if (rw == OBD_BRW_WRITE) brw_flags = OBD_BRW_ASYNC; @@ -1408,7 +1408,7 @@ static int echo_client_kbrw(struct echo_device *ed, int rw, struct obdo *oa, for (i = 0, pgp = pga, off = offset; i < npages; - i++, pgp++, off += PAGE_CACHE_SIZE) { + i++, pgp++, off += PAGE_SIZE) { LASSERT(!pgp->pg); /* for cleanup */ @@ -1418,7 +1418,7 @@ static int echo_client_kbrw(struct echo_device *ed, int rw, struct obdo *oa, goto out; pages[i] = pgp->pg; - pgp->count = PAGE_CACHE_SIZE; + pgp->count = PAGE_SIZE; pgp->off = off; pgp->flag = brw_flags; @@ -1473,8 +1473,8 @@ static int echo_client_prep_commit(const struct lu_env *env, if (count <= 0 || (count & (~CFS_PAGE_MASK)) != 0) return -EINVAL; - npages = batch >> PAGE_CACHE_SHIFT; - tot_pages = count >> PAGE_CACHE_SHIFT; + npages = batch >> PAGE_SHIFT; + tot_pages = count >> PAGE_SHIFT; lnb = kcalloc(npages, sizeof(struct niobuf_local), GFP_NOFS); rnb = kcalloc(npages, sizeof(struct niobuf_remote), GFP_NOFS); @@ -1497,9 +1497,9 @@ static int echo_client_prep_commit(const struct lu_env *env, if (tot_pages < npages) npages 
= tot_pages; - for (i = 0; i < npages; i++, off += PAGE_CACHE_SIZE) { + for (i = 0; i < npages; i++, off += PAGE_SIZE) { rnb[i].offset = off; - rnb[i].len = PAGE_CACHE_SIZE; + rnb[i].len = PAGE_SIZE; rnb[i].flags = brw_flags; } @@ -1878,7 +1878,7 @@ static int __init obdecho_init(void) { LCONSOLE_INFO("Echo OBD driver; http://www.lustre.org/\n"); - LASSERT(PAGE_CACHE_SIZE % OBD_ECHO_BLOCK_SIZE == 0); + LASSERT(PAGE_SIZE % OBD_ECHO_BLOCK_SIZE == 0); return echo_client_init(); } diff --git a/drivers/staging/lustre/lustre/osc/lproc_osc.c b/drivers/staging/lustre/lustre/osc/lproc_osc.c index 57c43c506ef2..a3358c39b2f1 100644 --- a/drivers/staging/lustre/lustre/osc/lproc_osc.c +++ b/drivers/staging/lustre/lustre/osc/lproc_osc.c @@ -162,15 +162,15 @@ static ssize_t max_dirty_mb_store(struct kobject *kobj, if (rc) return rc; - pages_number *= 1 << (20 - PAGE_CACHE_SHIFT); /* MB -> pages */ + pages_number *= 1 << (20 - PAGE_SHIFT); /* MB -> pages */ if (pages_number <= 0 || - pages_number > OSC_MAX_DIRTY_MB_MAX << (20 - PAGE_CACHE_SHIFT) || + pages_number > OSC_MAX_DIRTY_MB_MAX << (20 - PAGE_SHIFT) || pages_number > totalram_pages / 4) /* 1/4 of RAM */ return -ERANGE; client_obd_list_lock(&cli->cl_loi_list_lock); - cli->cl_dirty_max = (u32)(pages_number << PAGE_CACHE_SHIFT); + cli->cl_dirty_max = (u32)(pages_number << PAGE_SHIFT); osc_wake_cache_waiters(cli); client_obd_list_unlock(&cli->cl_loi_list_lock); @@ -182,7 +182,7 @@ static int osc_cached_mb_seq_show(struct seq_file *m, void *v) { struct obd_device *dev = m->private; struct client_obd *cli = &dev->u.cli; - int shift = 20 - PAGE_CACHE_SHIFT; + int shift = 20 - PAGE_SHIFT; seq_printf(m, "used_mb: %d\n" @@ -211,7 +211,7 @@ static ssize_t osc_cached_mb_seq_write(struct file *file, return -EFAULT; kernbuf[count] = 0; - mult = 1 << (20 - PAGE_CACHE_SHIFT); + mult = 1 << (20 - PAGE_SHIFT); buffer += lprocfs_find_named_value(kernbuf, "used_mb:", &count) - kernbuf; rc = lprocfs_write_frac_helper(buffer, count, &pages_number, mult); @@ -569,12 +569,12 @@ static ssize_t max_pages_per_rpc_store(struct kobject *kobj, /* if the max_pages is specified in bytes, convert to pages */ if (val >= ONE_MB_BRW_SIZE) - val >>= PAGE_CACHE_SHIFT; + val >>= PAGE_SHIFT; - chunk_mask = ~((1 << (cli->cl_chunkbits - PAGE_CACHE_SHIFT)) - 1); + chunk_mask = ~((1 << (cli->cl_chunkbits - PAGE_SHIFT)) - 1); /* max_pages_per_rpc must be chunk aligned */ val = (val + ~chunk_mask) & chunk_mask; - if (val == 0 || val > ocd->ocd_brw_size >> PAGE_CACHE_SHIFT) { + if (val == 0 || val > ocd->ocd_brw_size >> PAGE_SHIFT) { return -ERANGE; } client_obd_list_lock(&cli->cl_loi_list_lock); diff --git a/drivers/staging/lustre/lustre/osc/osc_cache.c b/drivers/staging/lustre/lustre/osc/osc_cache.c index 63363111380c..4e0a357ed43d 100644 --- a/drivers/staging/lustre/lustre/osc/osc_cache.c +++ b/drivers/staging/lustre/lustre/osc/osc_cache.c @@ -544,7 +544,7 @@ static int osc_extent_merge(const struct lu_env *env, struct osc_extent *cur, return -ERANGE; LASSERT(cur->oe_osclock == victim->oe_osclock); - ppc_bits = osc_cli(obj)->cl_chunkbits - PAGE_CACHE_SHIFT; + ppc_bits = osc_cli(obj)->cl_chunkbits - PAGE_SHIFT; chunk_start = cur->oe_start >> ppc_bits; chunk_end = cur->oe_end >> ppc_bits; if (chunk_start != (victim->oe_end >> ppc_bits) + 1 && @@ -647,8 +647,8 @@ static struct osc_extent *osc_extent_find(const struct lu_env *env, lock = cl_lock_at_pgoff(env, osc2cl(obj), index, NULL, 1, 0); LASSERT(lock->cll_descr.cld_mode >= CLM_WRITE); - LASSERT(cli->cl_chunkbits >= PAGE_CACHE_SHIFT); - 
ppc_bits = cli->cl_chunkbits - PAGE_CACHE_SHIFT; + LASSERT(cli->cl_chunkbits >= PAGE_SHIFT); + ppc_bits = cli->cl_chunkbits - PAGE_SHIFT; chunk_mask = ~((1 << ppc_bits) - 1); chunksize = 1 << cli->cl_chunkbits; chunk = index >> ppc_bits; @@ -871,8 +871,8 @@ int osc_extent_finish(const struct lu_env *env, struct osc_extent *ext, if (!sent) { lost_grant = ext->oe_grants; - } else if (blocksize < PAGE_CACHE_SIZE && - last_count != PAGE_CACHE_SIZE) { + } else if (blocksize < PAGE_SIZE && + last_count != PAGE_SIZE) { /* For short writes we shouldn't count parts of pages that * span a whole chunk on the OST side, or our accounting goes * wrong. Should match the code in filter_grant_check. @@ -884,7 +884,7 @@ int osc_extent_finish(const struct lu_env *env, struct osc_extent *ext, if (end) count += blocksize - end; - lost_grant = PAGE_CACHE_SIZE - count; + lost_grant = PAGE_SIZE - count; } if (ext->oe_grants > 0) osc_free_grant(cli, nr_pages, lost_grant); @@ -967,7 +967,7 @@ static int osc_extent_truncate(struct osc_extent *ext, pgoff_t trunc_index, struct osc_async_page *oap; struct osc_async_page *tmp; int pages_in_chunk = 0; - int ppc_bits = cli->cl_chunkbits - PAGE_CACHE_SHIFT; + int ppc_bits = cli->cl_chunkbits - PAGE_SHIFT; __u64 trunc_chunk = trunc_index >> ppc_bits; int grants = 0; int nr_pages = 0; @@ -1125,7 +1125,7 @@ static int osc_extent_make_ready(const struct lu_env *env, if (!(last->oap_async_flags & ASYNC_COUNT_STABLE)) { last->oap_count = osc_refresh_count(env, last, OBD_BRW_WRITE); LASSERT(last->oap_count > 0); - LASSERT(last->oap_page_off + last->oap_count <= PAGE_CACHE_SIZE); + LASSERT(last->oap_page_off + last->oap_count <= PAGE_SIZE); last->oap_async_flags |= ASYNC_COUNT_STABLE; } @@ -1134,7 +1134,7 @@ static int osc_extent_make_ready(const struct lu_env *env, */ list_for_each_entry(oap, &ext->oe_pages, oap_pending_item) { if (!(oap->oap_async_flags & ASYNC_COUNT_STABLE)) { - oap->oap_count = PAGE_CACHE_SIZE - oap->oap_page_off; + oap->oap_count = PAGE_SIZE - oap->oap_page_off; oap->oap_async_flags |= ASYNC_COUNT_STABLE; } } @@ -1158,7 +1158,7 @@ static int osc_extent_expand(struct osc_extent *ext, pgoff_t index, int *grants) struct osc_object *obj = ext->oe_obj; struct client_obd *cli = osc_cli(obj); struct osc_extent *next; - int ppc_bits = cli->cl_chunkbits - PAGE_CACHE_SHIFT; + int ppc_bits = cli->cl_chunkbits - PAGE_SHIFT; pgoff_t chunk = index >> ppc_bits; pgoff_t end_chunk; pgoff_t end_index; @@ -1293,9 +1293,9 @@ static int osc_refresh_count(const struct lu_env *env, return 0; else if (cl_offset(obj, page->cp_index + 1) > kms) /* catch sub-page write at end of file */ - return kms % PAGE_CACHE_SIZE; + return kms % PAGE_SIZE; else - return PAGE_CACHE_SIZE; + return PAGE_SIZE; } static int osc_completion(const struct lu_env *env, struct osc_async_page *oap, @@ -1376,10 +1376,10 @@ static void osc_consume_write_grant(struct client_obd *cli, assert_spin_locked(&cli->cl_loi_list_lock.lock); LASSERT(!(pga->flag & OBD_BRW_FROM_GRANT)); atomic_inc(&obd_dirty_pages); - cli->cl_dirty += PAGE_CACHE_SIZE; + cli->cl_dirty += PAGE_SIZE; pga->flag |= OBD_BRW_FROM_GRANT; CDEBUG(D_CACHE, "using %lu grant credits for brw %p page %p\n", - PAGE_CACHE_SIZE, pga, pga->pg); + PAGE_SIZE, pga, pga->pg); osc_update_next_shrink(cli); } @@ -1396,11 +1396,11 @@ static void osc_release_write_grant(struct client_obd *cli, pga->flag &= ~OBD_BRW_FROM_GRANT; atomic_dec(&obd_dirty_pages); - cli->cl_dirty -= PAGE_CACHE_SIZE; + cli->cl_dirty -= PAGE_SIZE; if (pga->flag & OBD_BRW_NOCACHE) { pga->flag &= 
~OBD_BRW_NOCACHE; atomic_dec(&obd_dirty_transit_pages); - cli->cl_dirty_transit -= PAGE_CACHE_SIZE; + cli->cl_dirty_transit -= PAGE_SIZE; } } @@ -1469,7 +1469,7 @@ static void osc_free_grant(struct client_obd *cli, unsigned int nr_pages, client_obd_list_lock(&cli->cl_loi_list_lock); atomic_sub(nr_pages, &obd_dirty_pages); - cli->cl_dirty -= nr_pages << PAGE_CACHE_SHIFT; + cli->cl_dirty -= nr_pages << PAGE_SHIFT; cli->cl_lost_grant += lost_grant; if (cli->cl_avail_grant < grant && cli->cl_lost_grant >= grant) { /* borrow some grant from truncate to avoid the case that @@ -1512,11 +1512,11 @@ static int osc_enter_cache_try(struct client_obd *cli, if (rc < 0) return 0; - if (cli->cl_dirty + PAGE_CACHE_SIZE <= cli->cl_dirty_max && + if (cli->cl_dirty + PAGE_SIZE <= cli->cl_dirty_max && atomic_read(&obd_dirty_pages) + 1 <= obd_max_dirty_pages) { osc_consume_write_grant(cli, &oap->oap_brw_page); if (transient) { - cli->cl_dirty_transit += PAGE_CACHE_SIZE; + cli->cl_dirty_transit += PAGE_SIZE; atomic_inc(&obd_dirty_transit_pages); oap->oap_brw_flags |= OBD_BRW_NOCACHE; } @@ -1562,7 +1562,7 @@ static int osc_enter_cache(const struct lu_env *env, struct client_obd *cli, * of queued writes and create a discontiguous rpc stream */ if (OBD_FAIL_CHECK(OBD_FAIL_OSC_NO_GRANT) || - cli->cl_dirty_max < PAGE_CACHE_SIZE || + cli->cl_dirty_max < PAGE_SIZE || cli->cl_ar.ar_force_sync || loi->loi_ar.ar_force_sync) { rc = -EDQUOT; goto out; @@ -1632,7 +1632,7 @@ void osc_wake_cache_waiters(struct client_obd *cli) ocw->ocw_rc = -EDQUOT; /* we can't dirty more */ - if ((cli->cl_dirty + PAGE_CACHE_SIZE > cli->cl_dirty_max) || + if ((cli->cl_dirty + PAGE_SIZE > cli->cl_dirty_max) || (atomic_read(&obd_dirty_pages) + 1 > obd_max_dirty_pages)) { CDEBUG(D_CACHE, "no dirty room: dirty: %ld osc max %ld, sys max %d\n", diff --git a/drivers/staging/lustre/lustre/osc/osc_page.c b/drivers/staging/lustre/lustre/osc/osc_page.c index d720b1a1c18c..ce9ddd515f64 100644 --- a/drivers/staging/lustre/lustre/osc/osc_page.c +++ b/drivers/staging/lustre/lustre/osc/osc_page.c @@ -410,7 +410,7 @@ int osc_page_init(const struct lu_env *env, struct cl_object *obj, int result; opg->ops_from = 0; - opg->ops_to = PAGE_CACHE_SIZE; + opg->ops_to = PAGE_SIZE; result = osc_prep_async_page(osc, opg, vmpage, cl_offset(obj, page->cp_index)); @@ -487,9 +487,9 @@ static atomic_t osc_lru_waiters = ATOMIC_INIT(0); /* LRU pages are freed in batch mode. OSC should at least free this * number of pages to avoid running out of LRU budget, and.. */ -static const int lru_shrink_min = 2 << (20 - PAGE_CACHE_SHIFT); /* 2M */ +static const int lru_shrink_min = 2 << (20 - PAGE_SHIFT); /* 2M */ /* free this number at most otherwise it will take too long time to finish. */ -static const int lru_shrink_max = 32 << (20 - PAGE_CACHE_SHIFT); /* 32M */ +static const int lru_shrink_max = 32 << (20 - PAGE_SHIFT); /* 32M */ /* Check if we can free LRU slots from this OSC. If there exists LRU waiters, * we should free slots aggressively. 
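The lru_shrink_min/lru_shrink_max constants above use the same idiom in reverse: a megabyte quantity spelled as N << (20 - PAGE_SHIFT) pages so the source reads in MB. A quick check of the identity (4 KiB pages assumed):

#include <stdio.h>

#define PAGE_SHIFT 12
#define PAGE_SIZE  (1UL << PAGE_SHIFT)

int main(void)
{
        unsigned long shrink_min = 2UL << (20 - PAGE_SHIFT);   /* "2M"  */
        unsigned long shrink_max = 32UL << (20 - PAGE_SHIFT);  /* "32M" */

        printf("%lu pages = %lu bytes\n", shrink_min, shrink_min * PAGE_SIZE);
        printf("%lu pages = %lu bytes\n", shrink_max, shrink_max * PAGE_SIZE);
        return 0;
}

512 pages of 4 KiB is exactly the 2M the comment promises, independent of the PAGE_CACHE_SHIFT rename.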
In this way, slots are freed in a steady diff --git a/drivers/staging/lustre/lustre/osc/osc_request.c b/drivers/staging/lustre/lustre/osc/osc_request.c index 74805f1ae888..30526ebcad04 100644 --- a/drivers/staging/lustre/lustre/osc/osc_request.c +++ b/drivers/staging/lustre/lustre/osc/osc_request.c @@ -826,7 +826,7 @@ static void osc_announce_cached(struct client_obd *cli, struct obdo *oa, oa->o_undirty = 0; } else { long max_in_flight = (cli->cl_max_pages_per_rpc << - PAGE_CACHE_SHIFT)* + PAGE_SHIFT)* (cli->cl_max_rpcs_in_flight + 1); oa->o_undirty = max(cli->cl_dirty_max, max_in_flight); } @@ -909,11 +909,11 @@ static void osc_shrink_grant_local(struct client_obd *cli, struct obdo *oa) static int osc_shrink_grant(struct client_obd *cli) { __u64 target_bytes = (cli->cl_max_rpcs_in_flight + 1) * - (cli->cl_max_pages_per_rpc << PAGE_CACHE_SHIFT); + (cli->cl_max_pages_per_rpc << PAGE_SHIFT); client_obd_list_lock(&cli->cl_loi_list_lock); if (cli->cl_avail_grant <= target_bytes) - target_bytes = cli->cl_max_pages_per_rpc << PAGE_CACHE_SHIFT; + target_bytes = cli->cl_max_pages_per_rpc << PAGE_SHIFT; client_obd_list_unlock(&cli->cl_loi_list_lock); return osc_shrink_grant_to_target(cli, target_bytes); @@ -929,8 +929,8 @@ int osc_shrink_grant_to_target(struct client_obd *cli, __u64 target_bytes) * We don't want to shrink below a single RPC, as that will negatively * impact block allocation and long-term performance. */ - if (target_bytes < cli->cl_max_pages_per_rpc << PAGE_CACHE_SHIFT) - target_bytes = cli->cl_max_pages_per_rpc << PAGE_CACHE_SHIFT; + if (target_bytes < cli->cl_max_pages_per_rpc << PAGE_SHIFT) + target_bytes = cli->cl_max_pages_per_rpc << PAGE_SHIFT; if (target_bytes >= cli->cl_avail_grant) { client_obd_list_unlock(&cli->cl_loi_list_lock); @@ -978,7 +978,7 @@ static int osc_should_shrink_grant(struct client_obd *client) * cli_brw_size(obd->u.cli.cl_import->imp_obd->obd_self_export) * Keep comment here so that it can be found by searching. */ - int brw_size = client->cl_max_pages_per_rpc << PAGE_CACHE_SHIFT; + int brw_size = client->cl_max_pages_per_rpc << PAGE_SHIFT; if (client->cl_import->imp_state == LUSTRE_IMP_FULL && client->cl_avail_grant > brw_size) @@ -1052,7 +1052,7 @@ static void osc_init_grant(struct client_obd *cli, struct obd_connect_data *ocd) } /* determine the appropriate chunk size used by osc_extent. 
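The osc_init_grant hunk just below clamps cl_chunkbits to at least PAGE_SHIFT, which keeps the ppc_bits = cl_chunkbits - PAGE_SHIFT computations in the osc_cache.c hunks earlier from going negative. A sketch of the resulting chunk arithmetic (64 KiB chunks chosen arbitrarily for the example):

#include <stdio.h>

#define PAGE_SHIFT 12

int main(void)
{
        int chunkbits = 16;                          /* 64 KiB chunks         */
        int ppc_bits = chunkbits - PAGE_SHIFT;       /* log2(pages per chunk) */
        unsigned long chunk_mask = ~((1UL << ppc_bits) - 1);
        unsigned long index = 37;                    /* some page index       */

        printf("chunk=%lu first_page=%lu pages_per_chunk=%d\n",
               index >> ppc_bits,                    /* chunk 2  */
               index & chunk_mask,                   /* page 32  */
               1 << ppc_bits);                       /* 16       */
        return 0;
}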
*/ - cli->cl_chunkbits = max_t(int, PAGE_CACHE_SHIFT, ocd->ocd_blocksize); + cli->cl_chunkbits = max_t(int, PAGE_SHIFT, ocd->ocd_blocksize); client_obd_list_unlock(&cli->cl_loi_list_lock); CDEBUG(D_CACHE, "%s, setting cl_avail_grant: %ld cl_lost_grant: %ld chunk bits: %d\n", @@ -1317,9 +1317,9 @@ static int osc_brw_prep_request(int cmd, struct client_obd *cli, LASSERT(pg->count > 0); /* make sure there is no gap in the middle of page array */ LASSERTF(page_count == 1 || - (ergo(i == 0, poff + pg->count == PAGE_CACHE_SIZE) && + (ergo(i == 0, poff + pg->count == PAGE_SIZE) && ergo(i > 0 && i < page_count - 1, - poff == 0 && pg->count == PAGE_CACHE_SIZE) && + poff == 0 && pg->count == PAGE_SIZE) && ergo(i == page_count - 1, poff == 0)), "i: %d/%d pg: %p off: %llu, count: %u\n", i, page_count, pg, pg->off, pg->count); @@ -1877,7 +1877,7 @@ int osc_build_rpc(const struct lu_env *env, struct client_obd *cli, oap->oap_count; else LASSERT(oap->oap_page_off + oap->oap_count == - PAGE_CACHE_SIZE); + PAGE_SIZE); } } @@ -1993,7 +1993,7 @@ int osc_build_rpc(const struct lu_env *env, struct client_obd *cli, tmp->oap_request = ptlrpc_request_addref(req); client_obd_list_lock(&cli->cl_loi_list_lock); - starting_offset >>= PAGE_CACHE_SHIFT; + starting_offset >>= PAGE_SHIFT; if (cmd == OBD_BRW_READ) { cli->cl_r_in_flight++; lprocfs_oh_tally_log2(&cli->cl_read_page_hist, page_count); @@ -2790,12 +2790,12 @@ out: CFS_PAGE_MASK; if (OBD_OBJECT_EOF - fm_key->fiemap.fm_length <= - fm_key->fiemap.fm_start + PAGE_CACHE_SIZE - 1) + fm_key->fiemap.fm_start + PAGE_SIZE - 1) policy.l_extent.end = OBD_OBJECT_EOF; else policy.l_extent.end = (fm_key->fiemap.fm_start + fm_key->fiemap.fm_length + - PAGE_CACHE_SIZE - 1) & CFS_PAGE_MASK; + PAGE_SIZE - 1) & CFS_PAGE_MASK; ostid_build_res_name(&fm_key->oa.o_oi, &res_id); mode = ldlm_lock_match(exp->exp_obd->obd_namespace, diff --git a/drivers/staging/lustre/lustre/ptlrpc/client.c b/drivers/staging/lustre/lustre/ptlrpc/client.c index 1b7673eec4d7..cf3ac8eee9ee 100644 --- a/drivers/staging/lustre/lustre/ptlrpc/client.c +++ b/drivers/staging/lustre/lustre/ptlrpc/client.c @@ -174,12 +174,12 @@ void __ptlrpc_prep_bulk_page(struct ptlrpc_bulk_desc *desc, LASSERT(page); LASSERT(pageoffset >= 0); LASSERT(len > 0); - LASSERT(pageoffset + len <= PAGE_CACHE_SIZE); + LASSERT(pageoffset + len <= PAGE_SIZE); desc->bd_nob += len; if (pin) - page_cache_get(page); + get_page(page); ptlrpc_add_bulk_page(desc, page, pageoffset, len); } @@ -206,7 +206,7 @@ void __ptlrpc_free_bulk(struct ptlrpc_bulk_desc *desc, int unpin) if (unpin) { for (i = 0; i < desc->bd_iov_count; i++) - page_cache_release(desc->bd_iov[i].kiov_page); + put_page(desc->bd_iov[i].kiov_page); } kfree(desc); diff --git a/drivers/staging/lustre/lustre/ptlrpc/import.c b/drivers/staging/lustre/lustre/ptlrpc/import.c index b4eddf291269..cd94fed0ffdf 100644 --- a/drivers/staging/lustre/lustre/ptlrpc/import.c +++ b/drivers/staging/lustre/lustre/ptlrpc/import.c @@ -1092,7 +1092,7 @@ finish: if (ocd->ocd_connect_flags & OBD_CONNECT_BRW_SIZE) cli->cl_max_pages_per_rpc = - min(ocd->ocd_brw_size >> PAGE_CACHE_SHIFT, + min(ocd->ocd_brw_size >> PAGE_SHIFT, cli->cl_max_pages_per_rpc); else if (imp->imp_connect_op == MDS_CONNECT || imp->imp_connect_op == MGS_CONNECT) diff --git a/drivers/staging/lustre/lustre/ptlrpc/lproc_ptlrpc.c b/drivers/staging/lustre/lustre/ptlrpc/lproc_ptlrpc.c index cee04efb6fb5..c95a91ce26c9 100644 --- a/drivers/staging/lustre/lustre/ptlrpc/lproc_ptlrpc.c +++ b/drivers/staging/lustre/lustre/ptlrpc/lproc_ptlrpc.c @@ 
-308,7 +308,7 @@ ptlrpc_lprocfs_req_history_max_seq_write(struct file *file, * hose a kernel by allowing the request history to grow too * far. */ - bufpages = (svc->srv_buf_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; + bufpages = (svc->srv_buf_size + PAGE_SIZE - 1) >> PAGE_SHIFT; if (val > totalram_pages / (2 * bufpages)) return -ERANGE; @@ -1226,7 +1226,7 @@ int lprocfs_wr_import(struct file *file, const char __user *buffer, const char prefix[] = "connection="; const int prefix_len = sizeof(prefix) - 1; - if (count > PAGE_CACHE_SIZE - 1 || count <= prefix_len) + if (count > PAGE_SIZE - 1 || count <= prefix_len) return -EINVAL; kbuf = kzalloc(count + 1, GFP_NOFS); diff --git a/drivers/staging/lustre/lustre/ptlrpc/recover.c b/drivers/staging/lustre/lustre/ptlrpc/recover.c index 5f27d9c2e4ef..30d9a164e52d 100644 --- a/drivers/staging/lustre/lustre/ptlrpc/recover.c +++ b/drivers/staging/lustre/lustre/ptlrpc/recover.c @@ -195,7 +195,7 @@ int ptlrpc_resend(struct obd_import *imp) } list_for_each_entry_safe(req, next, &imp->imp_sending_list, rq_list) { - LASSERTF((long)req > PAGE_CACHE_SIZE && req != LP_POISON, + LASSERTF((long)req > PAGE_SIZE && req != LP_POISON, "req %p bad\n", req); LASSERTF(req->rq_type != LI_POISON, "req %p freed\n", req); if (!ptlrpc_no_resend(req)) diff --git a/drivers/staging/lustre/lustre/ptlrpc/sec_bulk.c b/drivers/staging/lustre/lustre/ptlrpc/sec_bulk.c index 72d5b9bf5b29..d3872b8c9a6e 100644 --- a/drivers/staging/lustre/lustre/ptlrpc/sec_bulk.c +++ b/drivers/staging/lustre/lustre/ptlrpc/sec_bulk.c @@ -58,7 +58,7 @@ * bulk encryption page pools * ****************************************/ -#define POINTERS_PER_PAGE (PAGE_CACHE_SIZE / sizeof(void *)) +#define POINTERS_PER_PAGE (PAGE_SIZE / sizeof(void *)) #define PAGES_PER_POOL (POINTERS_PER_PAGE) #define IDLE_IDX_MAX (100) diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c index 8cfce105c7ee..e21ca2bd6839 100644 --- a/drivers/usb/gadget/function/f_fs.c +++ b/drivers/usb/gadget/function/f_fs.c @@ -1147,8 +1147,8 @@ static int ffs_sb_fill(struct super_block *sb, void *_data, int silent) ffs->sb = sb; data->ffs_data = NULL; sb->s_fs_info = ffs; - sb->s_blocksize = PAGE_CACHE_SIZE; - sb->s_blocksize_bits = PAGE_CACHE_SHIFT; + sb->s_blocksize = PAGE_SIZE; + sb->s_blocksize_bits = PAGE_SHIFT; sb->s_magic = FUNCTIONFS_MAGIC; sb->s_op = &ffs_sb_operations; sb->s_time_gran = 1; diff --git a/drivers/usb/gadget/legacy/inode.c b/drivers/usb/gadget/legacy/inode.c index 5cdaf0150a4e..e64479f882a5 100644 --- a/drivers/usb/gadget/legacy/inode.c +++ b/drivers/usb/gadget/legacy/inode.c @@ -1954,8 +1954,8 @@ gadgetfs_fill_super (struct super_block *sb, void *opts, int silent) return -ENODEV; /* superblock */ - sb->s_blocksize = PAGE_CACHE_SIZE; - sb->s_blocksize_bits = PAGE_CACHE_SHIFT; + sb->s_blocksize = PAGE_SIZE; + sb->s_blocksize_bits = PAGE_SHIFT; sb->s_magic = GADGETFS_MAGIC; sb->s_op = &gadget_fs_operations; sb->s_time_gran = 1; diff --git a/drivers/usb/storage/scsiglue.c b/drivers/usb/storage/scsiglue.c index dba51362d2e2..90901861bfc0 100644 --- a/drivers/usb/storage/scsiglue.c +++ b/drivers/usb/storage/scsiglue.c @@ -123,7 +123,7 @@ static int slave_configure(struct scsi_device *sdev) unsigned int max_sectors = 64; if (us->fflags & US_FL_MAX_SECTORS_MIN) - max_sectors = PAGE_CACHE_SIZE >> 9; + max_sectors = PAGE_SIZE >> 9; if (queue_max_hw_sectors(sdev->request_queue) > max_sectors) blk_queue_max_hw_sectors(sdev->request_queue, max_sectors); diff --git a/drivers/video/fbdev/pvr2fb.c 
b/drivers/video/fbdev/pvr2fb.c index 71a923e53f93..3b1ca4411073 100644 --- a/drivers/video/fbdev/pvr2fb.c +++ b/drivers/video/fbdev/pvr2fb.c @@ -735,7 +735,7 @@ out: out_unmap: for (i = 0; i < nr_pages; i++) - page_cache_release(pages[i]); + put_page(pages[i]); kfree(pages); diff --git a/fs/9p/vfs_addr.c b/fs/9p/vfs_addr.c index e9e04376c52c..ac9225e86bf3 100644 --- a/fs/9p/vfs_addr.c +++ b/fs/9p/vfs_addr.c @@ -153,7 +153,7 @@ static void v9fs_invalidate_page(struct page *page, unsigned int offset, * If called with zero offset, we should release * the private state assocated with the page */ - if (offset == 0 && length == PAGE_CACHE_SIZE) + if (offset == 0 && length == PAGE_SIZE) v9fs_fscache_invalidate_page(page); } @@ -166,10 +166,10 @@ static int v9fs_vfs_writepage_locked(struct page *page) struct bio_vec bvec; int err, len; - if (page->index == size >> PAGE_CACHE_SHIFT) - len = size & ~PAGE_CACHE_MASK; + if (page->index == size >> PAGE_SHIFT) + len = size & ~PAGE_MASK; else - len = PAGE_CACHE_SIZE; + len = PAGE_SIZE; bvec.bv_page = page; bvec.bv_offset = 0; @@ -271,7 +271,7 @@ static int v9fs_write_begin(struct file *filp, struct address_space *mapping, int retval = 0; struct page *page; struct v9fs_inode *v9inode; - pgoff_t index = pos >> PAGE_CACHE_SHIFT; + pgoff_t index = pos >> PAGE_SHIFT; struct inode *inode = mapping->host; @@ -288,11 +288,11 @@ start: if (PageUptodate(page)) goto out; - if (len == PAGE_CACHE_SIZE) + if (len == PAGE_SIZE) goto out; retval = v9fs_fid_readpage(v9inode->writeback_fid, page); - page_cache_release(page); + put_page(page); if (!retval) goto start; out: @@ -313,7 +313,7 @@ static int v9fs_write_end(struct file *filp, struct address_space *mapping, /* * zero out the rest of the area */ - unsigned from = pos & (PAGE_CACHE_SIZE - 1); + unsigned from = pos & (PAGE_SIZE - 1); zero_user(page, from + copied, len - copied); flush_dcache_page(page); @@ -331,7 +331,7 @@ static int v9fs_write_end(struct file *filp, struct address_space *mapping, } set_page_dirty(page); unlock_page(page); - page_cache_release(page); + put_page(page); return copied; } diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c index eadc894faea2..b84c291ba1eb 100644 --- a/fs/9p/vfs_file.c +++ b/fs/9p/vfs_file.c @@ -421,8 +421,8 @@ v9fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) struct inode *inode = file_inode(file); loff_t i_size; unsigned long pg_start, pg_end; - pg_start = origin >> PAGE_CACHE_SHIFT; - pg_end = (origin + retval - 1) >> PAGE_CACHE_SHIFT; + pg_start = origin >> PAGE_SHIFT; + pg_end = (origin + retval - 1) >> PAGE_SHIFT; if (inode->i_mapping && inode->i_mapping->nrpages) invalidate_inode_pages2_range(inode->i_mapping, pg_start, pg_end); diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c index bf495cedec26..de3ed8629196 100644 --- a/fs/9p/vfs_super.c +++ b/fs/9p/vfs_super.c @@ -87,7 +87,7 @@ v9fs_fill_super(struct super_block *sb, struct v9fs_session_info *v9ses, sb->s_op = &v9fs_super_ops; sb->s_bdi = &v9ses->bdi; if (v9ses->cache) - sb->s_bdi->ra_pages = (VM_MAX_READAHEAD * 1024)/PAGE_CACHE_SIZE; + sb->s_bdi->ra_pages = (VM_MAX_READAHEAD * 1024)/PAGE_SIZE; sb->s_flags |= MS_ACTIVE | MS_DIRSYNC | MS_NOATIME; if (!v9ses->cache) diff --git a/fs/affs/file.c b/fs/affs/file.c index 22fc7c802d69..0cde550050e8 100644 --- a/fs/affs/file.c +++ b/fs/affs/file.c @@ -510,9 +510,9 @@ affs_do_readpage_ofs(struct page *page, unsigned to) pr_debug("%s(%lu, %ld, 0, %d)\n", __func__, inode->i_ino, page->index, to); - BUG_ON(to > PAGE_CACHE_SIZE); + BUG_ON(to > PAGE_SIZE); bsize 
= AFFS_SB(sb)->s_data_blksize; - tmp = page->index << PAGE_CACHE_SHIFT; + tmp = page->index << PAGE_SHIFT; bidx = tmp / bsize; boff = tmp % bsize; @@ -613,10 +613,10 @@ affs_readpage_ofs(struct file *file, struct page *page) int err; pr_debug("%s(%lu, %ld)\n", __func__, inode->i_ino, page->index); - to = PAGE_CACHE_SIZE; - if (((page->index + 1) << PAGE_CACHE_SHIFT) > inode->i_size) { - to = inode->i_size & ~PAGE_CACHE_MASK; - memset(page_address(page) + to, 0, PAGE_CACHE_SIZE - to); + to = PAGE_SIZE; + if (((page->index + 1) << PAGE_SHIFT) > inode->i_size) { + to = inode->i_size & ~PAGE_MASK; + memset(page_address(page) + to, 0, PAGE_SIZE - to); } err = affs_do_readpage_ofs(page, to); @@ -646,7 +646,7 @@ static int affs_write_begin_ofs(struct file *file, struct address_space *mapping return err; } - index = pos >> PAGE_CACHE_SHIFT; + index = pos >> PAGE_SHIFT; page = grab_cache_page_write_begin(mapping, index, flags); if (!page) return -ENOMEM; @@ -656,10 +656,10 @@ static int affs_write_begin_ofs(struct file *file, struct address_space *mapping return 0; /* XXX: inefficient but safe in the face of short writes */ - err = affs_do_readpage_ofs(page, PAGE_CACHE_SIZE); + err = affs_do_readpage_ofs(page, PAGE_SIZE); if (err) { unlock_page(page); - page_cache_release(page); + put_page(page); } return err; } @@ -677,7 +677,7 @@ static int affs_write_end_ofs(struct file *file, struct address_space *mapping, u32 tmp; int written; - from = pos & (PAGE_CACHE_SIZE - 1); + from = pos & (PAGE_SIZE - 1); to = pos + len; /* * XXX: not sure if this can handle short copies (len < copied), but @@ -692,7 +692,7 @@ static int affs_write_end_ofs(struct file *file, struct address_space *mapping, bh = NULL; written = 0; - tmp = (page->index << PAGE_CACHE_SHIFT) + from; + tmp = (page->index << PAGE_SHIFT) + from; bidx = tmp / bsize; boff = tmp % bsize; if (boff) { @@ -788,13 +788,13 @@ static int affs_write_end_ofs(struct file *file, struct address_space *mapping, done: affs_brelse(bh); - tmp = (page->index << PAGE_CACHE_SHIFT) + from; + tmp = (page->index << PAGE_SHIFT) + from; if (tmp > inode->i_size) inode->i_size = AFFS_I(inode)->mmu_private = tmp; err_first_bh: unlock_page(page); - page_cache_release(page); + put_page(page); return written; diff --git a/fs/afs/dir.c b/fs/afs/dir.c index e10e17788f06..5fda2bc53cd7 100644 --- a/fs/afs/dir.c +++ b/fs/afs/dir.c @@ -181,7 +181,7 @@ error: static inline void afs_dir_put_page(struct page *page) { kunmap(page); - page_cache_release(page); + put_page(page); } /* diff --git a/fs/afs/file.c b/fs/afs/file.c index 999bc3caec92..6344aee4ac4b 100644 --- a/fs/afs/file.c +++ b/fs/afs/file.c @@ -164,7 +164,7 @@ int afs_page_filler(void *data, struct page *page) _debug("cache said ENOBUFS"); default: go_on: - offset = page->index << PAGE_CACHE_SHIFT; + offset = page->index << PAGE_SHIFT; len = min_t(size_t, i_size_read(inode) - offset, PAGE_SIZE); /* read the contents of the file from the server into the @@ -319,7 +319,7 @@ static void afs_invalidatepage(struct page *page, unsigned int offset, BUG_ON(!PageLocked(page)); /* we clean up only if the entire page is being invalidated */ - if (offset == 0 && length == PAGE_CACHE_SIZE) { + if (offset == 0 && length == PAGE_SIZE) { #ifdef CONFIG_AFS_FSCACHE if (PageFsCache(page)) { struct afs_vnode *vnode = AFS_FS_I(page->mapping->host); diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c index ccd0b212e82a..81dd075356b9 100644 --- a/fs/afs/mntpt.c +++ b/fs/afs/mntpt.c @@ -93,7 +93,7 @@ int afs_mntpt_check_symlink(struct afs_vnode *vnode, 
struct key *key) kunmap(page); out_free: - page_cache_release(page); + put_page(page); out: _leave(" = %d", ret); return ret; @@ -189,7 +189,7 @@ static struct vfsmount *afs_mntpt_do_automount(struct dentry *mntpt) buf = kmap_atomic(page); memcpy(devname, buf, size); kunmap_atomic(buf); - page_cache_release(page); + put_page(page); page = NULL; } @@ -211,7 +211,7 @@ static struct vfsmount *afs_mntpt_do_automount(struct dentry *mntpt) return mnt; error: - page_cache_release(page); + put_page(page); error_no_page: free_page((unsigned long) options); error_no_options: diff --git a/fs/afs/super.c b/fs/afs/super.c index 81afefe7d8a6..fbdb022b75a2 100644 --- a/fs/afs/super.c +++ b/fs/afs/super.c @@ -315,8 +315,8 @@ static int afs_fill_super(struct super_block *sb, _enter(""); /* fill in the superblock */ - sb->s_blocksize = PAGE_CACHE_SIZE; - sb->s_blocksize_bits = PAGE_CACHE_SHIFT; + sb->s_blocksize = PAGE_SIZE; + sb->s_blocksize_bits = PAGE_SHIFT; sb->s_magic = AFS_FS_MAGIC; sb->s_op = &afs_super_ops; sb->s_bdi = &as->volume->bdi; diff --git a/fs/afs/write.c b/fs/afs/write.c index dfef94f70667..65de439bdc4f 100644 --- a/fs/afs/write.c +++ b/fs/afs/write.c @@ -93,10 +93,10 @@ static int afs_fill_page(struct afs_vnode *vnode, struct key *key, _enter(",,%llu", (unsigned long long)pos); i_size = i_size_read(&vnode->vfs_inode); - if (pos + PAGE_CACHE_SIZE > i_size) + if (pos + PAGE_SIZE > i_size) len = i_size - pos; else - len = PAGE_CACHE_SIZE; + len = PAGE_SIZE; ret = afs_vnode_fetch_data(vnode, key, pos, len, page); if (ret < 0) { @@ -123,9 +123,9 @@ int afs_write_begin(struct file *file, struct address_space *mapping, struct afs_vnode *vnode = AFS_FS_I(file_inode(file)); struct page *page; struct key *key = file->private_data; - unsigned from = pos & (PAGE_CACHE_SIZE - 1); + unsigned from = pos & (PAGE_SIZE - 1); unsigned to = from + len; - pgoff_t index = pos >> PAGE_CACHE_SHIFT; + pgoff_t index = pos >> PAGE_SHIFT; int ret; _enter("{%x:%u},{%lx},%u,%u", @@ -151,8 +151,8 @@ int afs_write_begin(struct file *file, struct address_space *mapping, *pagep = page; /* page won't leak in error case: it eventually gets cleaned off LRU */ - if (!PageUptodate(page) && len != PAGE_CACHE_SIZE) { - ret = afs_fill_page(vnode, key, index << PAGE_CACHE_SHIFT, page); + if (!PageUptodate(page) && len != PAGE_SIZE) { + ret = afs_fill_page(vnode, key, index << PAGE_SHIFT, page); if (ret < 0) { kfree(candidate); _leave(" = %d [prep]", ret); @@ -266,7 +266,7 @@ int afs_write_end(struct file *file, struct address_space *mapping, if (PageDirty(page)) _debug("dirtied"); unlock_page(page); - page_cache_release(page); + put_page(page); return copied; } @@ -480,7 +480,7 @@ static int afs_writepages_region(struct address_space *mapping, if (page->index > end) { *_next = index; - page_cache_release(page); + put_page(page); _leave(" = 0 [%lx]", *_next); return 0; } @@ -494,7 +494,7 @@ static int afs_writepages_region(struct address_space *mapping, if (page->mapping != mapping) { unlock_page(page); - page_cache_release(page); + put_page(page); continue; } @@ -515,7 +515,7 @@ static int afs_writepages_region(struct address_space *mapping, ret = afs_write_back_from_locked_page(wb, page); unlock_page(page); - page_cache_release(page); + put_page(page); if (ret < 0) { _leave(" = %d", ret); return ret; @@ -551,13 +551,13 @@ int afs_writepages(struct address_space *mapping, &next); mapping->writeback_index = next; } else if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) { - end = (pgoff_t)(LLONG_MAX >> PAGE_CACHE_SHIFT); + 
end = (pgoff_t)(LLONG_MAX >> PAGE_SHIFT); ret = afs_writepages_region(mapping, wbc, 0, end, &next); if (wbc->nr_to_write > 0) mapping->writeback_index = next; } else { - start = wbc->range_start >> PAGE_CACHE_SHIFT; - end = wbc->range_end >> PAGE_CACHE_SHIFT; + start = wbc->range_start >> PAGE_SHIFT; + end = wbc->range_end >> PAGE_SHIFT; ret = afs_writepages_region(mapping, wbc, start, end, &next); } diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 7d914c67a9d0..81381cc0dd17 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -2292,7 +2292,7 @@ static int elf_core_dump(struct coredump_params *cprm) void *kaddr = kmap(page); stop = !dump_emit(cprm, kaddr, PAGE_SIZE); kunmap(page); - page_cache_release(page); + put_page(page); } else stop = !dump_skip(cprm, PAGE_SIZE); if (stop) diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c index b1adb92e69de..083ea2bc60ab 100644 --- a/fs/binfmt_elf_fdpic.c +++ b/fs/binfmt_elf_fdpic.c @@ -1533,7 +1533,7 @@ static bool elf_fdpic_dump_segments(struct coredump_params *cprm) void *kaddr = kmap(page); res = dump_emit(cprm, kaddr, PAGE_SIZE); kunmap(page); - page_cache_release(page); + put_page(page); } else { res = dump_skip(cprm, PAGE_SIZE); } diff --git a/fs/block_dev.c b/fs/block_dev.c index 3172c4e2f502..20a2c02b77c4 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -331,7 +331,7 @@ static int blkdev_write_end(struct file *file, struct address_space *mapping, ret = block_write_end(file, mapping, pos, len, copied, page, fsdata); unlock_page(page); - page_cache_release(page); + put_page(page); return ret; } @@ -1149,7 +1149,7 @@ void bd_set_size(struct block_device *bdev, loff_t size) inode_lock(bdev->bd_inode); i_size_write(bdev->bd_inode, size); inode_unlock(bdev->bd_inode); - while (bsize < PAGE_CACHE_SIZE) { + while (bsize < PAGE_SIZE) { if (size & bsize) break; bsize <<= 1; diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c index e34a71b3e225..866789ede4cb 100644 --- a/fs/btrfs/check-integrity.c +++ b/fs/btrfs/check-integrity.c @@ -757,7 +757,7 @@ static int btrfsic_process_superblock(struct btrfsic_state *state, BUG_ON(NULL == l); ret = btrfsic_read_block(state, &tmp_next_block_ctx); - if (ret < (int)PAGE_CACHE_SIZE) { + if (ret < (int)PAGE_SIZE) { printk(KERN_INFO "btrfsic: read @logical %llu failed!\n", tmp_next_block_ctx.start); @@ -1231,15 +1231,15 @@ static void btrfsic_read_from_block_data( size_t offset_in_page; char *kaddr; char *dst = (char *)dstv; - size_t start_offset = block_ctx->start & ((u64)PAGE_CACHE_SIZE - 1); - unsigned long i = (start_offset + offset) >> PAGE_CACHE_SHIFT; + size_t start_offset = block_ctx->start & ((u64)PAGE_SIZE - 1); + unsigned long i = (start_offset + offset) >> PAGE_SHIFT; WARN_ON(offset + len > block_ctx->len); - offset_in_page = (start_offset + offset) & (PAGE_CACHE_SIZE - 1); + offset_in_page = (start_offset + offset) & (PAGE_SIZE - 1); while (len > 0) { - cur = min(len, ((size_t)PAGE_CACHE_SIZE - offset_in_page)); - BUG_ON(i >= DIV_ROUND_UP(block_ctx->len, PAGE_CACHE_SIZE)); + cur = min(len, ((size_t)PAGE_SIZE - offset_in_page)); + BUG_ON(i >= DIV_ROUND_UP(block_ctx->len, PAGE_SIZE)); kaddr = block_ctx->datav[i]; memcpy(dst, kaddr + offset_in_page, cur); @@ -1605,8 +1605,8 @@ static void btrfsic_release_block_ctx(struct btrfsic_block_data_ctx *block_ctx) BUG_ON(!block_ctx->datav); BUG_ON(!block_ctx->pagev); - num_pages = (block_ctx->len + (u64)PAGE_CACHE_SIZE - 1) >> - PAGE_CACHE_SHIFT; + num_pages = (block_ctx->len + (u64)PAGE_SIZE - 1) >> + PAGE_SHIFT; while (num_pages > 
diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c
index e34a71b3e225..866789ede4cb 100644
--- a/fs/btrfs/check-integrity.c
+++ b/fs/btrfs/check-integrity.c
@@ -757,7 +757,7 @@ static int btrfsic_process_superblock(struct btrfsic_state *state,
 			BUG_ON(NULL == l);
 			ret = btrfsic_read_block(state, &tmp_next_block_ctx);
-			if (ret < (int)PAGE_CACHE_SIZE) {
+			if (ret < (int)PAGE_SIZE) {
 				printk(KERN_INFO
 				       "btrfsic: read @logical %llu failed!\n",
 				       tmp_next_block_ctx.start);
@@ -1231,15 +1231,15 @@ static void btrfsic_read_from_block_data(
 	size_t offset_in_page;
 	char *kaddr;
 	char *dst = (char *)dstv;
-	size_t start_offset = block_ctx->start & ((u64)PAGE_CACHE_SIZE - 1);
-	unsigned long i = (start_offset + offset) >> PAGE_CACHE_SHIFT;
+	size_t start_offset = block_ctx->start & ((u64)PAGE_SIZE - 1);
+	unsigned long i = (start_offset + offset) >> PAGE_SHIFT;
 	WARN_ON(offset + len > block_ctx->len);
-	offset_in_page = (start_offset + offset) & (PAGE_CACHE_SIZE - 1);
+	offset_in_page = (start_offset + offset) & (PAGE_SIZE - 1);
 	while (len > 0) {
-		cur = min(len, ((size_t)PAGE_CACHE_SIZE - offset_in_page));
-		BUG_ON(i >= DIV_ROUND_UP(block_ctx->len, PAGE_CACHE_SIZE));
+		cur = min(len, ((size_t)PAGE_SIZE - offset_in_page));
+		BUG_ON(i >= DIV_ROUND_UP(block_ctx->len, PAGE_SIZE));
 		kaddr = block_ctx->datav[i];
 		memcpy(dst, kaddr + offset_in_page, cur);
@@ -1605,8 +1605,8 @@ static void btrfsic_release_block_ctx(struct btrfsic_block_data_ctx *block_ctx)
 		BUG_ON(!block_ctx->datav);
 		BUG_ON(!block_ctx->pagev);
-		num_pages = (block_ctx->len + (u64)PAGE_CACHE_SIZE - 1) >>
-			    PAGE_CACHE_SHIFT;
+		num_pages = (block_ctx->len + (u64)PAGE_SIZE - 1) >>
+			    PAGE_SHIFT;
 		while (num_pages > 0) {
 			num_pages--;
 			if (block_ctx->datav[num_pages]) {
@@ -1637,15 +1637,15 @@ static int btrfsic_read_block(struct btrfsic_state *state,
 	BUG_ON(block_ctx->datav);
 	BUG_ON(block_ctx->pagev);
 	BUG_ON(block_ctx->mem_to_free);
-	if (block_ctx->dev_bytenr & ((u64)PAGE_CACHE_SIZE - 1)) {
+	if (block_ctx->dev_bytenr & ((u64)PAGE_SIZE - 1)) {
 		printk(KERN_INFO
 		       "btrfsic: read_block() with unaligned bytenr %llu\n",
 		       block_ctx->dev_bytenr);
 		return -1;
 	}
-	num_pages = (block_ctx->len + (u64)PAGE_CACHE_SIZE - 1) >>
-		    PAGE_CACHE_SHIFT;
+	num_pages = (block_ctx->len + (u64)PAGE_SIZE - 1) >>
+		    PAGE_SHIFT;
 	block_ctx->mem_to_free = kzalloc((sizeof(*block_ctx->datav) +
 					  sizeof(*block_ctx->pagev)) *
 					 num_pages, GFP_NOFS);
@@ -1676,8 +1676,8 @@ static int btrfsic_read_block(struct btrfsic_state *state,
 		for (j = i; j < num_pages; j++) {
 			ret = bio_add_page(bio, block_ctx->pagev[j],
-					   PAGE_CACHE_SIZE, 0);
-			if (PAGE_CACHE_SIZE != ret)
+					   PAGE_SIZE, 0);
+			if (PAGE_SIZE != ret)
 				break;
 		}
 		if (j == i) {
@@ -1693,7 +1693,7 @@ static int btrfsic_read_block(struct btrfsic_state *state,
 			return -1;
 		}
 		bio_put(bio);
-		dev_bytenr += (j - i) * PAGE_CACHE_SIZE;
+		dev_bytenr += (j - i) * PAGE_SIZE;
 		i = j;
 	}
 	for (i = 0; i < num_pages; i++) {
@@ -1769,9 +1769,9 @@ static int btrfsic_test_for_metadata(struct btrfsic_state *state,
 	u32 crc = ~(u32)0;
 	unsigned int i;
-	if (num_pages * PAGE_CACHE_SIZE < state->metablock_size)
+	if (num_pages * PAGE_SIZE < state->metablock_size)
 		return 1; /* not metadata */
-	num_pages = state->metablock_size >> PAGE_CACHE_SHIFT;
+	num_pages = state->metablock_size >> PAGE_SHIFT;
 	h = (struct btrfs_header *)datav[0];
 	if (memcmp(h->fsid, state->root->fs_info->fsid, BTRFS_UUID_SIZE))
@@ -1779,8 +1779,8 @@ static int btrfsic_test_for_metadata(struct btrfsic_state *state,
 	for (i = 0; i < num_pages; i++) {
 		u8 *data = i ? datav[i] : (datav[i] + BTRFS_CSUM_SIZE);
-		size_t sublen = i ? PAGE_CACHE_SIZE :
-				    (PAGE_CACHE_SIZE - BTRFS_CSUM_SIZE);
+		size_t sublen = i ? PAGE_SIZE :
+				    (PAGE_SIZE - BTRFS_CSUM_SIZE);
 		crc = btrfs_crc32c(crc, data, sublen);
 	}
@@ -1826,14 +1826,14 @@ again:
 		if (block->is_superblock) {
 			bytenr = btrfs_super_bytenr((struct btrfs_super_block *)
 						    mapped_datav[0]);
-			if (num_pages * PAGE_CACHE_SIZE <
+			if (num_pages * PAGE_SIZE <
 			    BTRFS_SUPER_INFO_SIZE) {
 				printk(KERN_INFO
 				       "btrfsic: cannot work with too short bios!\n");
 				return;
 			}
 			is_metadata = 1;
-			BUG_ON(BTRFS_SUPER_INFO_SIZE & (PAGE_CACHE_SIZE - 1));
+			BUG_ON(BTRFS_SUPER_INFO_SIZE & (PAGE_SIZE - 1));
 			processed_len = BTRFS_SUPER_INFO_SIZE;
 			if (state->print_mask &
 			    BTRFSIC_PRINT_MASK_TREE_BEFORE_SB_WRITE) {
@@ -1844,7 +1844,7 @@ again:
 	}
 	if (is_metadata) {
 		if (!block->is_superblock) {
-			if (num_pages * PAGE_CACHE_SIZE <
+			if (num_pages * PAGE_SIZE <
 			    state->metablock_size) {
 				printk(KERN_INFO
 				       "btrfsic: cannot work with too short bios!\n");
@@ -1880,7 +1880,7 @@ again:
 			}
 			block->logical_bytenr = bytenr;
 		} else {
-			if (num_pages * PAGE_CACHE_SIZE <
+			if (num_pages * PAGE_SIZE <
 			    state->datablock_size) {
 				printk(KERN_INFO
 				       "btrfsic: cannot work with too short bios!\n");
@@ -2013,7 +2013,7 @@ again:
 			block->logical_bytenr = bytenr;
 			block->is_metadata = 1;
 			if (block->is_superblock) {
-				BUG_ON(PAGE_CACHE_SIZE !=
+				BUG_ON(PAGE_SIZE !=
 				       BTRFS_SUPER_INFO_SIZE);
 				ret = btrfsic_process_written_superblock(
 						state,
@@ -2172,8 +2172,8 @@ again:
 continue_loop:
 	BUG_ON(!processed_len);
 	dev_bytenr += processed_len;
-	mapped_datav += processed_len >> PAGE_CACHE_SHIFT;
-	num_pages -= processed_len >> PAGE_CACHE_SHIFT;
+	mapped_datav += processed_len >> PAGE_SHIFT;
+	num_pages -= processed_len >> PAGE_SHIFT;
 	goto again;
 }
@@ -2954,7 +2954,7 @@ static void __btrfsic_submit_bio(int rw, struct bio *bio)
 			goto leave;
 		cur_bytenr = dev_bytenr;
 		for (i = 0; i < bio->bi_vcnt; i++) {
-			BUG_ON(bio->bi_io_vec[i].bv_len != PAGE_CACHE_SIZE);
+			BUG_ON(bio->bi_io_vec[i].bv_len != PAGE_SIZE);
 			mapped_datav[i] = kmap(bio->bi_io_vec[i].bv_page);
 			if (!mapped_datav[i]) {
 				while (i > 0) {
@@ -3037,16 +3037,16 @@ int btrfsic_mount(struct btrfs_root *root,
 	struct list_head *dev_head = &fs_devices->devices;
 	struct btrfs_device *device;
-	if (root->nodesize & ((u64)PAGE_CACHE_SIZE - 1)) {
+	if (root->nodesize & ((u64)PAGE_SIZE - 1)) {
 		printk(KERN_INFO
 		       "btrfsic: cannot handle nodesize %d not being a multiple of PAGE_CACHE_SIZE %ld!\n",
-		       root->nodesize, PAGE_CACHE_SIZE);
+		       root->nodesize, PAGE_SIZE);
 		return -1;
 	}
-	if (root->sectorsize & ((u64)PAGE_CACHE_SIZE - 1)) {
+	if (root->sectorsize & ((u64)PAGE_SIZE - 1)) {
 		printk(KERN_INFO
 		       "btrfsic: cannot handle sectorsize %d not being a multiple of PAGE_CACHE_SIZE %ld!\n",
-		       root->sectorsize, PAGE_CACHE_SIZE);
+		       root->sectorsize, PAGE_SIZE);
 		return -1;
 	}
 	state = kzalloc(sizeof(*state), GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT);
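[Editorial aside, not part of the patch: the btrfsic hunks above all lean on the same three idioms for splitting a byte range over pages: page index = offset >> PAGE_SHIFT, in-page offset = offset & (PAGE_SIZE - 1), and page count = DIV_ROUND_UP(len, PAGE_SIZE). A userspace rendering with hypothetical EX_* names, assuming 4 KiB pages for the worked numbers:]

#include <stdio.h>

#define EX_PAGE_SHIFT 12                     /* assume 4 KiB pages */
#define EX_PAGE_SIZE  (1UL << EX_PAGE_SHIFT)
#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

int main(void)
{
	unsigned long offset = 9000, len = 10000;

	/* Which page does the range start in, and where inside it? */
	unsigned long index = offset >> EX_PAGE_SHIFT;         /* 2 */
	unsigned long in_page = offset & (EX_PAGE_SIZE - 1);   /* 808 */
	/* How many pages does a len-byte buffer need? */
	unsigned long pages = DIV_ROUND_UP(len, EX_PAGE_SIZE); /* 3 */

	printf("index=%lu in_page=%lu pages=%lu\n", index, in_page, pages);
	return 0;
}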
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index 3346cd8f9910..ff61a41ac90b 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -119,7 +119,7 @@ static int check_compressed_csum(struct inode *inode,
 		csum = ~(u32)0;
 		kaddr = kmap_atomic(page);
-		csum = btrfs_csum_data(kaddr, csum, PAGE_CACHE_SIZE);
+		csum = btrfs_csum_data(kaddr, csum, PAGE_SIZE);
 		btrfs_csum_final(csum, (char *)&csum);
 		kunmap_atomic(kaddr);
@@ -190,7 +190,7 @@ csum_failed:
 	for (index = 0; index < cb->nr_pages; index++) {
 		page = cb->compressed_pages[index];
 		page->mapping = NULL;
-		page_cache_release(page);
+		put_page(page);
 	}
 	/* do io completion on the original bio */
@@ -224,8 +224,8 @@ out:
 static noinline void end_compressed_writeback(struct inode *inode,
 					      const struct compressed_bio *cb)
 {
-	unsigned long index = cb->start >> PAGE_CACHE_SHIFT;
-	unsigned long end_index = (cb->start + cb->len - 1) >> PAGE_CACHE_SHIFT;
+	unsigned long index = cb->start >> PAGE_SHIFT;
+	unsigned long end_index = (cb->start + cb->len - 1) >> PAGE_SHIFT;
 	struct page *pages[16];
 	unsigned long nr_pages = end_index - index + 1;
 	int i;
@@ -247,7 +247,7 @@ static noinline void end_compressed_writeback(struct inode *inode,
 			if (cb->errors)
 				SetPageError(pages[i]);
 			end_page_writeback(pages[i]);
-			page_cache_release(pages[i]);
+			put_page(pages[i]);
 		}
 		nr_pages -= ret;
 		index += ret;
@@ -304,7 +304,7 @@ static void end_compressed_bio_write(struct bio *bio)
 	for (index = 0; index < cb->nr_pages; index++) {
 		page = cb->compressed_pages[index];
 		page->mapping = NULL;
-		page_cache_release(page);
+		put_page(page);
 	}
 	/* finally free the cb struct */
@@ -341,7 +341,7 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start,
 	int ret;
 	int skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
-	WARN_ON(start & ((u64)PAGE_CACHE_SIZE - 1));
+	WARN_ON(start & ((u64)PAGE_SIZE - 1));
 	cb = kmalloc(compressed_bio_size(root, compressed_len), GFP_NOFS);
 	if (!cb)
 		return -ENOMEM;
@@ -374,14 +374,14 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start,
 		page->mapping = inode->i_mapping;
 		if (bio->bi_iter.bi_size)
 			ret = io_tree->ops->merge_bio_hook(WRITE, page, 0,
-							   PAGE_CACHE_SIZE,
+							   PAGE_SIZE,
 							   bio, 0);
 		else
 			ret = 0;
 		page->mapping = NULL;
-		if (ret || bio_add_page(bio, page, PAGE_CACHE_SIZE, 0) <
-		    PAGE_CACHE_SIZE) {
+		if (ret || bio_add_page(bio, page, PAGE_SIZE, 0) <
+		    PAGE_SIZE) {
 			bio_get(bio);
 			/*
@@ -410,15 +410,15 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start,
 			BUG_ON(!bio);
 			bio->bi_private = cb;
 			bio->bi_end_io = end_compressed_bio_write;
-			bio_add_page(bio, page, PAGE_CACHE_SIZE, 0);
+			bio_add_page(bio, page, PAGE_SIZE, 0);
 		}
-		if (bytes_left < PAGE_CACHE_SIZE) {
+		if (bytes_left < PAGE_SIZE) {
 			btrfs_info(BTRFS_I(inode)->root->fs_info,
 					"bytes left %lu compress len %lu nr %lu",
 			       bytes_left, cb->compressed_len, cb->nr_pages);
 		}
-		bytes_left -= PAGE_CACHE_SIZE;
-		first_byte += PAGE_CACHE_SIZE;
+		bytes_left -= PAGE_SIZE;
+		first_byte += PAGE_SIZE;
 		cond_resched();
 	}
 	bio_get(bio);
@@ -457,17 +457,17 @@ static noinline int add_ra_bio_pages(struct inode *inode,
 	int misses = 0;
 	page = cb->orig_bio->bi_io_vec[cb->orig_bio->bi_vcnt - 1].bv_page;
-	last_offset = (page_offset(page) + PAGE_CACHE_SIZE);
+	last_offset = (page_offset(page) + PAGE_SIZE);
 	em_tree = &BTRFS_I(inode)->extent_tree;
 	tree = &BTRFS_I(inode)->io_tree;
 	if (isize == 0)
 		return 0;
-	end_index = (i_size_read(inode) - 1) >> PAGE_CACHE_SHIFT;
+	end_index = (i_size_read(inode) - 1) >> PAGE_SHIFT;
 	while (last_offset < compressed_end) {
-		pg_index = last_offset >> PAGE_CACHE_SHIFT;
+		pg_index = last_offset >> PAGE_SHIFT;
 		if (pg_index > end_index)
 			break;
@@ -488,11 +488,11 @@ static noinline int add_ra_bio_pages(struct inode *inode,
 			break;
 		if (add_to_page_cache_lru(page, mapping, pg_index,
 								GFP_NOFS)) {
-			page_cache_release(page);
+			put_page(page);
 			goto next;
 		}
-		end = last_offset + PAGE_CACHE_SIZE - 1;
+		end = last_offset + PAGE_SIZE - 1;
 		/*
 		 * at this point, we have a locked page in the page cache
 		 * for these bytes in the file.  But, we have to make
@@ -502,27 +502,27 @@ static noinline int add_ra_bio_pages(struct inode *inode,
 		lock_extent(tree, last_offset, end);
 		read_lock(&em_tree->lock);
 		em = lookup_extent_mapping(em_tree, last_offset,
-					   PAGE_CACHE_SIZE);
+					   PAGE_SIZE);
 		read_unlock(&em_tree->lock);
 		if (!em || last_offset < em->start ||
-		    (last_offset + PAGE_CACHE_SIZE > extent_map_end(em)) ||
+		    (last_offset + PAGE_SIZE > extent_map_end(em)) ||
 		    (em->block_start >> 9) != cb->orig_bio->bi_iter.bi_sector) {
 			free_extent_map(em);
 			unlock_extent(tree, last_offset, end);
 			unlock_page(page);
-			page_cache_release(page);
+			put_page(page);
 			break;
 		}
 		free_extent_map(em);
 		if (page->index == end_index) {
 			char *userpage;
-			size_t zero_offset = isize & (PAGE_CACHE_SIZE - 1);
+			size_t zero_offset = isize & (PAGE_SIZE - 1);
 			if (zero_offset) {
 				int zeros;
-				zeros = PAGE_CACHE_SIZE - zero_offset;
+				zeros = PAGE_SIZE - zero_offset;
 				userpage = kmap_atomic(page);
 				memset(userpage + zero_offset, 0, zeros);
 				flush_dcache_page(page);
@@ -531,19 +531,19 @@ static noinline int add_ra_bio_pages(struct inode *inode,
 		ret = bio_add_page(cb->orig_bio, page,
-				   PAGE_CACHE_SIZE, 0);
+				   PAGE_SIZE, 0);
-		if (ret == PAGE_CACHE_SIZE) {
+		if (ret == PAGE_SIZE) {
 			nr_pages++;
-			page_cache_release(page);
+			put_page(page);
 		} else {
 			unlock_extent(tree, last_offset, end);
 			unlock_page(page);
-			page_cache_release(page);
+			put_page(page);
 			break;
 		}
 next:
-		last_offset += PAGE_CACHE_SIZE;
+		last_offset += PAGE_SIZE;
 	}
 	return 0;
 }
@@ -567,7 +567,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
 	struct extent_map_tree *em_tree;
 	struct compressed_bio *cb;
 	struct btrfs_root *root = BTRFS_I(inode)->root;
-	unsigned long uncompressed_len = bio->bi_vcnt * PAGE_CACHE_SIZE;
+	unsigned long uncompressed_len = bio->bi_vcnt * PAGE_SIZE;
 	unsigned long compressed_len;
 	unsigned long nr_pages;
 	unsigned long pg_index;
@@ -589,7 +589,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
 	read_lock(&em_tree->lock);
 	em = lookup_extent_mapping(em_tree,
 				   page_offset(bio->bi_io_vec->bv_page),
-				   PAGE_CACHE_SIZE);
+				   PAGE_SIZE);
 	read_unlock(&em_tree->lock);
 	if (!em)
 		return -EIO;
@@ -617,7 +617,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
 	cb->compress_type = extent_compress_type(bio_flags);
 	cb->orig_bio = bio;
-	nr_pages = DIV_ROUND_UP(compressed_len, PAGE_CACHE_SIZE);
+	nr_pages = DIV_ROUND_UP(compressed_len, PAGE_SIZE);
 	cb->compressed_pages = kcalloc(nr_pages, sizeof(struct page *),
 				       GFP_NOFS);
 	if (!cb->compressed_pages)
@@ -640,7 +640,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
 	add_ra_bio_pages(inode, em_start + em_len, cb);
 	/* include any pages we added in add_ra-bio_pages */
-	uncompressed_len = bio->bi_vcnt * PAGE_CACHE_SIZE;
+	uncompressed_len = bio->bi_vcnt * PAGE_SIZE;
 	cb->len = uncompressed_len;
 	comp_bio = compressed_bio_alloc(bdev, cur_disk_byte, GFP_NOFS);
@@ -653,18 +653,18 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
 	for (pg_index = 0; pg_index < nr_pages; pg_index++) {
 		page = cb->compressed_pages[pg_index];
 		page->mapping = inode->i_mapping;
-		page->index = em_start >> PAGE_CACHE_SHIFT;
+		page->index = em_start >> PAGE_SHIFT;
 		if (comp_bio->bi_iter.bi_size)
 			ret = tree->ops->merge_bio_hook(READ, page, 0,
-							PAGE_CACHE_SIZE,
+							PAGE_SIZE,
 							comp_bio, 0);
 		else
 			ret = 0;
 		page->mapping = NULL;
-		if (ret || bio_add_page(comp_bio, page, PAGE_CACHE_SIZE, 0) <
-		    PAGE_CACHE_SIZE) {
+		if (ret || bio_add_page(comp_bio, page, PAGE_SIZE, 0) <
+		    PAGE_SIZE) {
 			bio_get(comp_bio);
 			ret = btrfs_bio_wq_end_io(root->fs_info, comp_bio,
@@ -702,9 +702,9 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
 			comp_bio->bi_private = cb;
 			comp_bio->bi_end_io = end_compressed_bio_read;
-			bio_add_page(comp_bio, page, PAGE_CACHE_SIZE, 0);
+			bio_add_page(comp_bio, page, PAGE_SIZE, 0);
 		}
-		cur_disk_byte += PAGE_CACHE_SIZE;
+		cur_disk_byte += PAGE_SIZE;
 	}
 	bio_get(comp_bio);
@@ -1013,8 +1013,8 @@ int btrfs_decompress_buf2page(char *buf, unsigned long buf_start,
 	/* copy bytes from the working buffer into the pages */
 	while (working_bytes > 0) {
-		bytes = min(PAGE_CACHE_SIZE - *pg_offset,
-			    PAGE_CACHE_SIZE - buf_offset);
+		bytes = min(PAGE_SIZE - *pg_offset,
+			    PAGE_SIZE - buf_offset);
 		bytes = min(bytes, working_bytes);
 		kaddr = kmap_atomic(page_out);
 		memcpy(kaddr + *pg_offset, buf + buf_offset, bytes);
@@ -1027,7 +1027,7 @@ int btrfs_decompress_buf2page(char *buf, unsigned long buf_start,
 		current_buf_start += bytes;
 		/* check if we need to pick another page */
-		if (*pg_offset == PAGE_CACHE_SIZE) {
+		if (*pg_offset == PAGE_SIZE) {
 			(*pg_index)++;
 			if (*pg_index >= vcnt)
 				return 0;
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 4b02591b0301..942af3d2885f 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1055,7 +1055,7 @@ static void btree_invalidatepage(struct page *page, unsigned int offset,
 			   (unsigned long long)page_offset(page));
 		ClearPagePrivate(page);
 		set_page_private(page, 0);
-		page_cache_release(page);
+		put_page(page);
 	}
 }
@@ -1757,7 +1757,7 @@ static int setup_bdi(struct btrfs_fs_info *info, struct backing_dev_info *bdi)
 	if (err)
 		return err;
-	bdi->ra_pages = VM_MAX_READAHEAD * 1024 / PAGE_CACHE_SIZE;
+	bdi->ra_pages = VM_MAX_READAHEAD * 1024 / PAGE_SIZE;
 	bdi->congested_fn = btrfs_congested_fn;
 	bdi->congested_data = info;
 	bdi->capabilities |= BDI_CAP_CGROUP_WRITEBACK;
@@ -2537,7 +2537,7 @@ int open_ctree(struct super_block *sb,
 		err = ret;
 		goto fail_bdi;
 	}
-	fs_info->dirty_metadata_batch = PAGE_CACHE_SIZE *
+	fs_info->dirty_metadata_batch = PAGE_SIZE *
 					(1 + ilog2(nr_cpu_ids));
 	ret = percpu_counter_init(&fs_info->delalloc_bytes, 0, GFP_KERNEL);
@@ -2782,7 +2782,7 @@ int open_ctree(struct super_block *sb,
 	 * flag our filesystem as having big metadata blocks if
 	 * they are bigger than the page size
 	 */
-	if (btrfs_super_nodesize(disk_super) > PAGE_CACHE_SIZE) {
+	if (btrfs_super_nodesize(disk_super) > PAGE_SIZE) {
 		if (!(features & BTRFS_FEATURE_INCOMPAT_BIG_METADATA))
 			printk(KERN_INFO "BTRFS: flagging fs with big metadata feature\n");
 		features |= BTRFS_FEATURE_INCOMPAT_BIG_METADATA;
@@ -2832,7 +2832,7 @@ int open_ctree(struct super_block *sb,
 	fs_info->bdi.ra_pages *= btrfs_super_num_devices(disk_super);
 	fs_info->bdi.ra_pages = max(fs_info->bdi.ra_pages,
-				    SZ_4M / PAGE_CACHE_SIZE);
+				    SZ_4M / PAGE_SIZE);
 	tree_root->nodesize = nodesize;
 	tree_root->sectorsize = sectorsize;
@@ -4071,9 +4071,9 @@ static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info,
 		ret = -EINVAL;
 	}
 	/* Only PAGE SIZE is supported yet */
-	if (sectorsize != PAGE_CACHE_SIZE) {
+	if (sectorsize != PAGE_SIZE) {
 		printk(KERN_ERR "BTRFS: sectorsize %llu not supported yet, only support %lu\n",
-				sectorsize, PAGE_CACHE_SIZE);
+				sectorsize, PAGE_SIZE);
 		ret = -EINVAL;
 	}
 	if (!is_power_of_2(nodesize) || nodesize < sectorsize ||
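[Editorial aside, not part of the patch: the open_ctree() checks above use the usual power-of-two trick: for a power-of-two page size, size & (PAGE_SIZE - 1) is zero exactly when size is page-aligned, which is cheaper than a modulo. A small userspace sketch with a hypothetical EX_PAGE_SIZE, assuming 4 KiB pages:]

#include <assert.h>
#include <stdio.h>

#define EX_PAGE_SIZE 4096UL /* assume 4 KiB pages */

/* Returns 1 if sz is a whole multiple of the page size. */
static int page_aligned(unsigned long sz)
{
	/* Works because EX_PAGE_SIZE is a power of two: a multiple of
	 * 2^n has all of its low n bits clear. */
	return (sz & (EX_PAGE_SIZE - 1)) == 0;
}

int main(void)
{
	assert(page_aligned(16384));   /* exactly 4 pages */
	assert(!page_aligned(16385));
	/* Equivalent to the modulo form for power-of-two sizes. */
	assert(page_aligned(16384) == (16384 % EX_PAGE_SIZE == 0));
	printf("alignment checks passed\n");
	return 0;
}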
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 53e12977bfd0..ce114ba9780a 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -3452,7 +3452,7 @@ again:
 		num_pages = 1;
 	num_pages *= 16;
-	num_pages *= PAGE_CACHE_SIZE;
+	num_pages *= PAGE_SIZE;
 	ret = btrfs_check_data_free_space(inode, 0, num_pages);
 	if (ret)
@@ -4639,7 +4639,7 @@ static void shrink_delalloc(struct btrfs_root *root, u64 to_reclaim, u64 orig,
 	loops = 0;
 	while (delalloc_bytes && loops < 3) {
 		max_reclaim = min(delalloc_bytes, to_reclaim);
-		nr_pages = max_reclaim >> PAGE_CACHE_SHIFT;
+		nr_pages = max_reclaim >> PAGE_SHIFT;
 		btrfs_writeback_inodes_sb_nr(root, nr_pages, items);
 		/*
 		 * We need to wait for the async pages to actually start before
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 76a0c8597d98..93d696d248d9 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -1363,23 +1363,23 @@ int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end)
 void extent_range_clear_dirty_for_io(struct inode *inode, u64 start, u64 end)
 {
-	unsigned long index = start >> PAGE_CACHE_SHIFT;
-	unsigned long end_index = end >> PAGE_CACHE_SHIFT;
+	unsigned long index = start >> PAGE_SHIFT;
+	unsigned long end_index = end >> PAGE_SHIFT;
 	struct page *page;
 	while (index <= end_index) {
 		page = find_get_page(inode->i_mapping, index);
 		BUG_ON(!page); /* Pages should be in the extent_io_tree */
 		clear_page_dirty_for_io(page);
-		page_cache_release(page);
+		put_page(page);
 		index++;
 	}
 }
 void extent_range_redirty_for_io(struct inode *inode, u64 start, u64 end)
 {
-	unsigned long index = start >> PAGE_CACHE_SHIFT;
-	unsigned long end_index = end >> PAGE_CACHE_SHIFT;
+	unsigned long index = start >> PAGE_SHIFT;
+	unsigned long end_index = end >> PAGE_SHIFT;
 	struct page *page;
 	while (index <= end_index) {
@@ -1387,7 +1387,7 @@ void extent_range_redirty_for_io(struct inode *inode, u64 start, u64 end)
 		BUG_ON(!page); /* Pages should be in the extent_io_tree */
 		__set_page_dirty_nobuffers(page);
 		account_page_redirty(page);
-		page_cache_release(page);
+		put_page(page);
 		index++;
 	}
 }
@@ -1397,15 +1397,15 @@ void extent_range_redirty_for_io(struct inode *inode, u64 start, u64 end)
 */
 static void set_range_writeback(struct extent_io_tree *tree, u64 start, u64 end)
 {
-	unsigned long index = start >> PAGE_CACHE_SHIFT;
-	unsigned long end_index = end >> PAGE_CACHE_SHIFT;
+	unsigned long index = start >> PAGE_SHIFT;
+	unsigned long end_index = end >> PAGE_SHIFT;
 	struct page *page;
 	while (index <= end_index) {
 		page = find_get_page(tree->mapping, index);
 		BUG_ON(!page); /* Pages should be in the extent_io_tree */
 		set_page_writeback(page);
-		page_cache_release(page);
+		put_page(page);
 		index++;
 	}
 }
@@ -1556,8 +1556,8 @@ static noinline void __unlock_for_delalloc(struct inode *inode,
 {
 	int ret;
 	struct page *pages[16];
-	unsigned long index = start >> PAGE_CACHE_SHIFT;
-	unsigned long end_index = end >> PAGE_CACHE_SHIFT;
+	unsigned long index = start >> PAGE_SHIFT;
+	unsigned long end_index = end >> PAGE_SHIFT;
 	unsigned long nr_pages = end_index - index + 1;
 	int i;
@@ -1571,7 +1571,7 @@ static noinline void __unlock_for_delalloc(struct inode *inode,
 		for (i = 0; i < ret; i++) {
 			if (pages[i] != locked_page)
 				unlock_page(pages[i]);
-			page_cache_release(pages[i]);
+			put_page(pages[i]);
 		}
 		nr_pages -= ret;
 		index += ret;
@@ -1584,9 +1584,9 @@ static noinline int lock_delalloc_pages(struct inode *inode,
 					u64 delalloc_start,
 					u64 delalloc_end)
 {
-	unsigned long index = delalloc_start >> PAGE_CACHE_SHIFT;
+	unsigned long index = delalloc_start >> PAGE_SHIFT;
 	unsigned long start_index = index;
-	unsigned long end_index = delalloc_end >> PAGE_CACHE_SHIFT;
+	unsigned long end_index = delalloc_end >> PAGE_SHIFT;
 	unsigned long pages_locked = 0;
 	struct page *pages[16];
 	unsigned long nrpages;
@@ -1619,11 +1619,11 @@ static noinline int lock_delalloc_pages(struct inode *inode,
 			    pages[i]->mapping != inode->i_mapping) {
 				ret = -EAGAIN;
 				unlock_page(pages[i]);
-				page_cache_release(pages[i]);
+				put_page(pages[i]);
 				goto done;
 			}
 		}
-		page_cache_release(pages[i]);
+		put_page(pages[i]);
 		pages_locked++;
 	}
 	nrpages -= ret;
@@ -1636,7 +1636,7 @@ done:
 		__unlock_for_delalloc(inode, locked_page,
 			      delalloc_start,
 			      ((u64)(start_index + pages_locked - 1)) <<
-			      PAGE_CACHE_SHIFT);
+			      PAGE_SHIFT);
 	}
 	return ret;
 }
@@ -1696,7 +1696,7 @@ again:
 		free_extent_state(cached_state);
 		cached_state = NULL;
 		if (!loops) {
-			max_bytes = PAGE_CACHE_SIZE;
+			max_bytes = PAGE_SIZE;
 			loops = 1;
 			goto again;
 		} else {
@@ -1735,8 +1735,8 @@ void extent_clear_unlock_delalloc(struct inode *inode, u64 start, u64 end,
 	struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
 	int ret;
 	struct page *pages[16];
-	unsigned long index = start >> PAGE_CACHE_SHIFT;
-	unsigned long end_index = end >> PAGE_CACHE_SHIFT;
+	unsigned long index = start >> PAGE_SHIFT;
+	unsigned long end_index = end >> PAGE_SHIFT;
 	unsigned long nr_pages = end_index - index + 1;
 	int i;
@@ -1757,7 +1757,7 @@ void extent_clear_unlock_delalloc(struct inode *inode, u64 start, u64 end,
 				SetPagePrivate2(pages[i]);
 			if (pages[i] == locked_page) {
-				page_cache_release(pages[i]);
+				put_page(pages[i]);
 				continue;
 			}
 			if (page_ops & PAGE_CLEAR_DIRTY)
@@ -1770,7 +1770,7 @@ void extent_clear_unlock_delalloc(struct inode *inode, u64 start, u64 end,
 				end_page_writeback(pages[i]);
 			if (page_ops & PAGE_UNLOCK)
 				unlock_page(pages[i]);
-			page_cache_release(pages[i]);
+			put_page(pages[i]);
 		}
 		nr_pages -= ret;
 		index += ret;
@@ -1961,7 +1961,7 @@ int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
 static void check_page_uptodate(struct extent_io_tree *tree, struct page *page)
 {
 	u64 start = page_offset(page);
-	u64 end = start + PAGE_CACHE_SIZE - 1;
+	u64 end = start + PAGE_SIZE - 1;
 	if (test_range_bit(tree, start, end, EXTENT_UPTODATE, 1, NULL))
 		SetPageUptodate(page);
 }
@@ -2071,11 +2071,11 @@ int repair_eb_io_failure(struct btrfs_root *root, struct extent_buffer *eb,
 		struct page *p = eb->pages[i];
 		ret = repair_io_failure(root->fs_info->btree_inode, start,
-					PAGE_CACHE_SIZE, start, p,
+					PAGE_SIZE, start, p,
 					start - page_offset(p), mirror_num);
 		if (ret)
 			break;
-		start += PAGE_CACHE_SIZE;
+		start += PAGE_SIZE;
 	}
 	return ret;
@@ -2466,8 +2466,8 @@ static void end_bio_extent_writepage(struct bio *bio)
 		 * advance bv_offset and adjust bv_len to compensate.
 		 * Print a warning for nonzero offsets, and an error
 		 * if they don't add up to a full page.  */
-		if (bvec->bv_offset || bvec->bv_len != PAGE_CACHE_SIZE) {
-			if (bvec->bv_offset + bvec->bv_len != PAGE_CACHE_SIZE)
+		if (bvec->bv_offset || bvec->bv_len != PAGE_SIZE) {
+			if (bvec->bv_offset + bvec->bv_len != PAGE_SIZE)
 				btrfs_err(BTRFS_I(page->mapping->host)->root->fs_info,
 				   "partial page write in btrfs with offset %u and length %u",
 					bvec->bv_offset, bvec->bv_len);
@@ -2541,8 +2541,8 @@ static void end_bio_extent_readpage(struct bio *bio)
 		 * advance bv_offset and adjust bv_len to compensate.
 		 * Print a warning for nonzero offsets, and an error
 		 * if they don't add up to a full page.
 		 */
-		if (bvec->bv_offset || bvec->bv_len != PAGE_CACHE_SIZE) {
-			if (bvec->bv_offset + bvec->bv_len != PAGE_CACHE_SIZE)
+		if (bvec->bv_offset || bvec->bv_len != PAGE_SIZE) {
+			if (bvec->bv_offset + bvec->bv_len != PAGE_SIZE)
 				btrfs_err(BTRFS_I(page->mapping->host)->root->fs_info,
 				   "partial page read in btrfs with offset %u and length %u",
 					bvec->bv_offset, bvec->bv_len);
@@ -2598,13 +2598,13 @@ static void end_bio_extent_readpage(struct bio *bio)
readpage_ok:
 		if (likely(uptodate)) {
 			loff_t i_size = i_size_read(inode);
-			pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT;
+			pgoff_t end_index = i_size >> PAGE_SHIFT;
 			unsigned off;
 			/* Zero out the end if this page straddles i_size */
-			off = i_size & (PAGE_CACHE_SIZE-1);
+			off = i_size & (PAGE_SIZE-1);
 			if (page->index == end_index && off)
-				zero_user_segment(page, off, PAGE_CACHE_SIZE);
+				zero_user_segment(page, off, PAGE_SIZE);
 			SetPageUptodate(page);
 		} else {
 			ClearPageUptodate(page);
@@ -2768,7 +2768,7 @@ static int submit_extent_page(int rw, struct extent_io_tree *tree,
 	struct bio *bio;
 	int contig = 0;
 	int old_compressed = prev_bio_flags & EXTENT_BIO_COMPRESSED;
-	size_t page_size = min_t(size_t, size, PAGE_CACHE_SIZE);
+	size_t page_size = min_t(size_t, size, PAGE_SIZE);
 	if (bio_ret && *bio_ret) {
 		bio = *bio_ret;
@@ -2821,7 +2821,7 @@ static void attach_extent_buffer_page(struct extent_buffer *eb,
 {
 	if (!PagePrivate(page)) {
 		SetPagePrivate(page);
-		page_cache_get(page);
+		get_page(page);
 		set_page_private(page, (unsigned long)eb);
 	} else {
 		WARN_ON(page->private != (unsigned long)eb);
@@ -2832,7 +2832,7 @@ void set_page_extent_mapped(struct page *page)
 {
 	if (!PagePrivate(page)) {
 		SetPagePrivate(page);
-		page_cache_get(page);
+		get_page(page);
 		set_page_private(page, EXTENT_PAGE_PRIVATE);
 	}
 }
@@ -2880,7 +2880,7 @@ static int __do_readpage(struct extent_io_tree *tree,
 {
 	struct inode *inode = page->mapping->host;
 	u64 start = page_offset(page);
-	u64 page_end = start + PAGE_CACHE_SIZE - 1;
+	u64 page_end = start + PAGE_SIZE - 1;
 	u64 end;
 	u64 cur = start;
 	u64 extent_offset;
@@ -2909,12 +2909,12 @@ static int __do_readpage(struct extent_io_tree *tree,
 		}
 	}
-	if (page->index == last_byte >> PAGE_CACHE_SHIFT) {
+	if (page->index == last_byte >> PAGE_SHIFT) {
 		char *userpage;
-		size_t zero_offset = last_byte & (PAGE_CACHE_SIZE - 1);
+		size_t zero_offset = last_byte & (PAGE_SIZE - 1);
 		if (zero_offset) {
-			iosize = PAGE_CACHE_SIZE - zero_offset;
+			iosize = PAGE_SIZE - zero_offset;
 			userpage = kmap_atomic(page);
 			memset(userpage + zero_offset, 0, iosize);
 			flush_dcache_page(page);
@@ -2922,14 +2922,14 @@ static int __do_readpage(struct extent_io_tree *tree,
 		}
 	}
 	while (cur <= end) {
-		unsigned long pnr = (last_byte >> PAGE_CACHE_SHIFT) + 1;
+		unsigned long pnr = (last_byte >> PAGE_SHIFT) + 1;
 		bool force_bio_submit = false;
 		if (cur >= last_byte) {
 			char *userpage;
 			struct extent_state *cached = NULL;
-			iosize = PAGE_CACHE_SIZE - pg_offset;
+			iosize = PAGE_SIZE - pg_offset;
 			userpage = kmap_atomic(page);
 			memset(userpage + pg_offset, 0, iosize);
 			flush_dcache_page(page);
@@ -3112,7 +3112,7 @@ static inline void __do_contiguous_readpages(struct extent_io_tree *tree,
 	for (index = 0; index < nr_pages; index++) {
 		__do_readpage(tree, pages[index], get_extent, em_cached, bio,
 			      mirror_num, bio_flags, rw, prev_em_start);
-		page_cache_release(pages[index]);
+		put_page(pages[index]);
 	}
 }
@@ -3134,10 +3134,10 @@ static void __extent_readpages(struct extent_io_tree *tree,
 		page_start = page_offset(pages[index]);
 		if (!end) {
 			start = page_start;
-			end = start + PAGE_CACHE_SIZE - 1;
+			end = start + PAGE_SIZE - 1;
 			first_index = index;
 		} else if (end + 1 == page_start) {
-			end += PAGE_CACHE_SIZE;
+			end += PAGE_SIZE;
 		} else {
 			__do_contiguous_readpages(tree, &pages[first_index],
 						  index - first_index, start,
@@ -3145,7 +3145,7 @@ static void __extent_readpages(struct extent_io_tree *tree,
 						  bio, mirror_num, bio_flags,
 						  rw, prev_em_start);
 			start = page_start;
-			end = start + PAGE_CACHE_SIZE - 1;
+			end = start + PAGE_SIZE - 1;
 			first_index = index;
 		}
 	}
@@ -3167,13 +3167,13 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
 	struct inode *inode = page->mapping->host;
 	struct btrfs_ordered_extent *ordered;
 	u64 start = page_offset(page);
-	u64 end = start + PAGE_CACHE_SIZE - 1;
+	u64 end = start + PAGE_SIZE - 1;
 	int ret;
 	while (1) {
 		lock_extent(tree, start, end);
 		ordered = btrfs_lookup_ordered_range(inode, start,
-						PAGE_CACHE_SIZE);
+						PAGE_SIZE);
 		if (!ordered)
 			break;
 		unlock_extent(tree, start, end);
@@ -3227,7 +3227,7 @@ static noinline_for_stack int writepage_delalloc(struct inode *inode,
 			      unsigned long *nr_written)
 {
 	struct extent_io_tree *tree = epd->tree;
-	u64 page_end = delalloc_start + PAGE_CACHE_SIZE - 1;
+	u64 page_end = delalloc_start + PAGE_SIZE - 1;
 	u64 nr_delalloc;
 	u64 delalloc_to_write = 0;
 	u64 delalloc_end = 0;
@@ -3269,8 +3269,8 @@ static noinline_for_stack int writepage_delalloc(struct inode *inode,
 		 * PAGE_CACHE_SIZE
 		 */
 		delalloc_to_write += (delalloc_end - delalloc_start +
-				      PAGE_CACHE_SIZE) >>
-				      PAGE_CACHE_SHIFT;
+				      PAGE_SIZE) >>
+				      PAGE_SHIFT;
 		delalloc_start = delalloc_end + 1;
 	}
 	if (wbc->nr_to_write < delalloc_to_write) {
@@ -3319,7 +3319,7 @@ static noinline_for_stack int __extent_writepage_io(struct inode *inode,
 {
 	struct extent_io_tree *tree = epd->tree;
 	u64 start = page_offset(page);
-	u64 page_end = start + PAGE_CACHE_SIZE - 1;
+	u64 page_end = start + PAGE_SIZE - 1;
 	u64 end;
 	u64 cur = start;
 	u64 extent_offset;
@@ -3434,7 +3434,7 @@ static noinline_for_stack int __extent_writepage_io(struct inode *inode,
 		if (ret) {
 			SetPageError(page);
 		} else {
-			unsigned long max_nr = (i_size >> PAGE_CACHE_SHIFT) + 1;
+			unsigned long max_nr = (i_size >> PAGE_SHIFT) + 1;
 			set_range_writeback(tree, cur, cur + iosize - 1);
 			if (!PageWriteback(page)) {
@@ -3477,12 +3477,12 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
 	struct inode *inode = page->mapping->host;
 	struct extent_page_data *epd = data;
 	u64 start = page_offset(page);
-	u64 page_end = start + PAGE_CACHE_SIZE - 1;
+	u64 page_end = start + PAGE_SIZE - 1;
 	int ret;
 	int nr = 0;
 	size_t pg_offset = 0;
 	loff_t i_size = i_size_read(inode);
-	unsigned long end_index = i_size >> PAGE_CACHE_SHIFT;
+	unsigned long end_index = i_size >> PAGE_SHIFT;
 	int write_flags;
 	unsigned long nr_written = 0;
@@ -3497,10 +3497,10 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
 	ClearPageError(page);
-	pg_offset = i_size & (PAGE_CACHE_SIZE - 1);
+	pg_offset = i_size & (PAGE_SIZE - 1);
 	if (page->index > end_index ||
 	   (page->index == end_index && !pg_offset)) {
-		page->mapping->a_ops->invalidatepage(page, 0, PAGE_CACHE_SIZE);
+		page->mapping->a_ops->invalidatepage(page, 0, PAGE_SIZE);
 		unlock_page(page);
 		return 0;
 	}
@@ -3510,7 +3510,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
 		userpage = kmap_atomic(page);
 		memset(userpage + pg_offset, 0,
-		       PAGE_CACHE_SIZE - pg_offset);
+		       PAGE_SIZE - pg_offset);
 		kunmap_atomic(userpage);
 		flush_dcache_page(page);
 	}
@@ -3748,7 +3748,7 @@ static noinline_for_stack int write_one_eb(struct extent_buffer *eb,
 		clear_page_dirty_for_io(p);
 		set_page_writeback(p);
 		ret = submit_extent_page(rw, tree, wbc, p, offset >> 9,
-					 PAGE_CACHE_SIZE, 0, bdev, &epd->bio,
+					 PAGE_SIZE, 0, bdev, &epd->bio,
 					 -1, end_bio_extent_buffer_writepage,
 					 0, epd->bio_flags, bio_flags, false);
 		epd->bio_flags = bio_flags;
@@ -3760,7 +3760,7 @@ static noinline_for_stack int write_one_eb(struct extent_buffer *eb,
 			ret = -EIO;
 			break;
 		}
-		offset += PAGE_CACHE_SIZE;
+		offset += PAGE_SIZE;
 		update_nr_written(p, wbc, 1);
 		unlock_page(p);
 	}
@@ -3804,8 +3804,8 @@ int btree_write_cache_pages(struct address_space *mapping,
 		index = mapping->writeback_index; /* Start from prev offset */
 		end = -1;
 	} else {
-		index = wbc->range_start >> PAGE_CACHE_SHIFT;
-		end = wbc->range_end >> PAGE_CACHE_SHIFT;
+		index = wbc->range_start >> PAGE_SHIFT;
+		end = wbc->range_end >> PAGE_SHIFT;
 		scanned = 1;
 	}
 	if (wbc->sync_mode == WB_SYNC_ALL)
@@ -3948,8 +3948,8 @@ static int extent_write_cache_pages(struct extent_io_tree *tree,
 		index = mapping->writeback_index; /* Start from prev offset */
 		end = -1;
 	} else {
-		index = wbc->range_start >> PAGE_CACHE_SHIFT;
-		end = wbc->range_end >> PAGE_CACHE_SHIFT;
+		index = wbc->range_start >> PAGE_SHIFT;
+		end = wbc->range_end >> PAGE_SHIFT;
 		scanned = 1;
 	}
 	if (wbc->sync_mode == WB_SYNC_ALL)
@@ -4083,8 +4083,8 @@ int extent_write_locked_range(struct extent_io_tree *tree, struct inode *inode,
 	int ret = 0;
 	struct address_space *mapping = inode->i_mapping;
 	struct page *page;
-	unsigned long nr_pages = (end - start + PAGE_CACHE_SIZE) >>
-		PAGE_CACHE_SHIFT;
+	unsigned long nr_pages = (end - start + PAGE_SIZE) >>
+		PAGE_SHIFT;
 	struct extent_page_data epd = {
 		.bio = NULL,
@@ -4102,18 +4102,18 @@ int extent_write_locked_range(struct extent_io_tree *tree, struct inode *inode,
 	};
 	while (start <= end) {
-		page = find_get_page(mapping, start >> PAGE_CACHE_SHIFT);
+		page = find_get_page(mapping, start >> PAGE_SHIFT);
 		if (clear_page_dirty_for_io(page))
 			ret = __extent_writepage(page, &wbc_writepages, &epd);
 		else {
 			if (tree->ops && tree->ops->writepage_end_io_hook)
 				tree->ops->writepage_end_io_hook(page, start,
-						 start + PAGE_CACHE_SIZE - 1,
+						 start + PAGE_SIZE - 1,
 						 NULL, 1);
 			unlock_page(page);
 		}
-		page_cache_release(page);
-		start += PAGE_CACHE_SIZE;
+		put_page(page);
+		start += PAGE_SIZE;
 	}
 	flush_epd_write_bio(&epd);
@@ -4163,7 +4163,7 @@ int extent_readpages(struct extent_io_tree *tree,
 		list_del(&page->lru);
 		if (add_to_page_cache_lru(page, mapping,
 					page->index, GFP_NOFS)) {
-			page_cache_release(page);
+			put_page(page);
 			continue;
 		}
@@ -4197,7 +4197,7 @@ int extent_invalidatepage(struct extent_io_tree *tree,
 {
 	struct extent_state *cached_state = NULL;
 	u64 start = page_offset(page);
-	u64 end = start + PAGE_CACHE_SIZE - 1;
+	u64 end = start + PAGE_SIZE - 1;
 	size_t blocksize = page->mapping->host->i_sb->s_blocksize;
 	start += ALIGN(offset, blocksize);
@@ -4223,7 +4223,7 @@ static int try_release_extent_state(struct extent_map_tree *map,
 				    struct page *page, gfp_t mask)
 {
 	u64 start = page_offset(page);
-	u64 end = start + PAGE_CACHE_SIZE - 1;
+	u64 end = start + PAGE_SIZE - 1;
 	int ret = 1;
 	if (test_range_bit(tree, start, end,
@@ -4262,7 +4262,7 @@ int try_release_extent_mapping(struct extent_map_tree *map,
 {
 	struct extent_map *em;
 	u64 start = page_offset(page);
-	u64 end = start + PAGE_CACHE_SIZE - 1;
+	u64 end = start + PAGE_SIZE - 1;
 	if (gfpflags_allow_blocking(mask) &&
 	    page->mapping->host->i_size > SZ_16M) {
@@ -4587,14 +4587,14 @@ static void btrfs_release_extent_buffer_page(struct extent_buffer *eb)
 			ClearPagePrivate(page);
 			set_page_private(page, 0);
 			/* One for the page private */
-			page_cache_release(page);
+			put_page(page);
 		}
 		if (mapped)
 			spin_unlock(&page->mapping->private_lock);
 		/* One for when we alloced the page */
-		page_cache_release(page);
+		put_page(page);
 	} while (index != 0);
 }
@@ -4779,7 +4779,7 @@ struct extent_buffer *find_extent_buffer(struct btrfs_fs_info *fs_info,
 	rcu_read_lock();
 	eb = radix_tree_lookup(&fs_info->buffer_radix,
-			       start >> PAGE_CACHE_SHIFT);
+			       start >> PAGE_SHIFT);
 	if (eb && atomic_inc_not_zero(&eb->refs)) {
 		rcu_read_unlock();
 		/*
@@ -4829,7 +4829,7 @@ again:
 		goto free_eb;
 	spin_lock(&fs_info->buffer_lock);
 	ret = radix_tree_insert(&fs_info->buffer_radix,
-				start >> PAGE_CACHE_SHIFT, eb);
+				start >> PAGE_SHIFT, eb);
 	spin_unlock(&fs_info->buffer_lock);
 	radix_tree_preload_end();
 	if (ret == -EEXIST) {
@@ -4862,7 +4862,7 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
 	unsigned long len = fs_info->tree_root->nodesize;
 	unsigned long num_pages = num_extent_pages(start, len);
 	unsigned long i;
-	unsigned long index = start >> PAGE_CACHE_SHIFT;
+	unsigned long index = start >> PAGE_SHIFT;
 	struct extent_buffer *eb;
 	struct extent_buffer *exists = NULL;
 	struct page *p;
@@ -4896,7 +4896,7 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
 			if (atomic_inc_not_zero(&exists->refs)) {
 				spin_unlock(&mapping->private_lock);
 				unlock_page(p);
-				page_cache_release(p);
+				put_page(p);
 				mark_extent_buffer_accessed(exists, p);
 				goto free_eb;
 			}
@@ -4908,7 +4908,7 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
 			 */
 			ClearPagePrivate(p);
 			WARN_ON(PageDirty(p));
-			page_cache_release(p);
+			put_page(p);
 		}
 		attach_extent_buffer_page(eb, p);
 		spin_unlock(&mapping->private_lock);
@@ -4931,7 +4931,7 @@ again:
 	spin_lock(&fs_info->buffer_lock);
 	ret = radix_tree_insert(&fs_info->buffer_radix,
-				start >> PAGE_CACHE_SHIFT, eb);
+				start >> PAGE_SHIFT, eb);
 	spin_unlock(&fs_info->buffer_lock);
 	radix_tree_preload_end();
 	if (ret == -EEXIST) {
@@ -4994,7 +4994,7 @@ static int release_extent_buffer(struct extent_buffer *eb)
 			spin_lock(&fs_info->buffer_lock);
 			radix_tree_delete(&fs_info->buffer_radix,
-					  eb->start >> PAGE_CACHE_SHIFT);
+					  eb->start >> PAGE_SHIFT);
 			spin_unlock(&fs_info->buffer_lock);
 		} else {
 			spin_unlock(&eb->refs_lock);
@@ -5168,8 +5168,8 @@ int read_extent_buffer_pages(struct extent_io_tree *tree,
 	if (start) {
 		WARN_ON(start < eb->start);
-		start_i = (start >> PAGE_CACHE_SHIFT) -
-			(eb->start >> PAGE_CACHE_SHIFT);
+		start_i = (start >> PAGE_SHIFT) -
+			(eb->start >> PAGE_SHIFT);
 	} else {
 		start_i = 0;
 	}
@@ -5252,18 +5252,18 @@ void read_extent_buffer(struct extent_buffer *eb, void *dstv,
 	struct page *page;
 	char *kaddr;
 	char *dst = (char *)dstv;
-	size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
-	unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;
+	size_t start_offset = eb->start & ((u64)PAGE_SIZE - 1);
+	unsigned long i = (start_offset + start) >> PAGE_SHIFT;
 	WARN_ON(start > eb->len);
 	WARN_ON(start + len > eb->start + eb->len);
-	offset = (start_offset + start) & (PAGE_CACHE_SIZE - 1);
+	offset = (start_offset + start) & (PAGE_SIZE - 1);
 	while (len > 0) {
 		page = eb->pages[i];
-		cur = min(len, (PAGE_CACHE_SIZE - offset));
+		cur = min(len, (PAGE_SIZE - offset));
 		kaddr = page_address(page);
 		memcpy(dst, kaddr + offset, cur);
@@ -5283,19 +5283,19 @@ int read_extent_buffer_to_user(struct extent_buffer *eb, void __user *dstv,
 	struct page *page;
 	char *kaddr;
 	char __user *dst = (char __user *)dstv;
-	size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
-	unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;
+	size_t start_offset = eb->start & ((u64)PAGE_SIZE - 1);
+	unsigned long i = (start_offset + start) >> PAGE_SHIFT;
 	int ret = 0;
 	WARN_ON(start > eb->len);
 	WARN_ON(start + len > eb->start + eb->len);
-	offset = (start_offset + start) & (PAGE_CACHE_SIZE - 1);
+	offset = (start_offset + start) & (PAGE_SIZE - 1);
 	while (len > 0) {
 		page = eb->pages[i];
-		cur = min(len, (PAGE_CACHE_SIZE - offset));
+		cur = min(len, (PAGE_SIZE - offset));
 		kaddr = page_address(page);
 		if (copy_to_user(dst, kaddr + offset, cur)) {
 			ret = -EFAULT;
@@ -5316,13 +5316,13 @@ int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start,
 			      unsigned long *map_start,
 			      unsigned long *map_len)
 {
-	size_t offset = start & (PAGE_CACHE_SIZE - 1);
+	size_t offset = start & (PAGE_SIZE - 1);
 	char *kaddr;
 	struct page *p;
-	size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
-	unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;
+	size_t start_offset = eb->start & ((u64)PAGE_SIZE - 1);
+	unsigned long i = (start_offset + start) >> PAGE_SHIFT;
 	unsigned long end_i = (start_offset + start + min_len - 1) >>
-		PAGE_CACHE_SHIFT;
+		PAGE_SHIFT;
 	if (i != end_i)
 		return -EINVAL;
@@ -5332,7 +5332,7 @@ int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start,
 		*map_start = 0;
 	} else {
 		offset = 0;
-		*map_start = ((u64)i << PAGE_CACHE_SHIFT) - start_offset;
+		*map_start = ((u64)i << PAGE_SHIFT) - start_offset;
 	}
 	if (start + min_len > eb->len) {
@@ -5345,7 +5345,7 @@ int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start,
 	p = eb->pages[i];
 	kaddr = page_address(p);
 	*map = kaddr + offset;
-	*map_len = PAGE_CACHE_SIZE - offset;
+	*map_len = PAGE_SIZE - offset;
 	return 0;
 }
@@ -5358,19 +5358,19 @@ int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv,
 	struct page *page;
 	char *kaddr;
 	char *ptr = (char *)ptrv;
-	size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
-	unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;
+	size_t start_offset = eb->start & ((u64)PAGE_SIZE - 1);
+	unsigned long i = (start_offset + start) >> PAGE_SHIFT;
 	int ret = 0;
 	WARN_ON(start > eb->len);
 	WARN_ON(start + len > eb->start + eb->len);
-	offset = (start_offset + start) & (PAGE_CACHE_SIZE - 1);
+	offset = (start_offset + start) & (PAGE_SIZE - 1);
 	while (len > 0) {
 		page = eb->pages[i];
-		cur = min(len, (PAGE_CACHE_SIZE - offset));
+		cur = min(len, (PAGE_SIZE - offset));
 		kaddr = page_address(page);
 		ret = memcmp(ptr, kaddr + offset, cur);
@@ -5393,19 +5393,19 @@ void write_extent_buffer(struct extent_buffer *eb, const void *srcv,
 	struct page *page;
 	char *kaddr;
 	char *src = (char *)srcv;
-	size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
-	unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;
+	size_t start_offset = eb->start & ((u64)PAGE_SIZE - 1);
+	unsigned long i = (start_offset + start) >> PAGE_SHIFT;
 	WARN_ON(start > eb->len);
 	WARN_ON(start + len > eb->start + eb->len);
-	offset = (start_offset + start) & (PAGE_CACHE_SIZE - 1);
+	offset = (start_offset + start) & (PAGE_SIZE - 1);
 	while (len > 0) {
 		page = eb->pages[i];
 		WARN_ON(!PageUptodate(page));
-		cur = min(len, PAGE_CACHE_SIZE - offset);
+		cur = min(len, PAGE_SIZE - offset);
 		kaddr = page_address(page);
 		memcpy(kaddr + offset, src, cur);
@@ -5423,19 +5423,19 @@ void memset_extent_buffer(struct extent_buffer *eb, char c,
 	size_t offset;
 	struct page *page;
 	char *kaddr;
-	size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
-	unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;
+	size_t start_offset = eb->start & ((u64)PAGE_SIZE - 1);
+	unsigned long i = (start_offset + start) >> PAGE_SHIFT;
 	WARN_ON(start > eb->len);
 	WARN_ON(start + len > eb->start + eb->len);
-	offset = (start_offset + start) & (PAGE_CACHE_SIZE - 1);
+	offset = (start_offset + start) & (PAGE_SIZE - 1);
 	while (len > 0) {
 		page = eb->pages[i];
 		WARN_ON(!PageUptodate(page));
-		cur = min(len, PAGE_CACHE_SIZE - offset);
+		cur = min(len, PAGE_SIZE - offset);
 		kaddr = page_address(page);
 		memset(kaddr + offset, c, cur);
@@ -5454,19 +5454,19 @@ void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src,
 	size_t offset;
 	struct page *page;
 	char *kaddr;
-	size_t start_offset = dst->start & ((u64)PAGE_CACHE_SIZE - 1);
-	unsigned long i = (start_offset + dst_offset) >> PAGE_CACHE_SHIFT;
+	size_t start_offset = dst->start & ((u64)PAGE_SIZE - 1);
+	unsigned long i = (start_offset + dst_offset) >> PAGE_SHIFT;
 	WARN_ON(src->len != dst_len);
 	offset = (start_offset + dst_offset) &
-		(PAGE_CACHE_SIZE - 1);
+		(PAGE_SIZE - 1);
 	while (len > 0) {
 		page = dst->pages[i];
 		WARN_ON(!PageUptodate(page));
-		cur = min(len, (unsigned long)(PAGE_CACHE_SIZE - offset));
+		cur = min(len, (unsigned long)(PAGE_SIZE - offset));
 		kaddr = page_address(page);
 		read_extent_buffer(src, kaddr + offset, src_offset, cur);
@@ -5508,7 +5508,7 @@ static inline void eb_bitmap_offset(struct extent_buffer *eb,
 				    unsigned long *page_index,
 				    size_t *page_offset)
 {
-	size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
+	size_t start_offset = eb->start & ((u64)PAGE_SIZE - 1);
 	size_t byte_offset = BIT_BYTE(nr);
 	size_t offset;
@@ -5519,8 +5519,8 @@ static inline void eb_bitmap_offset(struct extent_buffer *eb,
 	 */
 	offset = start_offset + start + byte_offset;
-	*page_index = offset >> PAGE_CACHE_SHIFT;
-	*page_offset = offset & (PAGE_CACHE_SIZE - 1);
+	*page_index = offset >> PAGE_SHIFT;
+	*page_offset = offset & (PAGE_SIZE - 1);
 }
 /**
@@ -5572,7 +5572,7 @@ void extent_buffer_bitmap_set(struct extent_buffer *eb, unsigned long start,
 		len -= bits_to_set;
 		bits_to_set = BITS_PER_BYTE;
 		mask_to_set = ~0U;
-		if (++offset >= PAGE_CACHE_SIZE && len > 0) {
+		if (++offset >= PAGE_SIZE && len > 0) {
 			offset = 0;
 			page = eb->pages[++i];
 			WARN_ON(!PageUptodate(page));
@@ -5614,7 +5614,7 @@ void extent_buffer_bitmap_clear(struct extent_buffer *eb, unsigned long start,
 		len -= bits_to_clear;
 		bits_to_clear = BITS_PER_BYTE;
 		mask_to_clear = ~0U;
-		if (++offset >= PAGE_CACHE_SIZE && len > 0) {
+		if (++offset >= PAGE_SIZE && len > 0) {
 			offset = 0;
 			page = eb->pages[++i];
 			WARN_ON(!PageUptodate(page));
@@ -5661,7 +5661,7 @@ void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
 	size_t cur;
 	size_t dst_off_in_page;
 	size_t src_off_in_page;
-	size_t start_offset = dst->start & ((u64)PAGE_CACHE_SIZE - 1);
+	size_t start_offset = dst->start & ((u64)PAGE_SIZE - 1);
 	unsigned long dst_i;
 	unsigned long src_i;
@@ -5680,17 +5680,17 @@ void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
 	while (len > 0) {
 		dst_off_in_page = (start_offset + dst_offset) &
-			(PAGE_CACHE_SIZE - 1);
+			(PAGE_SIZE - 1);
 		src_off_in_page = (start_offset + src_offset) &
-			(PAGE_CACHE_SIZE - 1);
+			(PAGE_SIZE - 1);
-		dst_i = (start_offset + dst_offset) >> PAGE_CACHE_SHIFT;
-		src_i = (start_offset + src_offset) >> PAGE_CACHE_SHIFT;
+		dst_i = (start_offset + dst_offset) >> PAGE_SHIFT;
+		src_i = (start_offset + src_offset) >> PAGE_SHIFT;
-		cur = min(len, (unsigned long)(PAGE_CACHE_SIZE -
+		cur = min(len, (unsigned long)(PAGE_SIZE -
 					       src_off_in_page));
 		cur = min_t(unsigned long, cur,
-			(unsigned long)(PAGE_CACHE_SIZE - dst_off_in_page));
+			(unsigned long)(PAGE_SIZE - dst_off_in_page));
 		copy_pages(dst->pages[dst_i], dst->pages[src_i],
 			   dst_off_in_page, src_off_in_page, cur);
@@ -5709,7 +5709,7 @@ void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
 	size_t src_off_in_page;
 	unsigned long dst_end = dst_offset + len - 1;
 	unsigned long src_end = src_offset + len - 1;
-	size_t start_offset = dst->start & ((u64)PAGE_CACHE_SIZE - 1);
+	size_t start_offset = dst->start & ((u64)PAGE_SIZE - 1);
 	unsigned long dst_i;
 	unsigned long src_i;
@@ -5728,13 +5728,13 @@ void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
 		return;
 	}
 	while (len > 0) {
-		dst_i = (start_offset + dst_end) >> PAGE_CACHE_SHIFT;
-		src_i = (start_offset + src_end) >> PAGE_CACHE_SHIFT;
+		dst_i = (start_offset + dst_end) >> PAGE_SHIFT;
+		src_i = (start_offset + src_end) >> PAGE_SHIFT;
 		dst_off_in_page = (start_offset + dst_end) &
-			(PAGE_CACHE_SIZE - 1);
+			(PAGE_SIZE - 1);
 		src_off_in_page = (start_offset + src_end) &
-			(PAGE_CACHE_SIZE - 1);
+			(PAGE_SIZE - 1);
 		cur = min_t(unsigned long, len, src_off_in_page + 1);
 		cur = min(cur, dst_off_in_page + 1);
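[Editorial aside, not part of the patch: read_extent_buffer() and its siblings above all walk the same loop: start from the buffer's offset inside its first page, copy min(len, PAGE_SIZE - offset) per page, and reset the in-page offset to zero after the first iteration. A minimal userspace analogue over an array of page-sized chunks, with hypothetical names and 4 KiB pages assumed (it omits the extra eb->start alignment term the kernel code carries):]

#include <stdio.h>
#include <string.h>

#define EX_PAGE_SIZE 4096UL /* assume 4 KiB pages */

/* Copy len bytes starting at byte 'start' out of an extent-buffer-like
 * array of page-sized chunks, mimicking read_extent_buffer()'s loop. */
static void eb_read(char *dst, char pages[][EX_PAGE_SIZE],
		    unsigned long start, unsigned long len)
{
	unsigned long i = start / EX_PAGE_SIZE;      /* first page */
	unsigned long offset = start % EX_PAGE_SIZE; /* offset inside it */

	while (len > 0) {
		unsigned long cur = len < EX_PAGE_SIZE - offset ?
				    len : EX_PAGE_SIZE - offset;
		memcpy(dst, pages[i] + offset, cur);
		dst += cur;
		len -= cur;
		offset = 0; /* later pages are read from their start */
		i++;
	}
}

int main(void)
{
	static char pages[2][EX_PAGE_SIZE];
	char out[16];

	memset(pages[0], 'a', EX_PAGE_SIZE);
	memset(pages[1], 'b', EX_PAGE_SIZE);
	/* A read that straddles the page boundary. */
	eb_read(out, pages, EX_PAGE_SIZE - 8, sizeof(out));
	printf("%.16s\n", out); /* prints aaaaaaaabbbbbbbb */
	return 0;
}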
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 5dbf92e68fbd..b5e0ade90e88 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -120,7 +120,7 @@ struct extent_state {
 };
 #define INLINE_EXTENT_BUFFER_PAGES 16
-#define MAX_INLINE_EXTENT_BUFFER_SIZE (INLINE_EXTENT_BUFFER_PAGES * PAGE_CACHE_SIZE)
+#define MAX_INLINE_EXTENT_BUFFER_SIZE (INLINE_EXTENT_BUFFER_PAGES * PAGE_SIZE)
 struct extent_buffer {
 	u64 start;
 	unsigned long len;
@@ -365,8 +365,8 @@ void wait_on_extent_buffer_writeback(struct extent_buffer *eb);
 static inline unsigned long num_extent_pages(u64 start, u64 len)
 {
-	return ((start + len + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT) -
-		(start >> PAGE_CACHE_SHIFT);
+	return ((start + len + PAGE_SIZE - 1) >> PAGE_SHIFT) -
+		(start >> PAGE_SHIFT);
 }
 static inline void extent_buffer_get(struct extent_buffer *eb)
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index b5baf5bdc8e1..7a7d6e253cfc 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -32,7 +32,7 @@
 				   size) - 1))
 #define MAX_CSUM_ITEMS(r, size) (min_t(u32, __MAX_CSUM_ITEMS(r, size), \
-				       PAGE_CACHE_SIZE))
+				       PAGE_SIZE))
 #define MAX_ORDERED_SUM_BYTES(r) ((PAGE_SIZE - \
 				   sizeof(struct btrfs_ordered_sum)) / \
@@ -203,7 +203,7 @@ static int __btrfs_lookup_bio_sums(struct btrfs_root *root,
 		csum = (u8 *)dst;
 	}
-	if (bio->bi_iter.bi_size > PAGE_CACHE_SIZE * 8)
+	if (bio->bi_iter.bi_size > PAGE_SIZE * 8)
 		path->reada = READA_FORWARD;
 	WARN_ON(bio->bi_vcnt <= 0);
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 15a09cb156ce..cf31a60c6284 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -414,11 +414,11 @@ static noinline int btrfs_copy_from_user(loff_t pos, size_t write_bytes,
 	size_t copied = 0;
 	size_t total_copied = 0;
 	int pg = 0;
-	int offset = pos & (PAGE_CACHE_SIZE - 1);
+	int offset = pos & (PAGE_SIZE - 1);
 	while (write_bytes > 0) {
 		size_t count = min_t(size_t,
-				     PAGE_CACHE_SIZE - offset, write_bytes);
+				     PAGE_SIZE - offset, write_bytes);
 		struct page *page = prepared_pages[pg];
 		/*
 		 * Copy data from userspace to the current page
@@ -448,7 +448,7 @@ static noinline int btrfs_copy_from_user(loff_t pos, size_t write_bytes,
 		if (unlikely(copied == 0))
 			break;
-		if (copied < PAGE_CACHE_SIZE - offset) {
+		if (copied < PAGE_SIZE - offset) {
 			offset += copied;
 		} else {
 			pg++;
@@ -473,7 +473,7 @@ static void btrfs_drop_pages(struct page **pages, size_t num_pages)
 		 */
 		ClearPageChecked(pages[i]);
 		unlock_page(pages[i]);
-		page_cache_release(pages[i]);
+		put_page(pages[i]);
 	}
 }
@@ -1297,7 +1297,7 @@ static int prepare_uptodate_page(struct inode *inode,
 {
 	int ret = 0;
-	if (((pos & (PAGE_CACHE_SIZE - 1)) || force_uptodate) &&
+	if (((pos & (PAGE_SIZE - 1)) || force_uptodate) &&
 	    !PageUptodate(page)) {
 		ret = btrfs_readpage(NULL, page);
 		if (ret)
@@ -1323,7 +1323,7 @@ static noinline int prepare_pages(struct inode *inode, struct page **pages,
 				  size_t write_bytes, bool force_uptodate)
 {
 	int i;
-	unsigned long index = pos >> PAGE_CACHE_SHIFT;
+	unsigned long index = pos >> PAGE_SHIFT;
 	gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping);
 	int err = 0;
 	int faili;
@@ -1345,7 +1345,7 @@ again:
 			err = prepare_uptodate_page(inode, pages[i],
 						    pos + write_bytes, false);
 		if (err) {
-			page_cache_release(pages[i]);
+			put_page(pages[i]);
 			if (err == -EAGAIN) {
 				err = 0;
 				goto again;
@@ -1360,7 +1360,7 @@ again:
fail:
 	while (faili >= 0) {
 		unlock_page(pages[faili]);
-		page_cache_release(pages[faili]);
+		put_page(pages[faili]);
 		faili--;
 	}
 	return err;
@@ -1408,7 +1408,7 @@ lock_and_cleanup_extent_if_need(struct inode *inode, struct page **pages,
 				     cached_state, GFP_NOFS);
 		for (i = 0; i < num_pages; i++) {
 			unlock_page(pages[i]);
-			page_cache_release(pages[i]);
+			put_page(pages[i]);
 		}
 		btrfs_start_ordered_extent(inode, ordered, 1);
 		btrfs_put_ordered_extent(ordered);
@@ -1497,8 +1497,8 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
 	bool force_page_uptodate = false;
 	bool need_unlock;
-	nrptrs = min(DIV_ROUND_UP(iov_iter_count(i), PAGE_CACHE_SIZE),
-		     PAGE_CACHE_SIZE / (sizeof(struct page *)));
+	nrptrs = min(DIV_ROUND_UP(iov_iter_count(i), PAGE_SIZE),
+		     PAGE_SIZE / (sizeof(struct page *)));
 	nrptrs = min(nrptrs, current->nr_dirtied_pause - current->nr_dirtied);
 	nrptrs = max(nrptrs, 8);
 	pages = kmalloc_array(nrptrs, sizeof(struct page *), GFP_KERNEL);
@@ -1506,13 +1506,13 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
 		return -ENOMEM;
 	while (iov_iter_count(i) > 0) {
-		size_t offset = pos & (PAGE_CACHE_SIZE - 1);
+		size_t offset = pos & (PAGE_SIZE - 1);
 		size_t sector_offset;
 		size_t write_bytes = min(iov_iter_count(i),
-					 nrptrs * (size_t)PAGE_CACHE_SIZE -
+					 nrptrs * (size_t)PAGE_SIZE -
 					 offset);
 		size_t num_pages = DIV_ROUND_UP(write_bytes + offset,
-						PAGE_CACHE_SIZE);
+						PAGE_SIZE);
 		size_t reserve_bytes;
 		size_t dirty_pages;
 		size_t copied;
@@ -1547,7 +1547,7 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
 				 * write_bytes, so scale down.
 				 */
 				num_pages = DIV_ROUND_UP(write_bytes + offset,
-							 PAGE_CACHE_SIZE);
+							 PAGE_SIZE);
 				reserve_bytes = round_up(write_bytes +
 							 sector_offset,
 							 root->sectorsize);
 				goto reserve_metadata;
@@ -1609,7 +1609,7 @@ again:
 		} else {
 			force_page_uptodate = false;
 			dirty_pages = DIV_ROUND_UP(copied + offset,
-						   PAGE_CACHE_SIZE);
+						   PAGE_SIZE);
 		}
 		/*
@@ -1641,7 +1641,7 @@ again:
 				u64 __pos;
 				__pos = round_down(pos, root->sectorsize) +
-					(dirty_pages << PAGE_CACHE_SHIFT);
+					(dirty_pages << PAGE_SHIFT);
 				btrfs_delalloc_release_space(inode, __pos,
 							     release_bytes);
 			}
@@ -1682,7 +1682,7 @@ again:
 		cond_resched();
 		balance_dirty_pages_ratelimited(inode->i_mapping);
-		if (dirty_pages < (root->nodesize >> PAGE_CACHE_SHIFT) + 1)
+		if (dirty_pages < (root->nodesize >> PAGE_SHIFT) + 1)
 			btrfs_btree_balance_dirty(root);
 		pos += copied;
@@ -1738,8 +1738,8 @@ static ssize_t __btrfs_direct_write(struct kiocb *iocb,
 		goto out;
 	written += written_buffered;
 	iocb->ki_pos = pos + written_buffered;
-	invalidate_mapping_pages(file->f_mapping, pos >> PAGE_CACHE_SHIFT,
-				 endbyte >> PAGE_CACHE_SHIFT);
+	invalidate_mapping_pages(file->f_mapping, pos >> PAGE_SHIFT,
+				 endbyte >> PAGE_SHIFT);
out:
 	return written ? written : err;
 }
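[Editorial aside, not part of the patch: __btrfs_buffered_write() above sizes its page array the same way its copy loop consumes it: an unaligned write of write_bytes starting at pos touches DIV_ROUND_UP(write_bytes + (pos & (PAGE_SIZE - 1)), PAGE_SIZE) pages. A worked userspace sketch with a hypothetical EX_PAGE_SIZE, assuming 4 KiB pages:]

#include <stdio.h>

#define EX_PAGE_SIZE 4096UL /* assume 4 KiB pages */
#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

int main(void)
{
	unsigned long pos = 4000, write_bytes = 200;
	unsigned long offset = pos & (EX_PAGE_SIZE - 1); /* 4000 */

	/* 200 bytes at offset 4000 straddle the page boundary, so the
	 * write dirties two pages even though it is < PAGE_SIZE long. */
	unsigned long num_pages = DIV_ROUND_UP(write_bytes + offset,
					       EX_PAGE_SIZE);
	printf("num_pages=%lu\n", num_pages); /* prints num_pages=2 */
	return 0;
}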
io_ctl_map_page(io_ctl, 0); crc = btrfs_csum_data(io_ctl->orig + offset, crc, - PAGE_CACHE_SIZE - offset); + PAGE_SIZE - offset); btrfs_csum_final(crc, (char *)&crc); if (val != crc) { btrfs_err_rl(io_ctl->root->fs_info, @@ -561,7 +561,7 @@ static int io_ctl_add_bitmap(struct btrfs_io_ctl *io_ctl, void *bitmap) io_ctl_map_page(io_ctl, 0); } - memcpy(io_ctl->cur, bitmap, PAGE_CACHE_SIZE); + memcpy(io_ctl->cur, bitmap, PAGE_SIZE); io_ctl_set_crc(io_ctl, io_ctl->index - 1); if (io_ctl->index < io_ctl->num_pages) io_ctl_map_page(io_ctl, 0); @@ -621,7 +621,7 @@ static int io_ctl_read_bitmap(struct btrfs_io_ctl *io_ctl, if (ret) return ret; - memcpy(entry->bitmap, io_ctl->cur, PAGE_CACHE_SIZE); + memcpy(entry->bitmap, io_ctl->cur, PAGE_SIZE); io_ctl_unmap_page(io_ctl); return 0; @@ -775,7 +775,7 @@ static int __load_free_space_cache(struct btrfs_root *root, struct inode *inode, } else { ASSERT(num_bitmaps); num_bitmaps--; - e->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS); + e->bitmap = kzalloc(PAGE_SIZE, GFP_NOFS); if (!e->bitmap) { kmem_cache_free( btrfs_free_space_cachep, e); @@ -1660,7 +1660,7 @@ static void recalculate_thresholds(struct btrfs_free_space_ctl *ctl) * sure we don't go over our overall goal of MAX_CACHE_BYTES_PER_GIG as * we add more bitmaps. */ - bitmap_bytes = (ctl->total_bitmaps + 1) * PAGE_CACHE_SIZE; + bitmap_bytes = (ctl->total_bitmaps + 1) * PAGE_SIZE; if (bitmap_bytes >= max_bytes) { ctl->extents_thresh = 0; @@ -2111,7 +2111,7 @@ new_bitmap: } /* allocate the bitmap */ - info->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS); + info->bitmap = kzalloc(PAGE_SIZE, GFP_NOFS); spin_lock(&ctl->tree_lock); if (!info->bitmap) { ret = -ENOMEM; @@ -3580,7 +3580,7 @@ again: } if (!map) { - map = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS); + map = kzalloc(PAGE_SIZE, GFP_NOFS); if (!map) { kmem_cache_free(btrfs_free_space_cachep, info); return -ENOMEM; diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c index 1f0ec19b23f6..70107f7c9307 100644 --- a/fs/btrfs/inode-map.c +++ b/fs/btrfs/inode-map.c @@ -283,7 +283,7 @@ void btrfs_unpin_free_ino(struct btrfs_root *root) } #define INIT_THRESHOLD ((SZ_32K / 2) / sizeof(struct btrfs_free_space)) -#define INODES_PER_BITMAP (PAGE_CACHE_SIZE * 8) +#define INODES_PER_BITMAP (PAGE_SIZE * 8) /* * The goal is to keep the memory used by the free_ino tree won't @@ -317,7 +317,7 @@ static void recalculate_thresholds(struct btrfs_free_space_ctl *ctl) } ctl->extents_thresh = (max_bitmaps - ctl->total_bitmaps) * - PAGE_CACHE_SIZE / sizeof(*info); + PAGE_SIZE / sizeof(*info); } /* @@ -481,12 +481,12 @@ again: spin_lock(&ctl->tree_lock); prealloc = sizeof(struct btrfs_free_space) * ctl->free_extents; - prealloc = ALIGN(prealloc, PAGE_CACHE_SIZE); - prealloc += ctl->total_bitmaps * PAGE_CACHE_SIZE; + prealloc = ALIGN(prealloc, PAGE_SIZE); + prealloc += ctl->total_bitmaps * PAGE_SIZE; spin_unlock(&ctl->tree_lock); /* Just to make sure we have enough space */ - prealloc += 8 * PAGE_CACHE_SIZE; + prealloc += 8 * PAGE_SIZE; ret = btrfs_delalloc_reserve_space(inode, 0, prealloc); if (ret) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 41a5688ffdfe..2aaba58b4856 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -194,7 +194,7 @@ static int insert_inline_extent(struct btrfs_trans_handle *trans, while (compressed_size > 0) { cpage = compressed_pages[i]; cur_size = min_t(unsigned long, compressed_size, - PAGE_CACHE_SIZE); + PAGE_SIZE); kaddr = kmap_atomic(cpage); write_extent_buffer(leaf, kaddr, ptr, cur_size); @@ -208,13 +208,13 @@ static int 
insert_inline_extent(struct btrfs_trans_handle *trans, compress_type); } else { page = find_get_page(inode->i_mapping, - start >> PAGE_CACHE_SHIFT); + start >> PAGE_SHIFT); btrfs_set_file_extent_compression(leaf, ei, 0); kaddr = kmap_atomic(page); - offset = start & (PAGE_CACHE_SIZE - 1); + offset = start & (PAGE_SIZE - 1); write_extent_buffer(leaf, kaddr + offset, ptr, size); kunmap_atomic(kaddr); - page_cache_release(page); + put_page(page); } btrfs_mark_buffer_dirty(leaf); btrfs_release_path(path); @@ -322,7 +322,7 @@ out: * And at reserve time, it's always aligned to page size, so * just free one page here. */ - btrfs_qgroup_free_data(inode, 0, PAGE_CACHE_SIZE); + btrfs_qgroup_free_data(inode, 0, PAGE_SIZE); btrfs_free_path(path); btrfs_end_transaction(trans, root); return ret; @@ -435,8 +435,8 @@ static noinline void compress_file_range(struct inode *inode, actual_end = min_t(u64, isize, end + 1); again: will_compress = 0; - nr_pages = (end >> PAGE_CACHE_SHIFT) - (start >> PAGE_CACHE_SHIFT) + 1; - nr_pages = min_t(unsigned long, nr_pages, SZ_128K / PAGE_CACHE_SIZE); + nr_pages = (end >> PAGE_SHIFT) - (start >> PAGE_SHIFT) + 1; + nr_pages = min_t(unsigned long, nr_pages, SZ_128K / PAGE_SIZE); /* * we don't want to send crud past the end of i_size through @@ -514,7 +514,7 @@ again: if (!ret) { unsigned long offset = total_compressed & - (PAGE_CACHE_SIZE - 1); + (PAGE_SIZE - 1); struct page *page = pages[nr_pages_ret - 1]; char *kaddr; @@ -524,7 +524,7 @@ again: if (offset) { kaddr = kmap_atomic(page); memset(kaddr + offset, 0, - PAGE_CACHE_SIZE - offset); + PAGE_SIZE - offset); kunmap_atomic(kaddr); } will_compress = 1; @@ -580,7 +580,7 @@ cont: * one last check to make sure the compression is really a * win, compare the page count read with the blocks on disk */ - total_in = ALIGN(total_in, PAGE_CACHE_SIZE); + total_in = ALIGN(total_in, PAGE_SIZE); if (total_compressed >= total_in) { will_compress = 0; } else { @@ -594,7 +594,7 @@ cont: */ for (i = 0; i < nr_pages_ret; i++) { WARN_ON(pages[i]->mapping); - page_cache_release(pages[i]); + put_page(pages[i]); } kfree(pages); pages = NULL; @@ -650,7 +650,7 @@ cleanup_and_bail_uncompressed: free_pages_out: for (i = 0; i < nr_pages_ret; i++) { WARN_ON(pages[i]->mapping); - page_cache_release(pages[i]); + put_page(pages[i]); } kfree(pages); } @@ -664,7 +664,7 @@ static void free_async_extent_pages(struct async_extent *async_extent) for (i = 0; i < async_extent->nr_pages; i++) { WARN_ON(async_extent->pages[i]->mapping); - page_cache_release(async_extent->pages[i]); + put_page(async_extent->pages[i]); } kfree(async_extent->pages); async_extent->nr_pages = 0; @@ -966,7 +966,7 @@ static noinline int cow_file_range(struct inode *inode, PAGE_END_WRITEBACK); *nr_written = *nr_written + - (end - start + PAGE_CACHE_SIZE) / PAGE_CACHE_SIZE; + (end - start + PAGE_SIZE) / PAGE_SIZE; *page_started = 1; goto out; } else if (ret < 0) { @@ -1106,8 +1106,8 @@ static noinline void async_cow_submit(struct btrfs_work *work) async_cow = container_of(work, struct async_cow, work); root = async_cow->root; - nr_pages = (async_cow->end - async_cow->start + PAGE_CACHE_SIZE) >> - PAGE_CACHE_SHIFT; + nr_pages = (async_cow->end - async_cow->start + PAGE_SIZE) >> + PAGE_SHIFT; /* * atomic_sub_return implies a barrier for waitqueue_active @@ -1164,8 +1164,8 @@ static int cow_file_range_async(struct inode *inode, struct page *locked_page, async_cow_start, async_cow_submit, async_cow_free); - nr_pages = (cur_end - start + PAGE_CACHE_SIZE) >> - PAGE_CACHE_SHIFT; + nr_pages = 
(cur_end - start + PAGE_SIZE) >> + PAGE_SHIFT; atomic_add(nr_pages, &root->fs_info->async_delalloc_pages); btrfs_queue_work(root->fs_info->delalloc_workers, @@ -1960,7 +1960,7 @@ static noinline int add_pending_csums(struct btrfs_trans_handle *trans, int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end, struct extent_state **cached_state) { - WARN_ON((end & (PAGE_CACHE_SIZE - 1)) == 0); + WARN_ON((end & (PAGE_SIZE - 1)) == 0); return set_extent_delalloc(&BTRFS_I(inode)->io_tree, start, end, cached_state, GFP_NOFS); } @@ -1993,7 +1993,7 @@ again: inode = page->mapping->host; page_start = page_offset(page); - page_end = page_offset(page) + PAGE_CACHE_SIZE - 1; + page_end = page_offset(page) + PAGE_SIZE - 1; lock_extent_bits(&BTRFS_I(inode)->io_tree, page_start, page_end, &cached_state); @@ -2003,7 +2003,7 @@ again: goto out; ordered = btrfs_lookup_ordered_range(inode, page_start, - PAGE_CACHE_SIZE); + PAGE_SIZE); if (ordered) { unlock_extent_cached(&BTRFS_I(inode)->io_tree, page_start, page_end, &cached_state, GFP_NOFS); @@ -2014,7 +2014,7 @@ again: } ret = btrfs_delalloc_reserve_space(inode, page_start, - PAGE_CACHE_SIZE); + PAGE_SIZE); if (ret) { mapping_set_error(page->mapping, ret); end_extent_writepage(page, ret, page_start, page_end); @@ -2030,7 +2030,7 @@ out: &cached_state, GFP_NOFS); out_page: unlock_page(page); - page_cache_release(page); + put_page(page); kfree(fixup); } @@ -2063,7 +2063,7 @@ static int btrfs_writepage_start_hook(struct page *page, u64 start, u64 end) return -EAGAIN; SetPageChecked(page); - page_cache_get(page); + get_page(page); btrfs_init_work(&fixup->work, btrfs_fixup_helper, btrfs_writepage_fixup_worker, NULL, NULL); fixup->page = page; @@ -4247,7 +4247,7 @@ static int truncate_inline_extent(struct inode *inode, if (btrfs_file_extent_compression(leaf, fi) != BTRFS_COMPRESS_NONE) { loff_t offset = new_size; - loff_t page_end = ALIGN(offset, PAGE_CACHE_SIZE); + loff_t page_end = ALIGN(offset, PAGE_SIZE); /* * Zero out the remaining of the last page of our inline extent, @@ -4633,7 +4633,7 @@ int btrfs_truncate_block(struct inode *inode, loff_t from, loff_t len, struct extent_state *cached_state = NULL; char *kaddr; u32 blocksize = root->sectorsize; - pgoff_t index = from >> PAGE_CACHE_SHIFT; + pgoff_t index = from >> PAGE_SHIFT; unsigned offset = from & (blocksize - 1); struct page *page; gfp_t mask = btrfs_alloc_write_mask(mapping); @@ -4668,7 +4668,7 @@ again: lock_page(page); if (page->mapping != mapping) { unlock_page(page); - page_cache_release(page); + put_page(page); goto again; } if (!PageUptodate(page)) { @@ -4686,7 +4686,7 @@ again: unlock_extent_cached(io_tree, block_start, block_end, &cached_state, GFP_NOFS); unlock_page(page); - page_cache_release(page); + put_page(page); btrfs_start_ordered_extent(inode, ordered, 1); btrfs_put_ordered_extent(ordered); goto again; @@ -4728,7 +4728,7 @@ out_unlock: btrfs_delalloc_release_space(inode, block_start, blocksize); unlock_page(page); - page_cache_release(page); + put_page(page); out: return ret; } @@ -6717,7 +6717,7 @@ static noinline int uncompress_inline(struct btrfs_path *path, read_extent_buffer(leaf, tmp, ptr, inline_size); - max_size = min_t(unsigned long, PAGE_CACHE_SIZE, max_size); + max_size = min_t(unsigned long, PAGE_SIZE, max_size); ret = btrfs_decompress(compress_type, tmp, page, extent_offset, inline_size, max_size); kfree(tmp); @@ -6879,8 +6879,8 @@ next: size = btrfs_file_extent_inline_len(leaf, path->slots[0], item); extent_offset = page_offset(page) + pg_offset - 
extent_start; - copy_size = min_t(u64, PAGE_CACHE_SIZE - pg_offset, - size - extent_offset); + copy_size = min_t(u64, PAGE_SIZE - pg_offset, + size - extent_offset); em->start = extent_start + extent_offset; em->len = ALIGN(copy_size, root->sectorsize); em->orig_block_len = em->len; @@ -6899,9 +6899,9 @@ next: map = kmap(page); read_extent_buffer(leaf, map + pg_offset, ptr, copy_size); - if (pg_offset + copy_size < PAGE_CACHE_SIZE) { + if (pg_offset + copy_size < PAGE_SIZE) { memset(map + pg_offset + copy_size, 0, - PAGE_CACHE_SIZE - pg_offset - + PAGE_SIZE - pg_offset - copy_size); } kunmap(page); @@ -7336,12 +7336,12 @@ bool btrfs_page_exists_in_range(struct inode *inode, loff_t start, loff_t end) int start_idx; int end_idx; - start_idx = start >> PAGE_CACHE_SHIFT; + start_idx = start >> PAGE_SHIFT; /* * end is the last byte in the last page. end == start is legal */ - end_idx = end >> PAGE_CACHE_SHIFT; + end_idx = end >> PAGE_SHIFT; rcu_read_lock(); @@ -7382,7 +7382,7 @@ bool btrfs_page_exists_in_range(struct inode *inode, loff_t start, loff_t end) * include/linux/pagemap.h for details. */ if (unlikely(page != *pagep)) { - page_cache_release(page); + put_page(page); page = NULL; } } @@ -7390,7 +7390,7 @@ bool btrfs_page_exists_in_range(struct inode *inode, loff_t start, loff_t end) if (page) { if (page->index <= end_idx) found = true; - page_cache_release(page); + put_page(page); } rcu_read_unlock(); @@ -8719,7 +8719,7 @@ static int __btrfs_releasepage(struct page *page, gfp_t gfp_flags) if (ret == 1) { ClearPagePrivate(page); set_page_private(page, 0); - page_cache_release(page); + put_page(page); } return ret; } @@ -8739,7 +8739,7 @@ static void btrfs_invalidatepage(struct page *page, unsigned int offset, struct btrfs_ordered_extent *ordered; struct extent_state *cached_state = NULL; u64 page_start = page_offset(page); - u64 page_end = page_start + PAGE_CACHE_SIZE - 1; + u64 page_end = page_start + PAGE_SIZE - 1; u64 start; u64 end; int inode_evicting = inode->i_state & I_FREEING; @@ -8822,7 +8822,7 @@ again: * 2) Not written to disk * This means the reserved space should be freed here. 
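Why this sweeping rename is safe in every hunk of this series: by this point the PAGE_CACHE_* names and page_cache_get()/page_cache_release() were straight aliases in include/linux/pagemap.h for the PAGE_* constants and get_page()/put_page(), so the substitution is purely textual and produces identical object code. A minimal userspace sketch of that equivalence, assuming 4 KiB pages (PAGE_SHIFT == 12) and my recollection of the old macro spellings, not code from the patch itself:

  #include <assert.h>

  #define PAGE_SHIFT	12
  #define PAGE_SIZE	(1UL << PAGE_SHIFT)
  #define PAGE_MASK	(~(PAGE_SIZE - 1))
  #define PAGE_ALIGN(a)	(((a) + PAGE_SIZE - 1) & PAGE_MASK)

  /* The old names were plain aliases, roughly as in pre-4.6
   * include/linux/pagemap.h; likewise page_cache_get(p) was get_page(p)
   * and page_cache_release(p) was put_page(p). */
  #define PAGE_CACHE_SHIFT	PAGE_SHIFT
  #define PAGE_CACHE_SIZE	PAGE_SIZE
  #define PAGE_CACHE_MASK	PAGE_MASK
  #define PAGE_CACHE_ALIGN(a)	PAGE_ALIGN(a)

  int main(void)
  {
  	/* The shift/mask idioms the hunks above rely on: */
  	assert((5000UL >> PAGE_CACHE_SHIFT) == 1);		/* page index */
  	assert((5000UL & (PAGE_CACHE_SIZE - 1)) == 904);	/* offset in page */
  	assert(PAGE_CACHE_ALIGN(5000UL) == 8192);		/* round up */
  	return 0;
  }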
*/ - btrfs_qgroup_free_data(inode, page_start, PAGE_CACHE_SIZE); + btrfs_qgroup_free_data(inode, page_start, PAGE_SIZE); if (!inode_evicting) { clear_extent_bit(tree, page_start, page_end, EXTENT_LOCKED | EXTENT_DIRTY | @@ -8837,7 +8837,7 @@ again: if (PagePrivate(page)) { ClearPagePrivate(page); set_page_private(page, 0); - page_cache_release(page); + put_page(page); } } @@ -8874,11 +8874,11 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) u64 page_end; u64 end; - reserved_space = PAGE_CACHE_SIZE; + reserved_space = PAGE_SIZE; sb_start_pagefault(inode->i_sb); page_start = page_offset(page); - page_end = page_start + PAGE_CACHE_SIZE - 1; + page_end = page_start + PAGE_SIZE - 1; end = page_end; /* @@ -8934,15 +8934,15 @@ again: goto again; } - if (page->index == ((size - 1) >> PAGE_CACHE_SHIFT)) { + if (page->index == ((size - 1) >> PAGE_SHIFT)) { reserved_space = round_up(size - page_start, root->sectorsize); - if (reserved_space < PAGE_CACHE_SIZE) { + if (reserved_space < PAGE_SIZE) { end = page_start + reserved_space - 1; spin_lock(&BTRFS_I(inode)->lock); BTRFS_I(inode)->outstanding_extents++; spin_unlock(&BTRFS_I(inode)->lock); btrfs_delalloc_release_space(inode, page_start, - PAGE_CACHE_SIZE - reserved_space); + PAGE_SIZE - reserved_space); } } @@ -8969,14 +8969,14 @@ again: ret = 0; /* page is wholly or partially inside EOF */ - if (page_start + PAGE_CACHE_SIZE > size) - zero_start = size & ~PAGE_CACHE_MASK; + if (page_start + PAGE_SIZE > size) + zero_start = size & ~PAGE_MASK; else - zero_start = PAGE_CACHE_SIZE; + zero_start = PAGE_SIZE; - if (zero_start != PAGE_CACHE_SIZE) { + if (zero_start != PAGE_SIZE) { kaddr = kmap(page); - memset(kaddr + zero_start, 0, PAGE_CACHE_SIZE - zero_start); + memset(kaddr + zero_start, 0, PAGE_SIZE - zero_start); flush_dcache_page(page); kunmap(page); } diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 053e677839fe..94a0c8a3e871 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -898,7 +898,7 @@ static int check_defrag_in_cache(struct inode *inode, u64 offset, u32 thresh) u64 end; read_lock(&em_tree->lock); - em = lookup_extent_mapping(em_tree, offset, PAGE_CACHE_SIZE); + em = lookup_extent_mapping(em_tree, offset, PAGE_SIZE); read_unlock(&em_tree->lock); if (em) { @@ -988,7 +988,7 @@ static struct extent_map *defrag_lookup_extent(struct inode *inode, u64 start) struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; struct extent_map *em; - u64 len = PAGE_CACHE_SIZE; + u64 len = PAGE_SIZE; /* * hopefully we have this extent in the tree already, try without @@ -1124,15 +1124,15 @@ static int cluster_pages_for_defrag(struct inode *inode, struct extent_io_tree *tree; gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping); - file_end = (isize - 1) >> PAGE_CACHE_SHIFT; + file_end = (isize - 1) >> PAGE_SHIFT; if (!isize || start_index > file_end) return 0; page_cnt = min_t(u64, (u64)num_pages, (u64)file_end - start_index + 1); ret = btrfs_delalloc_reserve_space(inode, - start_index << PAGE_CACHE_SHIFT, - page_cnt << PAGE_CACHE_SHIFT); + start_index << PAGE_SHIFT, + page_cnt << PAGE_SHIFT); if (ret) return ret; i_done = 0; @@ -1148,7 +1148,7 @@ again: break; page_start = page_offset(page); - page_end = page_start + PAGE_CACHE_SIZE - 1; + page_end = page_start + PAGE_SIZE - 1; while (1) { lock_extent_bits(tree, page_start, page_end, &cached_state); @@ -1169,7 +1169,7 @@ again: */ if (page->mapping != inode->i_mapping) { unlock_page(page); - 
page_cache_release(page); + put_page(page); goto again; } } @@ -1179,7 +1179,7 @@ again: lock_page(page); if (!PageUptodate(page)) { unlock_page(page); - page_cache_release(page); + put_page(page); ret = -EIO; break; } @@ -1187,7 +1187,7 @@ again: if (page->mapping != inode->i_mapping) { unlock_page(page); - page_cache_release(page); + put_page(page); goto again; } @@ -1208,7 +1208,7 @@ again: wait_on_page_writeback(pages[i]); page_start = page_offset(pages[0]); - page_end = page_offset(pages[i_done - 1]) + PAGE_CACHE_SIZE; + page_end = page_offset(pages[i_done - 1]) + PAGE_SIZE; lock_extent_bits(&BTRFS_I(inode)->io_tree, page_start, page_end - 1, &cached_state); @@ -1222,8 +1222,8 @@ again: BTRFS_I(inode)->outstanding_extents++; spin_unlock(&BTRFS_I(inode)->lock); btrfs_delalloc_release_space(inode, - start_index << PAGE_CACHE_SHIFT, - (page_cnt - i_done) << PAGE_CACHE_SHIFT); + start_index << PAGE_SHIFT, + (page_cnt - i_done) << PAGE_SHIFT); } @@ -1240,17 +1240,17 @@ again: set_page_extent_mapped(pages[i]); set_page_dirty(pages[i]); unlock_page(pages[i]); - page_cache_release(pages[i]); + put_page(pages[i]); } return i_done; out: for (i = 0; i < i_done; i++) { unlock_page(pages[i]); - page_cache_release(pages[i]); + put_page(pages[i]); } btrfs_delalloc_release_space(inode, - start_index << PAGE_CACHE_SHIFT, - page_cnt << PAGE_CACHE_SHIFT); + start_index << PAGE_SHIFT, + page_cnt << PAGE_SHIFT); return ret; } @@ -1273,7 +1273,7 @@ int btrfs_defrag_file(struct inode *inode, struct file *file, int defrag_count = 0; int compress_type = BTRFS_COMPRESS_ZLIB; u32 extent_thresh = range->extent_thresh; - unsigned long max_cluster = SZ_256K >> PAGE_CACHE_SHIFT; + unsigned long max_cluster = SZ_256K >> PAGE_SHIFT; unsigned long cluster = max_cluster; u64 new_align = ~((u64)SZ_128K - 1); struct page **pages = NULL; @@ -1317,9 +1317,9 @@ int btrfs_defrag_file(struct inode *inode, struct file *file, /* find the last page to defrag */ if (range->start + range->len > range->start) { last_index = min_t(u64, isize - 1, - range->start + range->len - 1) >> PAGE_CACHE_SHIFT; + range->start + range->len - 1) >> PAGE_SHIFT; } else { - last_index = (isize - 1) >> PAGE_CACHE_SHIFT; + last_index = (isize - 1) >> PAGE_SHIFT; } if (newer_than) { @@ -1331,11 +1331,11 @@ int btrfs_defrag_file(struct inode *inode, struct file *file, * we always align our defrag to help keep * the extents in the file evenly spaced */ - i = (newer_off & new_align) >> PAGE_CACHE_SHIFT; + i = (newer_off & new_align) >> PAGE_SHIFT; } else goto out_ra; } else { - i = range->start >> PAGE_CACHE_SHIFT; + i = range->start >> PAGE_SHIFT; } if (!max_to_defrag) max_to_defrag = last_index - i + 1; @@ -1348,7 +1348,7 @@ int btrfs_defrag_file(struct inode *inode, struct file *file, inode->i_mapping->writeback_index = i; while (i <= last_index && defrag_count < max_to_defrag && - (i < DIV_ROUND_UP(i_size_read(inode), PAGE_CACHE_SIZE))) { + (i < DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE))) { /* * make sure we stop running if someone unmounts * the FS @@ -1362,7 +1362,7 @@ int btrfs_defrag_file(struct inode *inode, struct file *file, break; } - if (!should_defrag_range(inode, (u64)i << PAGE_CACHE_SHIFT, + if (!should_defrag_range(inode, (u64)i << PAGE_SHIFT, extent_thresh, &last_len, &skip, &defrag_end, range->flags & BTRFS_DEFRAG_RANGE_COMPRESS)) { @@ -1371,14 +1371,14 @@ int btrfs_defrag_file(struct inode *inode, struct file *file, * the should_defrag function tells us how much to skip * bump our counter by the suggested amount */ - next = 
DIV_ROUND_UP(skip, PAGE_CACHE_SIZE); + next = DIV_ROUND_UP(skip, PAGE_SIZE); i = max(i + 1, next); continue; } if (!newer_than) { - cluster = (PAGE_CACHE_ALIGN(defrag_end) >> - PAGE_CACHE_SHIFT) - i; + cluster = (PAGE_ALIGN(defrag_end) >> + PAGE_SHIFT) - i; cluster = min(cluster, max_cluster); } else { cluster = max_cluster; @@ -1412,20 +1412,20 @@ int btrfs_defrag_file(struct inode *inode, struct file *file, i += ret; newer_off = max(newer_off + 1, - (u64)i << PAGE_CACHE_SHIFT); + (u64)i << PAGE_SHIFT); ret = find_new_extents(root, inode, newer_than, &newer_off, SZ_64K); if (!ret) { range->start = newer_off; - i = (newer_off & new_align) >> PAGE_CACHE_SHIFT; + i = (newer_off & new_align) >> PAGE_SHIFT; } else { break; } } else { if (ret > 0) { i += ret; - last_len += ret << PAGE_CACHE_SHIFT; + last_len += ret << PAGE_SHIFT; } else { i++; last_len = 0; @@ -1722,7 +1722,7 @@ static noinline int btrfs_ioctl_snap_create_v2(struct file *file, if (vol_args->flags & BTRFS_SUBVOL_RDONLY) readonly = true; if (vol_args->flags & BTRFS_SUBVOL_QGROUP_INHERIT) { - if (vol_args->size > PAGE_CACHE_SIZE) { + if (vol_args->size > PAGE_SIZE) { ret = -EINVAL; goto free_args; } @@ -2806,12 +2806,12 @@ static struct page *extent_same_get_page(struct inode *inode, pgoff_t index) lock_page(page); if (!PageUptodate(page)) { unlock_page(page); - page_cache_release(page); + put_page(page); return ERR_PTR(-EIO); } if (page->mapping != inode->i_mapping) { unlock_page(page); - page_cache_release(page); + put_page(page); return ERR_PTR(-EAGAIN); } } @@ -2823,7 +2823,7 @@ static int gather_extent_pages(struct inode *inode, struct page **pages, int num_pages, u64 off) { int i; - pgoff_t index = off >> PAGE_CACHE_SHIFT; + pgoff_t index = off >> PAGE_SHIFT; for (i = 0; i < num_pages; i++) { again: @@ -2932,12 +2932,12 @@ static void btrfs_cmp_data_free(struct cmp_pages *cmp) pg = cmp->src_pages[i]; if (pg) { unlock_page(pg); - page_cache_release(pg); + put_page(pg); } pg = cmp->dst_pages[i]; if (pg) { unlock_page(pg); - page_cache_release(pg); + put_page(pg); } } kfree(cmp->src_pages); @@ -2949,7 +2949,7 @@ static int btrfs_cmp_data_prepare(struct inode *src, u64 loff, u64 len, struct cmp_pages *cmp) { int ret; - int num_pages = PAGE_CACHE_ALIGN(len) >> PAGE_CACHE_SHIFT; + int num_pages = PAGE_ALIGN(len) >> PAGE_SHIFT; struct page **src_pgarr, **dst_pgarr; /* @@ -2987,12 +2987,12 @@ static int btrfs_cmp_data(struct inode *src, u64 loff, struct inode *dst, int ret = 0; int i; struct page *src_page, *dst_page; - unsigned int cmp_len = PAGE_CACHE_SIZE; + unsigned int cmp_len = PAGE_SIZE; void *addr, *dst_addr; i = 0; while (len) { - if (len < PAGE_CACHE_SIZE) + if (len < PAGE_SIZE) cmp_len = len; BUG_ON(i >= cmp->num_pages); @@ -3191,7 +3191,7 @@ ssize_t btrfs_dedupe_file_range(struct file *src_file, u64 loff, u64 olen, if (olen > BTRFS_MAX_DEDUPE_LEN) olen = BTRFS_MAX_DEDUPE_LEN; - if (WARN_ON_ONCE(bs < PAGE_CACHE_SIZE)) { + if (WARN_ON_ONCE(bs < PAGE_SIZE)) { /* * Btrfs does not support blocksize < page_size. As a * result, btrfs_cmp_data() won't correctly handle @@ -3891,8 +3891,8 @@ static noinline int btrfs_clone_files(struct file *file, struct file *file_src, * data immediately and not the previous data. 
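The truncate_inode_pages_range() call just below rounds the clone destination outward to whole pages before dropping cached data. A standalone model of that rounding, assuming 4 KiB pages; the round_down/round_up spellings here are the usual power-of-two forms written out by hand, not copied from the kernel headers:

  #include <assert.h>
  #include <stdio.h>

  #define PAGE_SIZE	4096UL

  /* power-of-two rounding, as used by the btrfs_clone_files() hunk below */
  #define round_down(x, a)	((x) & ~((a) - 1))
  #define round_up(x, a)	(((x) + (a) - 1) & ~((a) - 1))

  int main(void)
  {
  	unsigned long destoff = 5000, len = 10000;

  	unsigned long first = round_down(destoff, PAGE_SIZE);
  	unsigned long last = round_up(destoff + len, PAGE_SIZE) - 1;

  	/* invalidate the whole pages covering [5000, 14999] */
  	printf("invalidate [%lu, %lu]\n", first, last);
  	assert(first == 4096 && last == 16383);
  	return 0;
  }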
*/ truncate_inode_pages_range(&inode->i_data, - round_down(destoff, PAGE_CACHE_SIZE), - round_up(destoff + len, PAGE_CACHE_SIZE) - 1); + round_down(destoff, PAGE_SIZE), + round_up(destoff + len, PAGE_SIZE) - 1); out_unlock: if (!same_inode) btrfs_double_inode_unlock(src, inode); @@ -4124,7 +4124,7 @@ static long btrfs_ioctl_space_info(struct btrfs_root *root, void __user *arg) /* we generally have at most 6 or so space infos, one for each raid * level. So, a whole page should be more than enough for everyone */ - if (alloc_size > PAGE_CACHE_SIZE) + if (alloc_size > PAGE_SIZE) return -ENOMEM; space_args.total_spaces = 0; diff --git a/fs/btrfs/lzo.c b/fs/btrfs/lzo.c index a2f051347731..1adfbe7be6b8 100644 --- a/fs/btrfs/lzo.c +++ b/fs/btrfs/lzo.c @@ -55,8 +55,8 @@ static struct list_head *lzo_alloc_workspace(void) return ERR_PTR(-ENOMEM); workspace->mem = vmalloc(LZO1X_MEM_COMPRESS); - workspace->buf = vmalloc(lzo1x_worst_compress(PAGE_CACHE_SIZE)); - workspace->cbuf = vmalloc(lzo1x_worst_compress(PAGE_CACHE_SIZE)); + workspace->buf = vmalloc(lzo1x_worst_compress(PAGE_SIZE)); + workspace->cbuf = vmalloc(lzo1x_worst_compress(PAGE_SIZE)); if (!workspace->mem || !workspace->buf || !workspace->cbuf) goto fail; @@ -116,7 +116,7 @@ static int lzo_compress_pages(struct list_head *ws, *total_out = 0; *total_in = 0; - in_page = find_get_page(mapping, start >> PAGE_CACHE_SHIFT); + in_page = find_get_page(mapping, start >> PAGE_SHIFT); data_in = kmap(in_page); /* @@ -133,10 +133,10 @@ static int lzo_compress_pages(struct list_head *ws, tot_out = LZO_LEN; pages[0] = out_page; nr_pages = 1; - pg_bytes_left = PAGE_CACHE_SIZE - LZO_LEN; + pg_bytes_left = PAGE_SIZE - LZO_LEN; /* compress at most one page of data each time */ - in_len = min(len, PAGE_CACHE_SIZE); + in_len = min(len, PAGE_SIZE); while (tot_in < len) { ret = lzo1x_1_compress(data_in, in_len, workspace->cbuf, &out_len, workspace->mem); @@ -201,7 +201,7 @@ static int lzo_compress_pages(struct list_head *ws, cpage_out = kmap(out_page); pages[nr_pages++] = out_page; - pg_bytes_left = PAGE_CACHE_SIZE; + pg_bytes_left = PAGE_SIZE; out_offset = 0; } } @@ -221,12 +221,12 @@ static int lzo_compress_pages(struct list_head *ws, bytes_left = len - tot_in; kunmap(in_page); - page_cache_release(in_page); + put_page(in_page); - start += PAGE_CACHE_SIZE; - in_page = find_get_page(mapping, start >> PAGE_CACHE_SHIFT); + start += PAGE_SIZE; + in_page = find_get_page(mapping, start >> PAGE_SHIFT); data_in = kmap(in_page); - in_len = min(bytes_left, PAGE_CACHE_SIZE); + in_len = min(bytes_left, PAGE_SIZE); } if (tot_out > tot_in) @@ -248,7 +248,7 @@ out: if (in_page) { kunmap(in_page); - page_cache_release(in_page); + put_page(in_page); } return ret; @@ -266,7 +266,7 @@ static int lzo_decompress_biovec(struct list_head *ws, char *data_in; unsigned long page_in_index = 0; unsigned long page_out_index = 0; - unsigned long total_pages_in = DIV_ROUND_UP(srclen, PAGE_CACHE_SIZE); + unsigned long total_pages_in = DIV_ROUND_UP(srclen, PAGE_SIZE); unsigned long buf_start; unsigned long buf_offset = 0; unsigned long bytes; @@ -289,7 +289,7 @@ static int lzo_decompress_biovec(struct list_head *ws, tot_in = LZO_LEN; in_offset = LZO_LEN; tot_len = min_t(size_t, srclen, tot_len); - in_page_bytes_left = PAGE_CACHE_SIZE - LZO_LEN; + in_page_bytes_left = PAGE_SIZE - LZO_LEN; tot_out = 0; pg_offset = 0; @@ -345,12 +345,12 @@ cont: data_in = kmap(pages_in[++page_in_index]); - in_page_bytes_left = PAGE_CACHE_SIZE; + in_page_bytes_left = PAGE_SIZE; in_offset = 0; } } - out_len = 
lzo1x_worst_compress(PAGE_CACHE_SIZE); + out_len = lzo1x_worst_compress(PAGE_SIZE); ret = lzo1x_decompress_safe(buf, in_len, workspace->buf, &out_len); if (need_unmap) @@ -399,7 +399,7 @@ static int lzo_decompress(struct list_head *ws, unsigned char *data_in, in_len = read_compress_length(data_in); data_in += LZO_LEN; - out_len = PAGE_CACHE_SIZE; + out_len = PAGE_SIZE; ret = lzo1x_decompress_safe(data_in, in_len, workspace->buf, &out_len); if (ret != LZO_E_OK) { printk(KERN_WARNING "BTRFS: decompress failed!\n"); diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c index 55161369fab1..0b7792e02dd5 100644 --- a/fs/btrfs/raid56.c +++ b/fs/btrfs/raid56.c @@ -270,7 +270,7 @@ static void cache_rbio_pages(struct btrfs_raid_bio *rbio) s = kmap(rbio->bio_pages[i]); d = kmap(rbio->stripe_pages[i]); - memcpy(d, s, PAGE_CACHE_SIZE); + memcpy(d, s, PAGE_SIZE); kunmap(rbio->bio_pages[i]); kunmap(rbio->stripe_pages[i]); @@ -962,7 +962,7 @@ static struct page *page_in_rbio(struct btrfs_raid_bio *rbio, */ static unsigned long rbio_nr_pages(unsigned long stripe_len, int nr_stripes) { - return DIV_ROUND_UP(stripe_len, PAGE_CACHE_SIZE) * nr_stripes; + return DIV_ROUND_UP(stripe_len, PAGE_SIZE) * nr_stripes; } /* @@ -1078,7 +1078,7 @@ static int rbio_add_io_page(struct btrfs_raid_bio *rbio, u64 disk_start; stripe = &rbio->bbio->stripes[stripe_nr]; - disk_start = stripe->physical + (page_index << PAGE_CACHE_SHIFT); + disk_start = stripe->physical + (page_index << PAGE_SHIFT); /* if the device is missing, just fail this stripe */ if (!stripe->dev->bdev) @@ -1096,8 +1096,8 @@ static int rbio_add_io_page(struct btrfs_raid_bio *rbio, if (last_end == disk_start && stripe->dev->bdev && !last->bi_error && last->bi_bdev == stripe->dev->bdev) { - ret = bio_add_page(last, page, PAGE_CACHE_SIZE, 0); - if (ret == PAGE_CACHE_SIZE) + ret = bio_add_page(last, page, PAGE_SIZE, 0); + if (ret == PAGE_SIZE) return 0; } } @@ -1111,7 +1111,7 @@ static int rbio_add_io_page(struct btrfs_raid_bio *rbio, bio->bi_bdev = stripe->dev->bdev; bio->bi_iter.bi_sector = disk_start >> 9; - bio_add_page(bio, page, PAGE_CACHE_SIZE, 0); + bio_add_page(bio, page, PAGE_SIZE, 0); bio_list_add(bio_list, bio); return 0; } @@ -1154,7 +1154,7 @@ static void index_rbio_pages(struct btrfs_raid_bio *rbio) bio_list_for_each(bio, &rbio->bio_list) { start = (u64)bio->bi_iter.bi_sector << 9; stripe_offset = start - rbio->bbio->raid_map[0]; - page_index = stripe_offset >> PAGE_CACHE_SHIFT; + page_index = stripe_offset >> PAGE_SHIFT; for (i = 0; i < bio->bi_vcnt; i++) { p = bio->bi_io_vec[i].bv_page; @@ -1253,7 +1253,7 @@ static noinline void finish_rmw(struct btrfs_raid_bio *rbio) } else { /* raid5 */ memcpy(pointers[nr_data], pointers[0], PAGE_SIZE); - run_xor(pointers + 1, nr_data - 1, PAGE_CACHE_SIZE); + run_xor(pointers + 1, nr_data - 1, PAGE_SIZE); } @@ -1914,7 +1914,7 @@ pstripe: /* Copy parity block into failed block to start with */ memcpy(pointers[faila], pointers[rbio->nr_data], - PAGE_CACHE_SIZE); + PAGE_SIZE); /* rearrange the pointer array */ p = pointers[faila]; @@ -1923,7 +1923,7 @@ pstripe: pointers[rbio->nr_data - 1] = p; /* xor in the rest */ - run_xor(pointers, rbio->nr_data - 1, PAGE_CACHE_SIZE); + run_xor(pointers, rbio->nr_data - 1, PAGE_SIZE); } /* if we're doing this rebuild as part of an rmw, go through * and set all of our private rbio pages in the @@ -2250,7 +2250,7 @@ void raid56_add_scrub_pages(struct btrfs_raid_bio *rbio, struct page *page, ASSERT(logical + PAGE_SIZE <= rbio->bbio->raid_map[0] + rbio->stripe_len * rbio->nr_data); 
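In the raid5 paths of finish_rmw() and finish_parity_scrub() above, parity is a plain XOR across the page-sized data buffers, which is also why a lost page can be rebuilt by XOR-ing parity with the survivors. A userspace model of that math; xor_into() is a made-up stand-in for run_xor(), not the kernel helper:

  #include <assert.h>
  #include <stdlib.h>
  #include <string.h>

  #define PAGE_SIZE 4096

  /* stand-in for run_xor(): fold the source page into dest */
  static void xor_into(unsigned char *dest, const unsigned char *src)
  {
  	for (size_t i = 0; i < PAGE_SIZE; i++)
  		dest[i] ^= src[i];
  }

  int main(void)
  {
  	unsigned char d0[PAGE_SIZE], d1[PAGE_SIZE];
  	unsigned char parity[PAGE_SIZE], rebuilt[PAGE_SIZE];

  	for (size_t i = 0; i < PAGE_SIZE; i++) {
  		d0[i] = (unsigned char)rand();
  		d1[i] = (unsigned char)rand();
  	}

  	/* parity = d0 ^ d1, as in the raid5 branch above */
  	memcpy(parity, d0, PAGE_SIZE);
  	xor_into(parity, d1);

  	/* losing d0 is recoverable: d0 = parity ^ d1 */
  	memcpy(rebuilt, parity, PAGE_SIZE);
  	xor_into(rebuilt, d1);
  	assert(memcmp(rebuilt, d0, PAGE_SIZE) == 0);
  	return 0;
  }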
stripe_offset = (int)(logical - rbio->bbio->raid_map[0]); - index = stripe_offset >> PAGE_CACHE_SHIFT; + index = stripe_offset >> PAGE_SHIFT; rbio->bio_pages[index] = page; } @@ -2365,14 +2365,14 @@ static noinline void finish_parity_scrub(struct btrfs_raid_bio *rbio, } else { /* raid5 */ memcpy(pointers[nr_data], pointers[0], PAGE_SIZE); - run_xor(pointers + 1, nr_data - 1, PAGE_CACHE_SIZE); + run_xor(pointers + 1, nr_data - 1, PAGE_SIZE); } /* Check scrubbing pairty and repair it */ p = rbio_stripe_page(rbio, rbio->scrubp, pagenr); parity = kmap(p); - if (memcmp(parity, pointers[rbio->scrubp], PAGE_CACHE_SIZE)) - memcpy(parity, pointers[rbio->scrubp], PAGE_CACHE_SIZE); + if (memcmp(parity, pointers[rbio->scrubp], PAGE_SIZE)) + memcpy(parity, pointers[rbio->scrubp], PAGE_SIZE); else /* Parity is right, needn't writeback */ bitmap_clear(rbio->dbitmap, pagenr, 1); diff --git a/fs/btrfs/reada.c b/fs/btrfs/reada.c index b892914968c1..298631eaee78 100644 --- a/fs/btrfs/reada.c +++ b/fs/btrfs/reada.c @@ -226,7 +226,7 @@ int btree_readahead_hook(struct btrfs_fs_info *fs_info, /* find extent */ spin_lock(&fs_info->reada_lock); re = radix_tree_lookup(&fs_info->reada_tree, - start >> PAGE_CACHE_SHIFT); + start >> PAGE_SHIFT); if (re) re->refcnt++; spin_unlock(&fs_info->reada_lock); @@ -257,7 +257,7 @@ static struct reada_zone *reada_find_zone(struct btrfs_fs_info *fs_info, zone = NULL; spin_lock(&fs_info->reada_lock); ret = radix_tree_gang_lookup(&dev->reada_zones, (void **)&zone, - logical >> PAGE_CACHE_SHIFT, 1); + logical >> PAGE_SHIFT, 1); if (ret == 1 && logical >= zone->start && logical <= zone->end) { kref_get(&zone->refcnt); spin_unlock(&fs_info->reada_lock); @@ -294,13 +294,13 @@ static struct reada_zone *reada_find_zone(struct btrfs_fs_info *fs_info, spin_lock(&fs_info->reada_lock); ret = radix_tree_insert(&dev->reada_zones, - (unsigned long)(zone->end >> PAGE_CACHE_SHIFT), + (unsigned long)(zone->end >> PAGE_SHIFT), zone); if (ret == -EEXIST) { kfree(zone); ret = radix_tree_gang_lookup(&dev->reada_zones, (void **)&zone, - logical >> PAGE_CACHE_SHIFT, 1); + logical >> PAGE_SHIFT, 1); if (ret == 1 && logical >= zone->start && logical <= zone->end) kref_get(&zone->refcnt); else @@ -326,7 +326,7 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root, u64 length; int real_stripes; int nzones = 0; - unsigned long index = logical >> PAGE_CACHE_SHIFT; + unsigned long index = logical >> PAGE_SHIFT; int dev_replace_is_ongoing; int have_zone = 0; @@ -495,7 +495,7 @@ static void reada_extent_put(struct btrfs_fs_info *fs_info, struct reada_extent *re) { int i; - unsigned long index = re->logical >> PAGE_CACHE_SHIFT; + unsigned long index = re->logical >> PAGE_SHIFT; spin_lock(&fs_info->reada_lock); if (--re->refcnt) { @@ -538,7 +538,7 @@ static void reada_zone_release(struct kref *kref) struct reada_zone *zone = container_of(kref, struct reada_zone, refcnt); radix_tree_delete(&zone->device->reada_zones, - zone->end >> PAGE_CACHE_SHIFT); + zone->end >> PAGE_SHIFT); kfree(zone); } @@ -587,7 +587,7 @@ static int reada_add_block(struct reada_control *rc, u64 logical, static void reada_peer_zones_set_lock(struct reada_zone *zone, int lock) { int i; - unsigned long index = zone->end >> PAGE_CACHE_SHIFT; + unsigned long index = zone->end >> PAGE_SHIFT; for (i = 0; i < zone->ndevs; ++i) { struct reada_zone *peer; @@ -622,7 +622,7 @@ static int reada_pick_zone(struct btrfs_device *dev) (void **)&zone, index, 1); if (ret == 0) break; - index = (zone->end >> PAGE_CACHE_SHIFT) + 1; + index = 
(zone->end >> PAGE_SHIFT) + 1; if (zone->locked) { if (zone->elems > top_locked_elems) { top_locked_elems = zone->elems; @@ -673,7 +673,7 @@ static int reada_start_machine_dev(struct btrfs_fs_info *fs_info, * plugging to speed things up */ ret = radix_tree_gang_lookup(&dev->reada_extents, (void **)&re, - dev->reada_next >> PAGE_CACHE_SHIFT, 1); + dev->reada_next >> PAGE_SHIFT, 1); if (ret == 0 || re->logical > dev->reada_curr_zone->end) { ret = reada_pick_zone(dev); if (!ret) { @@ -682,7 +682,7 @@ static int reada_start_machine_dev(struct btrfs_fs_info *fs_info, } re = NULL; ret = radix_tree_gang_lookup(&dev->reada_extents, (void **)&re, - dev->reada_next >> PAGE_CACHE_SHIFT, 1); + dev->reada_next >> PAGE_SHIFT, 1); } if (ret == 0) { spin_unlock(&fs_info->reada_lock); @@ -838,7 +838,7 @@ static void dump_devs(struct btrfs_fs_info *fs_info, int all) printk(KERN_CONT " curr off %llu", device->reada_next - zone->start); printk(KERN_CONT "\n"); - index = (zone->end >> PAGE_CACHE_SHIFT) + 1; + index = (zone->end >> PAGE_SHIFT) + 1; } cnt = 0; index = 0; @@ -864,7 +864,7 @@ static void dump_devs(struct btrfs_fs_info *fs_info, int all) } } printk(KERN_CONT "\n"); - index = (re->logical >> PAGE_CACHE_SHIFT) + 1; + index = (re->logical >> PAGE_SHIFT) + 1; if (++cnt > 15) break; } @@ -880,7 +880,7 @@ static void dump_devs(struct btrfs_fs_info *fs_info, int all) if (ret == 0) break; if (!re->scheduled) { - index = (re->logical >> PAGE_CACHE_SHIFT) + 1; + index = (re->logical >> PAGE_SHIFT) + 1; continue; } printk(KERN_DEBUG @@ -897,7 +897,7 @@ static void dump_devs(struct btrfs_fs_info *fs_info, int all) } } printk(KERN_CONT "\n"); - index = (re->logical >> PAGE_CACHE_SHIFT) + 1; + index = (re->logical >> PAGE_SHIFT) + 1; } spin_unlock(&fs_info->reada_lock); } diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 2bd0011450df..3c93968b539d 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -3129,10 +3129,10 @@ static int relocate_file_extent_cluster(struct inode *inode, if (ret) goto out; - index = (cluster->start - offset) >> PAGE_CACHE_SHIFT; - last_index = (cluster->end - offset) >> PAGE_CACHE_SHIFT; + index = (cluster->start - offset) >> PAGE_SHIFT; + last_index = (cluster->end - offset) >> PAGE_SHIFT; while (index <= last_index) { - ret = btrfs_delalloc_reserve_metadata(inode, PAGE_CACHE_SIZE); + ret = btrfs_delalloc_reserve_metadata(inode, PAGE_SIZE); if (ret) goto out; @@ -3145,7 +3145,7 @@ static int relocate_file_extent_cluster(struct inode *inode, mask); if (!page) { btrfs_delalloc_release_metadata(inode, - PAGE_CACHE_SIZE); + PAGE_SIZE); ret = -ENOMEM; goto out; } @@ -3162,16 +3162,16 @@ static int relocate_file_extent_cluster(struct inode *inode, lock_page(page); if (!PageUptodate(page)) { unlock_page(page); - page_cache_release(page); + put_page(page); btrfs_delalloc_release_metadata(inode, - PAGE_CACHE_SIZE); + PAGE_SIZE); ret = -EIO; goto out; } } page_start = page_offset(page); - page_end = page_start + PAGE_CACHE_SIZE - 1; + page_end = page_start + PAGE_SIZE - 1; lock_extent(&BTRFS_I(inode)->io_tree, page_start, page_end); @@ -3191,7 +3191,7 @@ static int relocate_file_extent_cluster(struct inode *inode, unlock_extent(&BTRFS_I(inode)->io_tree, page_start, page_end); unlock_page(page); - page_cache_release(page); + put_page(page); index++; balance_dirty_pages_ratelimited(inode->i_mapping); diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index 39dbdcbf4d13..4678f03e878e 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -703,7 +703,7 @@ static int 
scrub_fixup_readpage(u64 inum, u64 offset, u64 root, void *fixup_ctx) if (IS_ERR(inode)) return PTR_ERR(inode); - index = offset >> PAGE_CACHE_SHIFT; + index = offset >> PAGE_SHIFT; page = find_or_create_page(inode->i_mapping, index, GFP_NOFS); if (!page) { @@ -1636,7 +1636,7 @@ static int scrub_write_page_to_dev_replace(struct scrub_block *sblock, if (spage->io_error) { void *mapped_buffer = kmap_atomic(spage->page); - memset(mapped_buffer, 0, PAGE_CACHE_SIZE); + memset(mapped_buffer, 0, PAGE_SIZE); flush_dcache_page(spage->page); kunmap_atomic(mapped_buffer); } @@ -4294,8 +4294,8 @@ static int copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root, goto out; } - while (len >= PAGE_CACHE_SIZE) { - index = offset >> PAGE_CACHE_SHIFT; + while (len >= PAGE_SIZE) { + index = offset >> PAGE_SHIFT; again: page = find_or_create_page(inode->i_mapping, index, GFP_NOFS); if (!page) { @@ -4326,7 +4326,7 @@ again: */ if (page->mapping != inode->i_mapping) { unlock_page(page); - page_cache_release(page); + put_page(page); goto again; } if (!PageUptodate(page)) { @@ -4348,15 +4348,15 @@ again: ret = err; next_page: unlock_page(page); - page_cache_release(page); + put_page(page); if (ret) break; - offset += PAGE_CACHE_SIZE; - physical_for_dev_replace += PAGE_CACHE_SIZE; - nocow_ctx_logical += PAGE_CACHE_SIZE; - len -= PAGE_CACHE_SIZE; + offset += PAGE_SIZE; + physical_for_dev_replace += PAGE_SIZE; + nocow_ctx_logical += PAGE_SIZE; + len -= PAGE_SIZE; } ret = COPY_COMPLETE; out: @@ -4390,8 +4390,8 @@ static int write_page_nocow(struct scrub_ctx *sctx, bio->bi_iter.bi_size = 0; bio->bi_iter.bi_sector = physical_for_dev_replace >> 9; bio->bi_bdev = dev->bdev; - ret = bio_add_page(bio, page, PAGE_CACHE_SIZE, 0); - if (ret != PAGE_CACHE_SIZE) { + ret = bio_add_page(bio, page, PAGE_SIZE, 0); + if (ret != PAGE_SIZE) { leave_with_eio: bio_put(bio); btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_WRITE_ERRS); diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index 19b7bf4284ee..8d358c547c59 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -4449,9 +4449,9 @@ static ssize_t fill_read_buf(struct send_ctx *sctx, u64 offset, u32 len) struct page *page; char *addr; struct btrfs_key key; - pgoff_t index = offset >> PAGE_CACHE_SHIFT; + pgoff_t index = offset >> PAGE_SHIFT; pgoff_t last_index; - unsigned pg_offset = offset & ~PAGE_CACHE_MASK; + unsigned pg_offset = offset & ~PAGE_MASK; ssize_t ret = 0; key.objectid = sctx->cur_ino; @@ -4471,7 +4471,7 @@ static ssize_t fill_read_buf(struct send_ctx *sctx, u64 offset, u32 len) if (len == 0) goto out; - last_index = (offset + len - 1) >> PAGE_CACHE_SHIFT; + last_index = (offset + len - 1) >> PAGE_SHIFT; /* initial readahead */ memset(&sctx->ra, 0, sizeof(struct file_ra_state)); @@ -4481,7 +4481,7 @@ static ssize_t fill_read_buf(struct send_ctx *sctx, u64 offset, u32 len) while (index <= last_index) { unsigned cur_len = min_t(unsigned, len, - PAGE_CACHE_SIZE - pg_offset); + PAGE_SIZE - pg_offset); page = find_or_create_page(inode->i_mapping, index, GFP_KERNEL); if (!page) { ret = -ENOMEM; @@ -4493,7 +4493,7 @@ static ssize_t fill_read_buf(struct send_ctx *sctx, u64 offset, u32 len) lock_page(page); if (!PageUptodate(page)) { unlock_page(page); - page_cache_release(page); + put_page(page); ret = -EIO; break; } @@ -4503,7 +4503,7 @@ static ssize_t fill_read_buf(struct send_ctx *sctx, u64 offset, u32 len) memcpy(sctx->read_buf + ret, addr + pg_offset, cur_len); kunmap(page); unlock_page(page); - page_cache_release(page); + put_page(page); index++; pg_offset = 0; len -= 
cur_len; @@ -4804,7 +4804,7 @@ static int clone_range(struct send_ctx *sctx, type = btrfs_file_extent_type(leaf, ei); if (type == BTRFS_FILE_EXTENT_INLINE) { ext_len = btrfs_file_extent_inline_len(leaf, slot, ei); - ext_len = PAGE_CACHE_ALIGN(ext_len); + ext_len = PAGE_ALIGN(ext_len); } else { ext_len = btrfs_file_extent_num_bytes(leaf, ei); } @@ -4886,7 +4886,7 @@ static int send_write_or_clone(struct send_ctx *sctx, * but there may be items after this page. Make * sure to send the whole thing */ - len = PAGE_CACHE_ALIGN(len); + len = PAGE_ALIGN(len); } else { len = btrfs_file_extent_num_bytes(path->nodes[0], ei); } diff --git a/fs/btrfs/tests/extent-io-tests.c b/fs/btrfs/tests/extent-io-tests.c index 669b58201e36..ac3a06d28531 100644 --- a/fs/btrfs/tests/extent-io-tests.c +++ b/fs/btrfs/tests/extent-io-tests.c @@ -32,8 +32,8 @@ static noinline int process_page_range(struct inode *inode, u64 start, u64 end, { int ret; struct page *pages[16]; - unsigned long index = start >> PAGE_CACHE_SHIFT; - unsigned long end_index = end >> PAGE_CACHE_SHIFT; + unsigned long index = start >> PAGE_SHIFT; + unsigned long end_index = end >> PAGE_SHIFT; unsigned long nr_pages = end_index - index + 1; int i; int count = 0; @@ -49,9 +49,9 @@ static noinline int process_page_range(struct inode *inode, u64 start, u64 end, count++; if (flags & PROCESS_UNLOCK && PageLocked(pages[i])) unlock_page(pages[i]); - page_cache_release(pages[i]); + put_page(pages[i]); if (flags & PROCESS_RELEASE) - page_cache_release(pages[i]); + put_page(pages[i]); } nr_pages -= ret; index += ret; @@ -93,7 +93,7 @@ static int test_find_delalloc(void) * everything to make sure our pages don't get evicted and screw up our * test. */ - for (index = 0; index < (total_dirty >> PAGE_CACHE_SHIFT); index++) { + for (index = 0; index < (total_dirty >> PAGE_SHIFT); index++) { page = find_or_create_page(inode->i_mapping, index, GFP_KERNEL); if (!page) { test_msg("Failed to allocate test page\n"); @@ -104,7 +104,7 @@ static int test_find_delalloc(void) if (index) { unlock_page(page); } else { - page_cache_get(page); + get_page(page); locked_page = page; } } @@ -129,7 +129,7 @@ static int test_find_delalloc(void) } unlock_extent(&tmp, start, end); unlock_page(locked_page); - page_cache_release(locked_page); + put_page(locked_page); /* * Test this scenario @@ -139,7 +139,7 @@ static int test_find_delalloc(void) */ test_start = SZ_64M; locked_page = find_lock_page(inode->i_mapping, - test_start >> PAGE_CACHE_SHIFT); + test_start >> PAGE_SHIFT); if (!locked_page) { test_msg("Couldn't find the locked page\n"); goto out_bits; @@ -165,7 +165,7 @@ static int test_find_delalloc(void) } unlock_extent(&tmp, start, end); /* locked_page was unlocked above */ - page_cache_release(locked_page); + put_page(locked_page); /* * Test this scenario @@ -174,7 +174,7 @@ static int test_find_delalloc(void) */ test_start = max_bytes + 4096; locked_page = find_lock_page(inode->i_mapping, test_start >> - PAGE_CACHE_SHIFT); + PAGE_SHIFT); if (!locked_page) { test_msg("Could'nt find the locked page\n"); goto out_bits; @@ -225,13 +225,13 @@ static int test_find_delalloc(void) * range we want to find. 
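fill_read_buf() above is the canonical page walk: start mid-page, copy up to the page boundary, then continue from offset 0 of each following page. The arithmetic as a standalone sketch with 4 KiB pages; the copy step itself is elided:

  #include <assert.h>
  #include <stdio.h>

  #define PAGE_SHIFT	12
  #define PAGE_SIZE	(1UL << PAGE_SHIFT)
  #define PAGE_MASK	(~(PAGE_SIZE - 1))

  int main(void)
  {
  	unsigned long offset = 5000, len = 10000, total = 0;
  	unsigned long index = offset >> PAGE_SHIFT;		/* first page */
  	unsigned long last_index = (offset + len - 1) >> PAGE_SHIFT;
  	unsigned long pg_offset = offset & ~PAGE_MASK;		/* start in page */
  	int pages = 0;

  	while (index <= last_index) {
  		unsigned long cur_len = len < PAGE_SIZE - pg_offset ?
  					len : PAGE_SIZE - pg_offset;
  		/* ...kmap(page) + memcpy() would happen here... */
  		total += cur_len;
  		len -= cur_len;
  		index++;
  		pg_offset = 0;	/* later pages are read from their start */
  		pages++;
  	}
  	assert(total == 10000 && len == 0 && pages == 3);
  	printf("copied %lu bytes across %d pages\n", total, pages);
  	return 0;
  }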
*/ page = find_get_page(inode->i_mapping, - (max_bytes + SZ_1M) >> PAGE_CACHE_SHIFT); + (max_bytes + SZ_1M) >> PAGE_SHIFT); if (!page) { test_msg("Couldn't find our page\n"); goto out_bits; } ClearPageDirty(page); - page_cache_release(page); + put_page(page); /* We unlocked it in the previous test */ lock_page(locked_page); @@ -249,9 +249,9 @@ static int test_find_delalloc(void) test_msg("Didn't find our range\n"); goto out_bits; } - if (start != test_start && end != test_start + PAGE_CACHE_SIZE - 1) { + if (start != test_start && end != test_start + PAGE_SIZE - 1) { test_msg("Expected start %Lu end %Lu, got start %Lu end %Lu\n", - test_start, test_start + PAGE_CACHE_SIZE - 1, start, + test_start, test_start + PAGE_SIZE - 1, start, end); goto out_bits; } @@ -265,7 +265,7 @@ out_bits: clear_extent_bits(&tmp, 0, total_dirty - 1, (unsigned)-1, GFP_KERNEL); out: if (locked_page) - page_cache_release(locked_page); + put_page(locked_page); process_page_range(inode, 0, total_dirty - 1, PROCESS_UNLOCK | PROCESS_RELEASE); iput(inode); @@ -298,9 +298,9 @@ static int __test_eb_bitmaps(unsigned long *bitmap, struct extent_buffer *eb, return -EINVAL; } - bitmap_set(bitmap, (PAGE_CACHE_SIZE - sizeof(long) / 2) * BITS_PER_BYTE, + bitmap_set(bitmap, (PAGE_SIZE - sizeof(long) / 2) * BITS_PER_BYTE, sizeof(long) * BITS_PER_BYTE); - extent_buffer_bitmap_set(eb, PAGE_CACHE_SIZE - sizeof(long) / 2, 0, + extent_buffer_bitmap_set(eb, PAGE_SIZE - sizeof(long) / 2, 0, sizeof(long) * BITS_PER_BYTE); if (memcmp_extent_buffer(eb, bitmap, 0, len) != 0) { test_msg("Setting straddling pages failed\n"); @@ -309,10 +309,10 @@ static int __test_eb_bitmaps(unsigned long *bitmap, struct extent_buffer *eb, bitmap_set(bitmap, 0, len * BITS_PER_BYTE); bitmap_clear(bitmap, - (PAGE_CACHE_SIZE - sizeof(long) / 2) * BITS_PER_BYTE, + (PAGE_SIZE - sizeof(long) / 2) * BITS_PER_BYTE, sizeof(long) * BITS_PER_BYTE); extent_buffer_bitmap_set(eb, 0, 0, len * BITS_PER_BYTE); - extent_buffer_bitmap_clear(eb, PAGE_CACHE_SIZE - sizeof(long) / 2, 0, + extent_buffer_bitmap_clear(eb, PAGE_SIZE - sizeof(long) / 2, 0, sizeof(long) * BITS_PER_BYTE); if (memcmp_extent_buffer(eb, bitmap, 0, len) != 0) { test_msg("Clearing straddling pages failed\n"); @@ -353,7 +353,7 @@ static int __test_eb_bitmaps(unsigned long *bitmap, struct extent_buffer *eb, static int test_eb_bitmaps(void) { - unsigned long len = PAGE_CACHE_SIZE * 4; + unsigned long len = PAGE_SIZE * 4; unsigned long *bitmap; struct extent_buffer *eb; int ret; @@ -379,7 +379,7 @@ static int test_eb_bitmaps(void) /* Do it over again with an extent buffer which isn't page-aligned. 
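The bitmap tests in __test_eb_bitmaps() above pick their start offset as PAGE_SIZE - sizeof(long)/2 precisely so a run of one long's worth of bits crosses a page boundary. Checking that claim in userspace, 4 KiB pages assumed:

  #include <assert.h>
  #include <stdio.h>

  #define PAGE_SIZE	4096UL
  #define BITS_PER_BYTE	8

  int main(void)
  {
  	/* first bit of the run, as in __test_eb_bitmaps() above */
  	unsigned long start_bit = (PAGE_SIZE - sizeof(long) / 2) * BITS_PER_BYTE;
  	unsigned long nbits = sizeof(long) * BITS_PER_BYTE;
  	unsigned long last_bit = start_bit + nbits - 1;

  	unsigned long first_page = (start_bit / BITS_PER_BYTE) / PAGE_SIZE;
  	unsigned long last_page = (last_bit / BITS_PER_BYTE) / PAGE_SIZE;

  	/* with 8-byte longs the run covers bytes 4092..4099: two pages */
  	printf("run spans pages %lu..%lu\n", first_page, last_page);
  	assert(first_page == 0 && last_page == 1);
  	return 0;
  }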
*/ free_extent_buffer(eb); - eb = __alloc_dummy_extent_buffer(NULL, PAGE_CACHE_SIZE / 2, len); + eb = __alloc_dummy_extent_buffer(NULL, PAGE_SIZE / 2, len); if (!eb) { test_msg("Couldn't allocate test extent buffer\n"); kfree(bitmap); diff --git a/fs/btrfs/tests/free-space-tests.c b/fs/btrfs/tests/free-space-tests.c index c9ad97b1e690..514247515312 100644 --- a/fs/btrfs/tests/free-space-tests.c +++ b/fs/btrfs/tests/free-space-tests.c @@ -22,7 +22,7 @@ #include "../disk-io.h" #include "../free-space-cache.h" -#define BITS_PER_BITMAP (PAGE_CACHE_SIZE * 8) +#define BITS_PER_BITMAP (PAGE_SIZE * 8) /* * This test just does basic sanity checking, making sure we can add an extent diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index e2b54d546b7c..bd0f45fb38c4 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -1025,16 +1025,16 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder, } /* make sure our super fits in the device */ - if (bytenr + PAGE_CACHE_SIZE >= i_size_read(bdev->bd_inode)) + if (bytenr + PAGE_SIZE >= i_size_read(bdev->bd_inode)) goto error_bdev_put; /* make sure our super fits in the page */ - if (sizeof(*disk_super) > PAGE_CACHE_SIZE) + if (sizeof(*disk_super) > PAGE_SIZE) goto error_bdev_put; /* make sure our super doesn't straddle pages on disk */ - index = bytenr >> PAGE_CACHE_SHIFT; - if ((bytenr + sizeof(*disk_super) - 1) >> PAGE_CACHE_SHIFT != index) + index = bytenr >> PAGE_SHIFT; + if ((bytenr + sizeof(*disk_super) - 1) >> PAGE_SHIFT != index) goto error_bdev_put; /* pull in the page with our super */ @@ -1047,7 +1047,7 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder, p = kmap(page); /* align our pointer to the offset of the super block */ - disk_super = p + (bytenr & ~PAGE_CACHE_MASK); + disk_super = p + (bytenr & ~PAGE_MASK); if (btrfs_super_bytenr(disk_super) != bytenr || btrfs_super_magic(disk_super) != BTRFS_MAGIC) @@ -1075,7 +1075,7 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder, error_unmap: kunmap(page); - page_cache_release(page); + put_page(page); error_bdev_put: blkdev_put(bdev, flags); @@ -6527,7 +6527,7 @@ int btrfs_read_sys_array(struct btrfs_root *root) * but sb spans only this function. Add an explicit SetPageUptodate call * to silence the warning eg. on PowerPC 64. 
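The superblock checks in btrfs_scan_one_device() above are worth unpacking: btrfs keeps its primary super at byte 65536 and BTRFS_SUPER_INFO_SIZE is 4096, so on 4 KiB pages the super is exactly page 16 and can never straddle; the check guards odd configurations anyway. A standalone model of the straddle test, PAGE_SHIFT == 12 assumed (on 64 KiB-page machines it would be 16):

  #include <assert.h>
  #include <stdio.h>

  #define PAGE_SHIFT	12
  #define PAGE_SIZE	(1UL << PAGE_SHIFT)

  int main(void)
  {
  	unsigned long bytenr = 65536;		/* btrfs primary super offset */
  	unsigned long super_size = 4096;	/* BTRFS_SUPER_INFO_SIZE */

  	unsigned long index = bytenr >> PAGE_SHIFT;

  	/* straddle check from btrfs_scan_one_device(): the first and
  	 * last byte of the super must land in the same page */
  	int straddles = ((bytenr + super_size - 1) >> PAGE_SHIFT) != index;

  	printf("page index %lu, straddles=%d\n", index, straddles);
  	assert(!straddles);
  	return 0;
  }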
*/ - if (PAGE_CACHE_SIZE > BTRFS_SUPER_INFO_SIZE) + if (PAGE_SIZE > BTRFS_SUPER_INFO_SIZE) SetPageUptodate(sb->pages[0]); write_extent_buffer(sb, super_copy, 0, BTRFS_SUPER_INFO_SIZE); diff --git a/fs/btrfs/zlib.c b/fs/btrfs/zlib.c index 82990b8f872b..88d274e8ecf2 100644 --- a/fs/btrfs/zlib.c +++ b/fs/btrfs/zlib.c @@ -59,7 +59,7 @@ static struct list_head *zlib_alloc_workspace(void) workspacesize = max(zlib_deflate_workspacesize(MAX_WBITS, MAX_MEM_LEVEL), zlib_inflate_workspacesize()); workspace->strm.workspace = vmalloc(workspacesize); - workspace->buf = kmalloc(PAGE_CACHE_SIZE, GFP_NOFS); + workspace->buf = kmalloc(PAGE_SIZE, GFP_NOFS); if (!workspace->strm.workspace || !workspace->buf) goto fail; @@ -103,7 +103,7 @@ static int zlib_compress_pages(struct list_head *ws, workspace->strm.total_in = 0; workspace->strm.total_out = 0; - in_page = find_get_page(mapping, start >> PAGE_CACHE_SHIFT); + in_page = find_get_page(mapping, start >> PAGE_SHIFT); data_in = kmap(in_page); out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM); @@ -117,8 +117,8 @@ static int zlib_compress_pages(struct list_head *ws, workspace->strm.next_in = data_in; workspace->strm.next_out = cpage_out; - workspace->strm.avail_out = PAGE_CACHE_SIZE; - workspace->strm.avail_in = min(len, PAGE_CACHE_SIZE); + workspace->strm.avail_out = PAGE_SIZE; + workspace->strm.avail_in = min(len, PAGE_SIZE); while (workspace->strm.total_in < len) { ret = zlib_deflate(&workspace->strm, Z_SYNC_FLUSH); @@ -156,7 +156,7 @@ static int zlib_compress_pages(struct list_head *ws, cpage_out = kmap(out_page); pages[nr_pages] = out_page; nr_pages++; - workspace->strm.avail_out = PAGE_CACHE_SIZE; + workspace->strm.avail_out = PAGE_SIZE; workspace->strm.next_out = cpage_out; } /* we're all done */ @@ -170,14 +170,14 @@ static int zlib_compress_pages(struct list_head *ws, bytes_left = len - workspace->strm.total_in; kunmap(in_page); - page_cache_release(in_page); + put_page(in_page); - start += PAGE_CACHE_SIZE; + start += PAGE_SIZE; in_page = find_get_page(mapping, - start >> PAGE_CACHE_SHIFT); + start >> PAGE_SHIFT); data_in = kmap(in_page); workspace->strm.avail_in = min(bytes_left, - PAGE_CACHE_SIZE); + PAGE_SIZE); workspace->strm.next_in = data_in; } } @@ -205,7 +205,7 @@ out: if (in_page) { kunmap(in_page); - page_cache_release(in_page); + put_page(in_page); } return ret; } @@ -223,18 +223,18 @@ static int zlib_decompress_biovec(struct list_head *ws, struct page **pages_in, size_t total_out = 0; unsigned long page_in_index = 0; unsigned long page_out_index = 0; - unsigned long total_pages_in = DIV_ROUND_UP(srclen, PAGE_CACHE_SIZE); + unsigned long total_pages_in = DIV_ROUND_UP(srclen, PAGE_SIZE); unsigned long buf_start; unsigned long pg_offset; data_in = kmap(pages_in[page_in_index]); workspace->strm.next_in = data_in; - workspace->strm.avail_in = min_t(size_t, srclen, PAGE_CACHE_SIZE); + workspace->strm.avail_in = min_t(size_t, srclen, PAGE_SIZE); workspace->strm.total_in = 0; workspace->strm.total_out = 0; workspace->strm.next_out = workspace->buf; - workspace->strm.avail_out = PAGE_CACHE_SIZE; + workspace->strm.avail_out = PAGE_SIZE; pg_offset = 0; /* If it's deflate, and it's got no preset dictionary, then @@ -274,7 +274,7 @@ static int zlib_decompress_biovec(struct list_head *ws, struct page **pages_in, } workspace->strm.next_out = workspace->buf; - workspace->strm.avail_out = PAGE_CACHE_SIZE; + workspace->strm.avail_out = PAGE_SIZE; if (workspace->strm.avail_in == 0) { unsigned long tmp; @@ -288,7 +288,7 @@ static int 
zlib_decompress_biovec(struct list_head *ws, struct page **pages_in, workspace->strm.next_in = data_in; tmp = srclen - workspace->strm.total_in; workspace->strm.avail_in = min(tmp, - PAGE_CACHE_SIZE); + PAGE_SIZE); } } if (ret != Z_STREAM_END) @@ -325,7 +325,7 @@ static int zlib_decompress(struct list_head *ws, unsigned char *data_in, workspace->strm.total_in = 0; workspace->strm.next_out = workspace->buf; - workspace->strm.avail_out = PAGE_CACHE_SIZE; + workspace->strm.avail_out = PAGE_SIZE; workspace->strm.total_out = 0; /* If it's deflate, and it's got no preset dictionary, then we can tell zlib to skip the adler32 check. */ @@ -368,8 +368,8 @@ static int zlib_decompress(struct list_head *ws, unsigned char *data_in, else buf_offset = 0; - bytes = min(PAGE_CACHE_SIZE - pg_offset, - PAGE_CACHE_SIZE - buf_offset); + bytes = min(PAGE_SIZE - pg_offset, + PAGE_SIZE - buf_offset); bytes = min(bytes, bytes_left); kaddr = kmap_atomic(dest_page); @@ -380,7 +380,7 @@ static int zlib_decompress(struct list_head *ws, unsigned char *data_in, bytes_left -= bytes; next: workspace->strm.next_out = workspace->buf; - workspace->strm.avail_out = PAGE_CACHE_SIZE; + workspace->strm.avail_out = PAGE_SIZE; } if (ret != Z_STREAM_END && bytes_left != 0) diff --git a/fs/buffer.c b/fs/buffer.c index 33be29675358..af0d9a82a8ed 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -129,7 +129,7 @@ __clear_page_buffers(struct page *page) { ClearPagePrivate(page); set_page_private(page, 0); - page_cache_release(page); + put_page(page); } static void buffer_io_error(struct buffer_head *bh, char *msg) @@ -207,7 +207,7 @@ __find_get_block_slow(struct block_device *bdev, sector_t block) struct page *page; int all_mapped = 1; - index = block >> (PAGE_CACHE_SHIFT - bd_inode->i_blkbits); + index = block >> (PAGE_SHIFT - bd_inode->i_blkbits); page = find_get_page_flags(bd_mapping, index, FGP_ACCESSED); if (!page) goto out; @@ -245,7 +245,7 @@ __find_get_block_slow(struct block_device *bdev, sector_t block) } out_unlock: spin_unlock(&bd_mapping->private_lock); - page_cache_release(page); + put_page(page); out: return ret; } @@ -1040,7 +1040,7 @@ done: ret = (block < end_block) ? 
1 : -ENXIO; failed: unlock_page(page); - page_cache_release(page); + put_page(page); return ret; } @@ -1533,7 +1533,7 @@ void block_invalidatepage(struct page *page, unsigned int offset, /* * Check for overflow */ - BUG_ON(stop > PAGE_CACHE_SIZE || stop < length); + BUG_ON(stop > PAGE_SIZE || stop < length); head = page_buffers(page); bh = head; @@ -1716,7 +1716,7 @@ static int __block_write_full_page(struct inode *inode, struct page *page, blocksize = bh->b_size; bbits = block_size_bits(blocksize); - block = (sector_t)page->index << (PAGE_CACHE_SHIFT - bbits); + block = (sector_t)page->index << (PAGE_SHIFT - bbits); last_block = (i_size_read(inode) - 1) >> bbits; /* @@ -1894,7 +1894,7 @@ EXPORT_SYMBOL(page_zero_new_buffers); int __block_write_begin(struct page *page, loff_t pos, unsigned len, get_block_t *get_block) { - unsigned from = pos & (PAGE_CACHE_SIZE - 1); + unsigned from = pos & (PAGE_SIZE - 1); unsigned to = from + len; struct inode *inode = page->mapping->host; unsigned block_start, block_end; @@ -1904,15 +1904,15 @@ int __block_write_begin(struct page *page, loff_t pos, unsigned len, struct buffer_head *bh, *head, *wait[2], **wait_bh=wait; BUG_ON(!PageLocked(page)); - BUG_ON(from > PAGE_CACHE_SIZE); - BUG_ON(to > PAGE_CACHE_SIZE); + BUG_ON(from > PAGE_SIZE); + BUG_ON(to > PAGE_SIZE); BUG_ON(from > to); head = create_page_buffers(page, inode, 0); blocksize = head->b_size; bbits = block_size_bits(blocksize); - block = (sector_t)page->index << (PAGE_CACHE_SHIFT - bbits); + block = (sector_t)page->index << (PAGE_SHIFT - bbits); for(bh = head, block_start = 0; bh != head || !block_start; block++, block_start=block_end, bh = bh->b_this_page) { @@ -2020,7 +2020,7 @@ static int __block_commit_write(struct inode *inode, struct page *page, int block_write_begin(struct address_space *mapping, loff_t pos, unsigned len, unsigned flags, struct page **pagep, get_block_t *get_block) { - pgoff_t index = pos >> PAGE_CACHE_SHIFT; + pgoff_t index = pos >> PAGE_SHIFT; struct page *page; int status; @@ -2031,7 +2031,7 @@ int block_write_begin(struct address_space *mapping, loff_t pos, unsigned len, status = __block_write_begin(page, pos, len, get_block); if (unlikely(status)) { unlock_page(page); - page_cache_release(page); + put_page(page); page = NULL; } @@ -2047,7 +2047,7 @@ int block_write_end(struct file *file, struct address_space *mapping, struct inode *inode = mapping->host; unsigned start; - start = pos & (PAGE_CACHE_SIZE - 1); + start = pos & (PAGE_SIZE - 1); if (unlikely(copied < len)) { /* @@ -2099,7 +2099,7 @@ int generic_write_end(struct file *file, struct address_space *mapping, } unlock_page(page); - page_cache_release(page); + put_page(page); if (old_size < pos) pagecache_isize_extended(inode, old_size, pos); @@ -2136,9 +2136,9 @@ int block_is_partially_uptodate(struct page *page, unsigned long from, head = page_buffers(page); blocksize = head->b_size; - to = min_t(unsigned, PAGE_CACHE_SIZE - from, count); + to = min_t(unsigned, PAGE_SIZE - from, count); to = from + to; - if (from < blocksize && to > PAGE_CACHE_SIZE - blocksize) + if (from < blocksize && to > PAGE_SIZE - blocksize) return 0; bh = head; @@ -2181,7 +2181,7 @@ int block_read_full_page(struct page *page, get_block_t *get_block) blocksize = head->b_size; bbits = block_size_bits(blocksize); - iblock = (sector_t)page->index << (PAGE_CACHE_SHIFT - bbits); + iblock = (sector_t)page->index << (PAGE_SHIFT - bbits); lblock = (i_size_read(inode)+blocksize-1) >> bbits; bh = head; nr = 0; @@ -2295,16 +2295,16 @@ static int 
cont_expand_zero(struct file *file, struct address_space *mapping, unsigned zerofrom, offset, len; int err = 0; - index = pos >> PAGE_CACHE_SHIFT; - offset = pos & ~PAGE_CACHE_MASK; + index = pos >> PAGE_SHIFT; + offset = pos & ~PAGE_MASK; - while (index > (curidx = (curpos = *bytes)>>PAGE_CACHE_SHIFT)) { - zerofrom = curpos & ~PAGE_CACHE_MASK; + while (index > (curidx = (curpos = *bytes)>>PAGE_SHIFT)) { + zerofrom = curpos & ~PAGE_MASK; if (zerofrom & (blocksize-1)) { *bytes |= (blocksize-1); (*bytes)++; } - len = PAGE_CACHE_SIZE - zerofrom; + len = PAGE_SIZE - zerofrom; err = pagecache_write_begin(file, mapping, curpos, len, AOP_FLAG_UNINTERRUPTIBLE, @@ -2329,7 +2329,7 @@ static int cont_expand_zero(struct file *file, struct address_space *mapping, /* page covers the boundary, find the boundary offset */ if (index == curidx) { - zerofrom = curpos & ~PAGE_CACHE_MASK; + zerofrom = curpos & ~PAGE_MASK; /* if we will expand the thing last block will be filled */ if (offset <= zerofrom) { goto out; @@ -2375,7 +2375,7 @@ int cont_write_begin(struct file *file, struct address_space *mapping, if (err) return err; - zerofrom = *bytes & ~PAGE_CACHE_MASK; + zerofrom = *bytes & ~PAGE_MASK; if (pos+len > *bytes && zerofrom & (blocksize-1)) { *bytes |= (blocksize-1); (*bytes)++; @@ -2430,10 +2430,10 @@ int block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf, } /* page is wholly or partially inside EOF */ - if (((page->index + 1) << PAGE_CACHE_SHIFT) > size) - end = size & ~PAGE_CACHE_MASK; + if (((page->index + 1) << PAGE_SHIFT) > size) + end = size & ~PAGE_MASK; else - end = PAGE_CACHE_SIZE; + end = PAGE_SIZE; ret = __block_write_begin(page, 0, end, get_block); if (!ret) @@ -2508,8 +2508,8 @@ int nobh_write_begin(struct address_space *mapping, int ret = 0; int is_mapped_to_disk = 1; - index = pos >> PAGE_CACHE_SHIFT; - from = pos & (PAGE_CACHE_SIZE - 1); + index = pos >> PAGE_SHIFT; + from = pos & (PAGE_SIZE - 1); to = from + len; page = grab_cache_page_write_begin(mapping, index, flags); @@ -2543,7 +2543,7 @@ int nobh_write_begin(struct address_space *mapping, goto out_release; } - block_in_file = (sector_t)page->index << (PAGE_CACHE_SHIFT - blkbits); + block_in_file = (sector_t)page->index << (PAGE_SHIFT - blkbits); /* * We loop across all blocks in the page, whether or not they are @@ -2551,7 +2551,7 @@ int nobh_write_begin(struct address_space *mapping, * page is fully mapped-to-disk. */ for (block_start = 0, block_in_page = 0, bh = head; - block_start < PAGE_CACHE_SIZE; + block_start < PAGE_SIZE; block_in_page++, block_start += blocksize, bh = bh->b_this_page) { int create; @@ -2623,7 +2623,7 @@ failed: out_release: unlock_page(page); - page_cache_release(page); + put_page(page); *pagep = NULL; return ret; @@ -2653,7 +2653,7 @@ int nobh_write_end(struct file *file, struct address_space *mapping, } unlock_page(page); - page_cache_release(page); + put_page(page); while (head) { bh = head; @@ -2675,7 +2675,7 @@ int nobh_writepage(struct page *page, get_block_t *get_block, { struct inode * const inode = page->mapping->host; loff_t i_size = i_size_read(inode); - const pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT; + const pgoff_t end_index = i_size >> PAGE_SHIFT; unsigned offset; int ret; @@ -2684,7 +2684,7 @@ int nobh_writepage(struct page *page, get_block_t *get_block, goto out; /* Is the page fully outside i_size? 
(truncate in progress) */ - offset = i_size & (PAGE_CACHE_SIZE-1); + offset = i_size & (PAGE_SIZE-1); if (page->index >= end_index+1 || !offset) { /* * The page may have dirty, unmapped buffers. For example, @@ -2707,7 +2707,7 @@ int nobh_writepage(struct page *page, get_block_t *get_block, * the page size, the remaining memory is zeroed when mapped, and * writes to that region are not written out to the file." */ - zero_user_segment(page, offset, PAGE_CACHE_SIZE); + zero_user_segment(page, offset, PAGE_SIZE); out: ret = mpage_writepage(page, get_block, wbc); if (ret == -EAGAIN) @@ -2720,8 +2720,8 @@ EXPORT_SYMBOL(nobh_writepage); int nobh_truncate_page(struct address_space *mapping, loff_t from, get_block_t *get_block) { - pgoff_t index = from >> PAGE_CACHE_SHIFT; - unsigned offset = from & (PAGE_CACHE_SIZE-1); + pgoff_t index = from >> PAGE_SHIFT; + unsigned offset = from & (PAGE_SIZE-1); unsigned blocksize; sector_t iblock; unsigned length, pos; @@ -2738,7 +2738,7 @@ int nobh_truncate_page(struct address_space *mapping, return 0; length = blocksize - length; - iblock = (sector_t)index << (PAGE_CACHE_SHIFT - inode->i_blkbits); + iblock = (sector_t)index << (PAGE_SHIFT - inode->i_blkbits); page = grab_cache_page(mapping, index); err = -ENOMEM; @@ -2748,7 +2748,7 @@ int nobh_truncate_page(struct address_space *mapping, if (page_has_buffers(page)) { has_buffers: unlock_page(page); - page_cache_release(page); + put_page(page); return block_truncate_page(mapping, from, get_block); } @@ -2772,7 +2772,7 @@ has_buffers: if (!PageUptodate(page)) { err = mapping->a_ops->readpage(NULL, page); if (err) { - page_cache_release(page); + put_page(page); goto out; } lock_page(page); @@ -2789,7 +2789,7 @@ has_buffers: unlock: unlock_page(page); - page_cache_release(page); + put_page(page); out: return err; } @@ -2798,8 +2798,8 @@ EXPORT_SYMBOL(nobh_truncate_page); int block_truncate_page(struct address_space *mapping, loff_t from, get_block_t *get_block) { - pgoff_t index = from >> PAGE_CACHE_SHIFT; - unsigned offset = from & (PAGE_CACHE_SIZE-1); + pgoff_t index = from >> PAGE_SHIFT; + unsigned offset = from & (PAGE_SIZE-1); unsigned blocksize; sector_t iblock; unsigned length, pos; @@ -2816,7 +2816,7 @@ int block_truncate_page(struct address_space *mapping, return 0; length = blocksize - length; - iblock = (sector_t)index << (PAGE_CACHE_SHIFT - inode->i_blkbits); + iblock = (sector_t)index << (PAGE_SHIFT - inode->i_blkbits); page = grab_cache_page(mapping, index); err = -ENOMEM; @@ -2865,7 +2865,7 @@ int block_truncate_page(struct address_space *mapping, unlock: unlock_page(page); - page_cache_release(page); + put_page(page); out: return err; } @@ -2879,7 +2879,7 @@ int block_write_full_page(struct page *page, get_block_t *get_block, { struct inode * const inode = page->mapping->host; loff_t i_size = i_size_read(inode); - const pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT; + const pgoff_t end_index = i_size >> PAGE_SHIFT; unsigned offset; /* Is the page fully inside i_size? */ @@ -2888,14 +2888,14 @@ int block_write_full_page(struct page *page, get_block_t *get_block, end_buffer_async_write); /* Is the page fully outside i_size? (truncate in progress) */ - offset = i_size & (PAGE_CACHE_SIZE-1); + offset = i_size & (PAGE_SIZE-1); if (page->index >= end_index+1 || !offset) { /* * The page may have dirty, unmapped buffers. For example, * they may have been added in ext3_writepage(). Make them * freeable here, so the page does not leak. 
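The writepage paths around this hunk all classify a page against i_size the same way: pages fully inside EOF are written whole, pages fully beyond EOF are left to truncation (the do_invalidatepage() call just below), and the single straddling page is zeroed from EOF to the page end before being written. A standalone sketch of that classification, 4 KiB pages assumed and classify() a made-up name:

  #include <stdio.h>

  #define PAGE_SHIFT	12
  #define PAGE_SIZE	(1UL << PAGE_SHIFT)

  /* mirror the i_size tests in block_write_full_page() above */
  static const char *classify(unsigned long pgidx, unsigned long i_size)
  {
  	unsigned long end_index = i_size >> PAGE_SHIFT;
  	unsigned long offset = i_size & (PAGE_SIZE - 1);

  	if (pgidx < end_index)
  		return "fully inside i_size: write it whole";
  	if (pgidx >= end_index + 1 || !offset)
  		return "fully outside i_size: truncate in progress";
  	return "straddles EOF: zero from offset to PAGE_SIZE, then write";
  }

  int main(void)
  {
  	unsigned long i_size = 10000;	/* EOF lands inside page 2 */

  	printf("page 0: %s\n", classify(0, i_size));
  	printf("page 2: %s\n", classify(2, i_size));
  	printf("page 5: %s\n", classify(5, i_size));
  	return 0;
  }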
*/ - do_invalidatepage(page, 0, PAGE_CACHE_SIZE); + do_invalidatepage(page, 0, PAGE_SIZE); unlock_page(page); return 0; /* don't care */ } @@ -2907,7 +2907,7 @@ int block_write_full_page(struct page *page, get_block_t *get_block, * the page size, the remaining memory is zeroed when mapped, and * writes to that region are not written out to the file." */ - zero_user_segment(page, offset, PAGE_CACHE_SIZE); + zero_user_segment(page, offset, PAGE_SIZE); return __block_write_full_page(inode, page, get_block, wbc, end_buffer_async_write); } diff --git a/fs/cachefiles/rdwr.c b/fs/cachefiles/rdwr.c index c0f3da3926a0..afbdc418966d 100644 --- a/fs/cachefiles/rdwr.c +++ b/fs/cachefiles/rdwr.c @@ -194,10 +194,10 @@ static void cachefiles_read_copier(struct fscache_operation *_op) error = -EIO; } - page_cache_release(monitor->back_page); + put_page(monitor->back_page); fscache_end_io(op, monitor->netfs_page, error); - page_cache_release(monitor->netfs_page); + put_page(monitor->netfs_page); fscache_retrieval_complete(op, 1); fscache_put_retrieval(op); kfree(monitor); @@ -288,8 +288,8 @@ monitor_backing_page: _debug("- monitor add"); /* install the monitor */ - page_cache_get(monitor->netfs_page); - page_cache_get(backpage); + get_page(monitor->netfs_page); + get_page(backpage); monitor->back_page = backpage; monitor->monitor.private = backpage; add_page_wait_queue(backpage, &monitor->monitor); @@ -310,7 +310,7 @@ backing_page_already_present: _debug("- present"); if (newpage) { - page_cache_release(newpage); + put_page(newpage); newpage = NULL; } @@ -342,7 +342,7 @@ success: out: if (backpage) - page_cache_release(backpage); + put_page(backpage); if (monitor) { fscache_put_retrieval(monitor->op); kfree(monitor); @@ -363,7 +363,7 @@ io_error: goto out; nomem_page: - page_cache_release(newpage); + put_page(newpage); nomem_monitor: fscache_put_retrieval(monitor->op); kfree(monitor); @@ -530,7 +530,7 @@ static int cachefiles_read_backing_file(struct cachefiles_object *object, netpage->index, cachefiles_gfp); if (ret < 0) { if (ret == -EEXIST) { - page_cache_release(netpage); + put_page(netpage); fscache_retrieval_complete(op, 1); continue; } @@ -538,10 +538,10 @@ static int cachefiles_read_backing_file(struct cachefiles_object *object, } /* install a monitor */ - page_cache_get(netpage); + get_page(netpage); monitor->netfs_page = netpage; - page_cache_get(backpage); + get_page(backpage); monitor->back_page = backpage; monitor->monitor.private = backpage; add_page_wait_queue(backpage, &monitor->monitor); @@ -555,10 +555,10 @@ static int cachefiles_read_backing_file(struct cachefiles_object *object, unlock_page(backpage); } - page_cache_release(backpage); + put_page(backpage); backpage = NULL; - page_cache_release(netpage); + put_page(netpage); netpage = NULL; continue; @@ -603,7 +603,7 @@ static int cachefiles_read_backing_file(struct cachefiles_object *object, netpage->index, cachefiles_gfp); if (ret < 0) { if (ret == -EEXIST) { - page_cache_release(netpage); + put_page(netpage); fscache_retrieval_complete(op, 1); continue; } @@ -612,14 +612,14 @@ static int cachefiles_read_backing_file(struct cachefiles_object *object, copy_highpage(netpage, backpage); - page_cache_release(backpage); + put_page(backpage); backpage = NULL; fscache_mark_page_cached(op, netpage); /* the netpage is unlocked and marked up to date here */ fscache_end_io(op, netpage, 0); - page_cache_release(netpage); + put_page(netpage); netpage = NULL; fscache_retrieval_complete(op, 1); continue; @@ -632,11 +632,11 @@ static int 
cachefiles_read_backing_file(struct cachefiles_object *object, out: /* tidy up */ if (newpage) - page_cache_release(newpage); + put_page(newpage); if (netpage) - page_cache_release(netpage); + put_page(netpage); if (backpage) - page_cache_release(backpage); + put_page(backpage); if (monitor) { fscache_put_retrieval(op); kfree(monitor); @@ -644,7 +644,7 @@ out: list_for_each_entry_safe(netpage, _n, list, lru) { list_del(&netpage->lru); - page_cache_release(netpage); + put_page(netpage); fscache_retrieval_complete(op, 1); } diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index fc5cae2a0db2..4801571f51cb 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -143,7 +143,7 @@ static void ceph_invalidatepage(struct page *page, unsigned int offset, inode = page->mapping->host; ci = ceph_inode(inode); - if (offset != 0 || length != PAGE_CACHE_SIZE) { + if (offset != 0 || length != PAGE_SIZE) { dout("%p invalidatepage %p idx %lu partial dirty page %u~%u\n", inode, page, page->index, offset, length); return; @@ -197,10 +197,10 @@ static int readpage_nounlock(struct file *filp, struct page *page) &ceph_inode_to_client(inode)->client->osdc; int err = 0; u64 off = page_offset(page); - u64 len = PAGE_CACHE_SIZE; + u64 len = PAGE_SIZE; if (off >= i_size_read(inode)) { - zero_user_segment(page, 0, PAGE_CACHE_SIZE); + zero_user_segment(page, 0, PAGE_SIZE); SetPageUptodate(page); return 0; } @@ -212,7 +212,7 @@ static int readpage_nounlock(struct file *filp, struct page *page) */ if (off == 0) return -EINVAL; - zero_user_segment(page, 0, PAGE_CACHE_SIZE); + zero_user_segment(page, 0, PAGE_SIZE); SetPageUptodate(page); return 0; } @@ -234,9 +234,9 @@ static int readpage_nounlock(struct file *filp, struct page *page) ceph_fscache_readpage_cancel(inode, page); goto out; } - if (err < PAGE_CACHE_SIZE) + if (err < PAGE_SIZE) /* zero fill remainder of page */ - zero_user_segment(page, err, PAGE_CACHE_SIZE); + zero_user_segment(page, err, PAGE_SIZE); else flush_dcache_page(page); @@ -278,10 +278,10 @@ static void finish_read(struct ceph_osd_request *req, struct ceph_msg *msg) if (rc < 0 && rc != -ENOENT) goto unlock; - if (bytes < (int)PAGE_CACHE_SIZE) { + if (bytes < (int)PAGE_SIZE) { /* zero (remainder of) page */ int s = bytes < 0 ? 
0 : bytes; - zero_user_segment(page, s, PAGE_CACHE_SIZE); + zero_user_segment(page, s, PAGE_SIZE); } dout("finish_read %p uptodate %p idx %lu\n", inode, page, page->index); @@ -290,8 +290,8 @@ static void finish_read(struct ceph_osd_request *req, struct ceph_msg *msg) ceph_readpage_to_fscache(inode, page); unlock: unlock_page(page); - page_cache_release(page); - bytes -= PAGE_CACHE_SIZE; + put_page(page); + bytes -= PAGE_SIZE; } kfree(osd_data->pages); } @@ -336,7 +336,7 @@ static int start_read(struct inode *inode, struct list_head *page_list, int max) if (max && nr_pages == max) break; } - len = nr_pages << PAGE_CACHE_SHIFT; + len = nr_pages << PAGE_SHIFT; dout("start_read %p nr_pages %d is %lld~%lld\n", inode, nr_pages, off, len); vino = ceph_vino(inode); @@ -364,7 +364,7 @@ static int start_read(struct inode *inode, struct list_head *page_list, int max) if (add_to_page_cache_lru(page, &inode->i_data, page->index, GFP_KERNEL)) { ceph_fscache_uncache_page(inode, page); - page_cache_release(page); + put_page(page); dout("start_read %p add_to_page_cache failed %p\n", inode, page); nr_pages = i; @@ -415,8 +415,8 @@ static int ceph_readpages(struct file *file, struct address_space *mapping, if (rc == 0) goto out; - if (fsc->mount_options->rsize >= PAGE_CACHE_SIZE) - max = (fsc->mount_options->rsize + PAGE_CACHE_SIZE - 1) + if (fsc->mount_options->rsize >= PAGE_SIZE) + max = (fsc->mount_options->rsize + PAGE_SIZE - 1) >> PAGE_SHIFT; dout("readpages %p file %p nr_pages %d max %d\n", inode, @@ -484,7 +484,7 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc) long writeback_stat; u64 truncate_size; u32 truncate_seq; - int err = 0, len = PAGE_CACHE_SIZE; + int err = 0, len = PAGE_SIZE; dout("writepage %p idx %lu\n", page, page->index); @@ -725,9 +725,9 @@ static int ceph_writepages_start(struct address_space *mapping, } if (fsc->mount_options->wsize && fsc->mount_options->wsize < wsize) wsize = fsc->mount_options->wsize; - if (wsize < PAGE_CACHE_SIZE) - wsize = PAGE_CACHE_SIZE; - max_pages_ever = wsize >> PAGE_CACHE_SHIFT; + if (wsize < PAGE_SIZE) + wsize = PAGE_SIZE; + max_pages_ever = wsize >> PAGE_SHIFT; pagevec_init(&pvec, 0); @@ -737,8 +737,8 @@ static int ceph_writepages_start(struct address_space *mapping, end = -1; dout(" cyclic, start at %lu\n", start); } else { - start = wbc->range_start >> PAGE_CACHE_SHIFT; - end = wbc->range_end >> PAGE_CACHE_SHIFT; + start = wbc->range_start >> PAGE_SHIFT; + end = wbc->range_end >> PAGE_SHIFT; if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) range_whole = 1; should_loop = 0; @@ -887,7 +887,7 @@ get_more_pages: num_ops = 1 + do_sync; strip_unit_end = page->index + - ((len - 1) >> PAGE_CACHE_SHIFT); + ((len - 1) >> PAGE_SHIFT); BUG_ON(pages); max_pages = calc_pages_for(0, (u64)len); @@ -901,7 +901,7 @@ get_more_pages: len = 0; } else if (page->index != - (offset + len) >> PAGE_CACHE_SHIFT) { + (offset + len) >> PAGE_SHIFT) { if (num_ops >= (pool ? CEPH_OSD_SLAB_OPS : CEPH_OSD_MAX_OPS)) { redirty_page_for_writepage(wbc, page); @@ -929,7 +929,7 @@ get_more_pages: pages[locked_pages] = page; locked_pages++; - len += PAGE_CACHE_SIZE; + len += PAGE_SIZE; } /* did we get anything? 
*/ @@ -981,7 +981,7 @@ new_request: BUG_ON(IS_ERR(req)); } BUG_ON(len < page_offset(pages[locked_pages - 1]) + - PAGE_CACHE_SIZE - offset); + PAGE_SIZE - offset); req->r_callback = writepages_finish; req->r_inode = inode; @@ -1011,7 +1011,7 @@ new_request: } set_page_writeback(pages[i]); - len += PAGE_CACHE_SIZE; + len += PAGE_SIZE; } if (snap_size != -1) { @@ -1020,7 +1020,7 @@ new_request: /* writepages_finish() clears writeback pages * according to the data length, so make sure * data length covers all locked pages */ - u64 min_len = len + 1 - PAGE_CACHE_SIZE; + u64 min_len = len + 1 - PAGE_SIZE; len = min(len, (u64)i_size_read(inode) - offset); len = max(len, min_len); } @@ -1135,8 +1135,8 @@ static int ceph_update_writeable_page(struct file *file, { struct inode *inode = file_inode(file); struct ceph_inode_info *ci = ceph_inode(inode); - loff_t page_off = pos & PAGE_CACHE_MASK; - int pos_in_page = pos & ~PAGE_CACHE_MASK; + loff_t page_off = pos & PAGE_MASK; + int pos_in_page = pos & ~PAGE_MASK; int end_in_page = pos_in_page + len; loff_t i_size; int r; @@ -1191,7 +1191,7 @@ retry_locked: } /* full page? */ - if (pos_in_page == 0 && len == PAGE_CACHE_SIZE) + if (pos_in_page == 0 && len == PAGE_SIZE) return 0; /* past end of file? */ @@ -1199,12 +1199,12 @@ retry_locked: if (page_off >= i_size || (pos_in_page == 0 && (pos+len) >= i_size && - end_in_page - pos_in_page != PAGE_CACHE_SIZE)) { + end_in_page - pos_in_page != PAGE_SIZE)) { dout(" zeroing %p 0 - %d and %d - %d\n", - page, pos_in_page, end_in_page, (int)PAGE_CACHE_SIZE); + page, pos_in_page, end_in_page, (int)PAGE_SIZE); zero_user_segments(page, 0, pos_in_page, - end_in_page, PAGE_CACHE_SIZE); + end_in_page, PAGE_SIZE); return 0; } @@ -1228,7 +1228,7 @@ static int ceph_write_begin(struct file *file, struct address_space *mapping, { struct inode *inode = file_inode(file); struct page *page; - pgoff_t index = pos >> PAGE_CACHE_SHIFT; + pgoff_t index = pos >> PAGE_SHIFT; int r; do { @@ -1242,7 +1242,7 @@ static int ceph_write_begin(struct file *file, struct address_space *mapping, r = ceph_update_writeable_page(file, pos, len, page); if (r < 0) - page_cache_release(page); + put_page(page); else *pagep = page; } while (r == -EAGAIN); @@ -1259,7 +1259,7 @@ static int ceph_write_end(struct file *file, struct address_space *mapping, struct page *page, void *fsdata) { struct inode *inode = file_inode(file); - unsigned from = pos & (PAGE_CACHE_SIZE - 1); + unsigned from = pos & (PAGE_SIZE - 1); int check_cap = 0; dout("write_end file %p inode %p page %p %d~%d (%d)\n", file, @@ -1279,7 +1279,7 @@ static int ceph_write_end(struct file *file, struct address_space *mapping, set_page_dirty(page); unlock_page(page); - page_cache_release(page); + put_page(page); if (check_cap) ceph_check_caps(ceph_inode(inode), CHECK_CAPS_AUTHONLY, NULL); @@ -1322,11 +1322,11 @@ static int ceph_filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_file_info *fi = vma->vm_file->private_data; struct page *pinned_page = NULL; - loff_t off = vmf->pgoff << PAGE_CACHE_SHIFT; + loff_t off = vmf->pgoff << PAGE_SHIFT; int want, got, ret; dout("filemap_fault %p %llx.%llx %llu~%zd trying to get caps\n", - inode, ceph_vinop(inode), off, (size_t)PAGE_CACHE_SIZE); + inode, ceph_vinop(inode), off, (size_t)PAGE_SIZE); if (fi->fmode & CEPH_FILE_MODE_LAZY) want = CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO; else @@ -1343,7 +1343,7 @@ static int ceph_filemap_fault(struct vm_area_struct *vma, struct vm_fault 
*vmf) } } dout("filemap_fault %p %llu~%zd got cap refs on %s\n", - inode, off, (size_t)PAGE_CACHE_SIZE, ceph_cap_string(got)); + inode, off, (size_t)PAGE_SIZE, ceph_cap_string(got)); if ((got & (CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO)) || ci->i_inline_version == CEPH_INLINE_NONE) @@ -1352,16 +1352,16 @@ static int ceph_filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) ret = -EAGAIN; dout("filemap_fault %p %llu~%zd dropping cap refs on %s ret %d\n", - inode, off, (size_t)PAGE_CACHE_SIZE, ceph_cap_string(got), ret); + inode, off, (size_t)PAGE_SIZE, ceph_cap_string(got), ret); if (pinned_page) - page_cache_release(pinned_page); + put_page(pinned_page); ceph_put_cap_refs(ci, got); if (ret != -EAGAIN) return ret; /* read inline data */ - if (off >= PAGE_CACHE_SIZE) { + if (off >= PAGE_SIZE) { /* does not support inline data > PAGE_SIZE */ ret = VM_FAULT_SIGBUS; } else { @@ -1378,12 +1378,12 @@ static int ceph_filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) CEPH_STAT_CAP_INLINE_DATA, true); if (ret1 < 0 || off >= i_size_read(inode)) { unlock_page(page); - page_cache_release(page); + put_page(page); ret = VM_FAULT_SIGBUS; goto out; } - if (ret1 < PAGE_CACHE_SIZE) - zero_user_segment(page, ret1, PAGE_CACHE_SIZE); + if (ret1 < PAGE_SIZE) + zero_user_segment(page, ret1, PAGE_SIZE); else flush_dcache_page(page); SetPageUptodate(page); @@ -1392,7 +1392,7 @@ static int ceph_filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) } out: dout("filemap_fault %p %llu~%zd read inline data ret %d\n", - inode, off, (size_t)PAGE_CACHE_SIZE, ret); + inode, off, (size_t)PAGE_SIZE, ret); return ret; } @@ -1430,10 +1430,10 @@ static int ceph_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) } } - if (off + PAGE_CACHE_SIZE <= size) - len = PAGE_CACHE_SIZE; + if (off + PAGE_SIZE <= size) + len = PAGE_SIZE; else - len = size & ~PAGE_CACHE_MASK; + len = size & ~PAGE_MASK; dout("page_mkwrite %p %llx.%llx %llu~%zd getting caps i_size %llu\n", inode, ceph_vinop(inode), off, len, size); @@ -1519,7 +1519,7 @@ void ceph_fill_inline_data(struct inode *inode, struct page *locked_page, return; if (PageUptodate(page)) { unlock_page(page); - page_cache_release(page); + put_page(page); return; } } @@ -1534,14 +1534,14 @@ void ceph_fill_inline_data(struct inode *inode, struct page *locked_page, } if (page != locked_page) { - if (len < PAGE_CACHE_SIZE) - zero_user_segment(page, len, PAGE_CACHE_SIZE); + if (len < PAGE_SIZE) + zero_user_segment(page, len, PAGE_SIZE); else flush_dcache_page(page); SetPageUptodate(page); unlock_page(page); - page_cache_release(page); + put_page(page); } } @@ -1578,7 +1578,7 @@ int ceph_uninline_data(struct file *filp, struct page *locked_page) from_pagecache = true; lock_page(page); } else { - page_cache_release(page); + put_page(page); page = NULL; } } @@ -1586,8 +1586,8 @@ int ceph_uninline_data(struct file *filp, struct page *locked_page) if (page) { len = i_size_read(inode); - if (len > PAGE_CACHE_SIZE) - len = PAGE_CACHE_SIZE; + if (len > PAGE_SIZE) + len = PAGE_SIZE; } else { page = __page_cache_alloc(GFP_NOFS); if (!page) { @@ -1670,7 +1670,7 @@ out: if (page && page != locked_page) { if (from_pagecache) { unlock_page(page); - page_cache_release(page); + put_page(page); } else __free_pages(page, 0); } diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index de17bb232ff8..cfaeef18cbca 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -2510,7 +2510,7 @@ int ceph_get_caps(struct ceph_inode_info *ci, int need, int want, *pinned_page = page; break; } 
- page_cache_release(page); + put_page(page); } /* * drop cap refs first because getattr while diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index fadc243dfb28..4fb2bbc2a272 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -129,7 +129,7 @@ static int __dcache_readdir(struct file *file, struct dir_context *ctx, struct inode *dir = d_inode(parent); struct dentry *dentry, *last = NULL; struct ceph_dentry_info *di; - unsigned nsize = PAGE_CACHE_SIZE / sizeof(struct dentry *); + unsigned nsize = PAGE_SIZE / sizeof(struct dentry *); int err = 0; loff_t ptr_pos = 0; struct ceph_readdir_cache_control cache_ctl = {}; @@ -154,7 +154,7 @@ static int __dcache_readdir(struct file *file, struct dir_context *ctx, } err = -EAGAIN; - pgoff = ptr_pos >> PAGE_CACHE_SHIFT; + pgoff = ptr_pos >> PAGE_SHIFT; if (!cache_ctl.page || pgoff != page_index(cache_ctl.page)) { ceph_readdir_cache_release(&cache_ctl); cache_ctl.page = find_lock_page(&dir->i_data, pgoff); diff --git a/fs/ceph/file.c b/fs/ceph/file.c index ef38f01c1795..a79f9269831e 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -466,7 +466,7 @@ more: ret += zlen; } - didpages = (page_align + ret) >> PAGE_CACHE_SHIFT; + didpages = (page_align + ret) >> PAGE_SHIFT; pos += ret; read = pos - off; left -= ret; @@ -806,8 +806,8 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter, if (write) { ret = invalidate_inode_pages2_range(inode->i_mapping, - pos >> PAGE_CACHE_SHIFT, - (pos + count) >> PAGE_CACHE_SHIFT); + pos >> PAGE_SHIFT, + (pos + count) >> PAGE_SHIFT); if (ret < 0) dout("invalidate_inode_pages2_range returned %d\n", ret); @@ -872,7 +872,7 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter, * may block. */ truncate_inode_pages_range(inode->i_mapping, pos, - (pos+len) | (PAGE_CACHE_SIZE - 1)); + (pos+len) | (PAGE_SIZE - 1)); osd_req_op_init(req, 1, CEPH_OSD_OP_STARTSYNC, 0); } @@ -1006,8 +1006,8 @@ ceph_sync_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos, return ret; ret = invalidate_inode_pages2_range(inode->i_mapping, - pos >> PAGE_CACHE_SHIFT, - (pos + count) >> PAGE_CACHE_SHIFT); + pos >> PAGE_SHIFT, + (pos + count) >> PAGE_SHIFT); if (ret < 0) dout("invalidate_inode_pages2_range returned %d\n", ret); @@ -1036,7 +1036,7 @@ ceph_sync_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos, * write from beginning of first page, * regardless of io alignment */ - num_pages = (len + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; + num_pages = (len + PAGE_SIZE - 1) >> PAGE_SHIFT; pages = ceph_alloc_page_vector(num_pages, GFP_KERNEL); if (IS_ERR(pages)) { @@ -1159,7 +1159,7 @@ again: dout("aio_read %p %llx.%llx dropping cap refs on %s = %d\n", inode, ceph_vinop(inode), ceph_cap_string(got), (int)ret); if (pinned_page) { - page_cache_release(pinned_page); + put_page(pinned_page); pinned_page = NULL; } ceph_put_cap_refs(ci, got); @@ -1188,10 +1188,10 @@ again: if (retry_op == READ_INLINE) { BUG_ON(ret > 0 || read > 0); if (iocb->ki_pos < i_size && - iocb->ki_pos < PAGE_CACHE_SIZE) { + iocb->ki_pos < PAGE_SIZE) { loff_t end = min_t(loff_t, i_size, iocb->ki_pos + len); - end = min_t(loff_t, end, PAGE_CACHE_SIZE); + end = min_t(loff_t, end, PAGE_SIZE); if (statret < end) zero_user_segment(page, statret, end); ret = copy_page_to_iter(page, @@ -1463,21 +1463,21 @@ static inline void ceph_zero_partial_page( struct inode *inode, loff_t offset, unsigned size) { struct page *page; - pgoff_t index = offset >> PAGE_CACHE_SHIFT; + pgoff_t index = offset >> PAGE_SHIFT; page = find_lock_page(inode->i_mapping, index); if (page) 
{ wait_on_page_writeback(page); - zero_user(page, offset & (PAGE_CACHE_SIZE - 1), size); + zero_user(page, offset & (PAGE_SIZE - 1), size); unlock_page(page); - page_cache_release(page); + put_page(page); } } static void ceph_zero_pagecache_range(struct inode *inode, loff_t offset, loff_t length) { - loff_t nearly = round_up(offset, PAGE_CACHE_SIZE); + loff_t nearly = round_up(offset, PAGE_SIZE); if (offset < nearly) { loff_t size = nearly - offset; if (length < size) @@ -1486,8 +1486,8 @@ static void ceph_zero_pagecache_range(struct inode *inode, loff_t offset, offset += size; length -= size; } - if (length >= PAGE_CACHE_SIZE) { - loff_t size = round_down(length, PAGE_CACHE_SIZE); + if (length >= PAGE_SIZE) { + loff_t size = round_down(length, PAGE_SIZE); truncate_pagecache_range(inode, offset, offset + size - 1); offset += size; length -= size; diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index ed58b168904a..edfade037738 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -1338,7 +1338,7 @@ void ceph_readdir_cache_release(struct ceph_readdir_cache_control *ctl) { if (ctl->page) { kunmap(ctl->page); - page_cache_release(ctl->page); + put_page(ctl->page); ctl->page = NULL; } } @@ -1348,7 +1348,7 @@ static int fill_readdir_cache(struct inode *dir, struct dentry *dn, struct ceph_mds_request *req) { struct ceph_inode_info *ci = ceph_inode(dir); - unsigned nsize = PAGE_CACHE_SIZE / sizeof(struct dentry*); + unsigned nsize = PAGE_SIZE / sizeof(struct dentry*); unsigned idx = ctl->index % nsize; pgoff_t pgoff = ctl->index / nsize; @@ -1367,7 +1367,7 @@ static int fill_readdir_cache(struct inode *dir, struct dentry *dn, unlock_page(ctl->page); ctl->dentries = kmap(ctl->page); if (idx == 0) - memset(ctl->dentries, 0, PAGE_CACHE_SIZE); + memset(ctl->dentries, 0, PAGE_SIZE); } if (req->r_dir_release_cnt == atomic64_read(&ci->i_release_count) && diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 44852c3ae531..541ead4d8965 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -1610,7 +1610,7 @@ again: while (!list_empty(&tmp_list)) { if (!msg) { msg = ceph_msg_new(CEPH_MSG_CLIENT_CAPRELEASE, - PAGE_CACHE_SIZE, GFP_NOFS, false); + PAGE_SIZE, GFP_NOFS, false); if (!msg) goto out_err; head = msg->front.iov_base; diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h index 37712ccffcc6..ee69a537dba5 100644 --- a/fs/ceph/mds_client.h +++ b/fs/ceph/mds_client.h @@ -97,7 +97,7 @@ struct ceph_mds_reply_info_parsed { /* * cap releases are batched and sent to the MDS en masse. */ -#define CEPH_CAPS_PER_RELEASE ((PAGE_CACHE_SIZE - \ +#define CEPH_CAPS_PER_RELEASE ((PAGE_SIZE - \ sizeof(struct ceph_mds_cap_release)) / \ sizeof(struct ceph_mds_cap_item)) diff --git a/fs/ceph/super.c b/fs/ceph/super.c index c973043deb0e..f12d5e2955c2 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -560,7 +560,7 @@ static struct ceph_fs_client *create_fs_client(struct ceph_mount_options *fsopt, /* set up mempools */ err = -ENOMEM; - page_count = fsc->mount_options->wsize >> PAGE_CACHE_SHIFT; + page_count = fsc->mount_options->wsize >> PAGE_SHIFT; size = sizeof (struct page *) * (page_count ? page_count : 1); fsc->wb_pagevec_pool = mempool_create_kmalloc_pool(10, size); if (!fsc->wb_pagevec_pool) @@ -912,13 +912,13 @@ static int ceph_register_bdi(struct super_block *sb, int err; /* set ra_pages based on rasize mount option? 
*/ - if (fsc->mount_options->rasize >= PAGE_CACHE_SIZE) + if (fsc->mount_options->rasize >= PAGE_SIZE) fsc->backing_dev_info.ra_pages = - (fsc->mount_options->rasize + PAGE_CACHE_SIZE - 1) + (fsc->mount_options->rasize + PAGE_SIZE - 1) >> PAGE_SHIFT; else fsc->backing_dev_info.ra_pages = - VM_MAX_READAHEAD * 1024 / PAGE_CACHE_SIZE; + VM_MAX_READAHEAD * 1024 / PAGE_SIZE; err = bdi_register(&fsc->backing_dev_info, NULL, "ceph-%ld", atomic_long_inc_return(&bdi_seq)); diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 1d86fc620e5c..89201564c346 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -962,7 +962,7 @@ static int cifs_clone_file_range(struct file *src_file, loff_t off, cifs_dbg(FYI, "about to flush pages\n"); /* should we flush first and last page first */ truncate_inode_pages_range(&target_inode->i_data, destoff, - PAGE_CACHE_ALIGN(destoff + len)-1); + PAGE_ALIGN(destoff + len)-1); if (target_tcon->ses->server->ops->duplicate_extents) rc = target_tcon->ses->server->ops->duplicate_extents(xid, diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index 76fcb50295a3..a894bf809ff7 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -1929,17 +1929,17 @@ cifs_writev_requeue(struct cifs_writedata *wdata) wsize = server->ops->wp_retry_size(inode); if (wsize < rest_len) { - nr_pages = wsize / PAGE_CACHE_SIZE; + nr_pages = wsize / PAGE_SIZE; if (!nr_pages) { rc = -ENOTSUPP; break; } - cur_len = nr_pages * PAGE_CACHE_SIZE; - tailsz = PAGE_CACHE_SIZE; + cur_len = nr_pages * PAGE_SIZE; + tailsz = PAGE_SIZE; } else { - nr_pages = DIV_ROUND_UP(rest_len, PAGE_CACHE_SIZE); + nr_pages = DIV_ROUND_UP(rest_len, PAGE_SIZE); cur_len = rest_len; - tailsz = rest_len - (nr_pages - 1) * PAGE_CACHE_SIZE; + tailsz = rest_len - (nr_pages - 1) * PAGE_SIZE; } wdata2 = cifs_writedata_alloc(nr_pages, cifs_writev_complete); @@ -1957,7 +1957,7 @@ cifs_writev_requeue(struct cifs_writedata *wdata) wdata2->sync_mode = wdata->sync_mode; wdata2->nr_pages = nr_pages; wdata2->offset = page_offset(wdata2->pages[0]); - wdata2->pagesz = PAGE_CACHE_SIZE; + wdata2->pagesz = PAGE_SIZE; wdata2->tailsz = tailsz; wdata2->bytes = cur_len; @@ -1975,7 +1975,7 @@ cifs_writev_requeue(struct cifs_writedata *wdata) if (rc != 0 && rc != -EAGAIN) { SetPageError(wdata2->pages[j]); end_page_writeback(wdata2->pages[j]); - page_cache_release(wdata2->pages[j]); + put_page(wdata2->pages[j]); } } @@ -2018,7 +2018,7 @@ cifs_writev_complete(struct work_struct *work) else if (wdata->result < 0) SetPageError(page); end_page_writeback(page); - page_cache_release(page); + put_page(page); } if (wdata->result != -EAGAIN) mapping_set_error(inode->i_mapping, wdata->result); diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index a763cd3d9e7c..6f62ac821a84 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -3630,7 +3630,7 @@ try_mount_again: cifs_sb->rsize = server->ops->negotiate_rsize(tcon, volume_info); /* tune readahead according to rsize */ - cifs_sb->bdi.ra_pages = cifs_sb->rsize / PAGE_CACHE_SIZE; + cifs_sb->bdi.ra_pages = cifs_sb->rsize / PAGE_SIZE; remote_path_check: #ifdef CONFIG_CIFS_DFS_UPCALL diff --git a/fs/cifs/file.c b/fs/cifs/file.c index ff882aeaccc6..5ce540dc6996 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -1833,7 +1833,7 @@ refind_writable: static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to) { struct address_space *mapping = page->mapping; - loff_t offset = (loff_t)page->index << PAGE_CACHE_SHIFT; + loff_t offset = (loff_t)page->index << PAGE_SHIFT; char *write_data; int rc = -EFAULT; 
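/*
 * The PAGE_CACHE_{SIZE,SHIFT,MASK,ALIGN} conversions in these cifs
 * hunks all reduce to the same arithmetic, because those macros were
 * 1:1 aliases of PAGE_{SIZE,SHIFT,MASK,ALIGN} in
 * include/linux/pagemap.h. A short sketch of the index/offset idioms
 * the call sites use; the function is illustrative and not part of
 * the patch:
 */
#include <linux/types.h>
#include <linux/mm.h>

static loff_t page_pos_round_trip(loff_t pos)
{
	pgoff_t index = pos >> PAGE_SHIFT;	/* which page holds pos */
	unsigned int off = pos & ~PAGE_MASK;	/* byte offset inside that page */

	/*
	 * pos & (PAGE_SIZE - 1) yields the same offset, since PAGE_MASK
	 * is ~(PAGE_SIZE - 1); both spellings appear in these hunks.
	 */
	return ((loff_t)index << PAGE_SHIFT) + off;	/* == pos */
}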
int bytes_written = 0; @@ -1849,7 +1849,7 @@ static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to) write_data = kmap(page); write_data += from; - if ((to > PAGE_CACHE_SIZE) || (from > to)) { + if ((to > PAGE_SIZE) || (from > to)) { kunmap(page); return -EIO; } @@ -1991,7 +1991,7 @@ wdata_prepare_pages(struct cifs_writedata *wdata, unsigned int found_pages, /* put any pages we aren't going to use */ for (i = nr_pages; i < found_pages; i++) { - page_cache_release(wdata->pages[i]); + put_page(wdata->pages[i]); wdata->pages[i] = NULL; } @@ -2009,11 +2009,11 @@ wdata_send_pages(struct cifs_writedata *wdata, unsigned int nr_pages, wdata->sync_mode = wbc->sync_mode; wdata->nr_pages = nr_pages; wdata->offset = page_offset(wdata->pages[0]); - wdata->pagesz = PAGE_CACHE_SIZE; + wdata->pagesz = PAGE_SIZE; wdata->tailsz = min(i_size_read(mapping->host) - page_offset(wdata->pages[nr_pages - 1]), - (loff_t)PAGE_CACHE_SIZE); - wdata->bytes = ((nr_pages - 1) * PAGE_CACHE_SIZE) + wdata->tailsz; + (loff_t)PAGE_SIZE); + wdata->bytes = ((nr_pages - 1) * PAGE_SIZE) + wdata->tailsz; if (wdata->cfile != NULL) cifsFileInfo_put(wdata->cfile); @@ -2047,15 +2047,15 @@ static int cifs_writepages(struct address_space *mapping, * If wsize is smaller than the page cache size, default to writing * one page at a time via cifs_writepage */ - if (cifs_sb->wsize < PAGE_CACHE_SIZE) + if (cifs_sb->wsize < PAGE_SIZE) return generic_writepages(mapping, wbc); if (wbc->range_cyclic) { index = mapping->writeback_index; /* Start from prev offset */ end = -1; } else { - index = wbc->range_start >> PAGE_CACHE_SHIFT; - end = wbc->range_end >> PAGE_CACHE_SHIFT; + index = wbc->range_start >> PAGE_SHIFT; + end = wbc->range_end >> PAGE_SHIFT; if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) range_whole = true; scanned = true; @@ -2071,7 +2071,7 @@ retry: if (rc) break; - tofind = min((wsize / PAGE_CACHE_SIZE) - 1, end - index) + 1; + tofind = min((wsize / PAGE_SIZE) - 1, end - index) + 1; wdata = wdata_alloc_and_fillpages(tofind, mapping, end, &index, &found_pages); @@ -2111,7 +2111,7 @@ retry: else SetPageError(wdata->pages[i]); end_page_writeback(wdata->pages[i]); - page_cache_release(wdata->pages[i]); + put_page(wdata->pages[i]); } if (rc != -EAGAIN) mapping_set_error(mapping, rc); @@ -2154,7 +2154,7 @@ cifs_writepage_locked(struct page *page, struct writeback_control *wbc) xid = get_xid(); /* BB add check for wbc flags */ - page_cache_get(page); + get_page(page); if (!PageUptodate(page)) cifs_dbg(FYI, "ppw - page not up to date\n"); @@ -2170,7 +2170,7 @@ cifs_writepage_locked(struct page *page, struct writeback_control *wbc) */ set_page_writeback(page); retry_write: - rc = cifs_partialpagewrite(page, 0, PAGE_CACHE_SIZE); + rc = cifs_partialpagewrite(page, 0, PAGE_SIZE); if (rc == -EAGAIN && wbc->sync_mode == WB_SYNC_ALL) goto retry_write; else if (rc == -EAGAIN) @@ -2180,7 +2180,7 @@ retry_write: else SetPageUptodate(page); end_page_writeback(page); - page_cache_release(page); + put_page(page); free_xid(xid); return rc; } @@ -2214,12 +2214,12 @@ static int cifs_write_end(struct file *file, struct address_space *mapping, if (copied == len) SetPageUptodate(page); ClearPageChecked(page); - } else if (!PageUptodate(page) && copied == PAGE_CACHE_SIZE) + } else if (!PageUptodate(page) && copied == PAGE_SIZE) SetPageUptodate(page); if (!PageUptodate(page)) { char *page_data; - unsigned offset = pos & (PAGE_CACHE_SIZE - 1); + unsigned offset = pos & (PAGE_SIZE - 1); unsigned int xid; xid = get_xid(); @@ 
-2248,7 +2248,7 @@ static int cifs_write_end(struct file *file, struct address_space *mapping, } unlock_page(page); - page_cache_release(page); + put_page(page); return rc; } @@ -3286,9 +3286,9 @@ cifs_readv_complete(struct work_struct *work) (rdata->result == -EAGAIN && got_bytes)) cifs_readpage_to_fscache(rdata->mapping->host, page); - got_bytes -= min_t(unsigned int, PAGE_CACHE_SIZE, got_bytes); + got_bytes -= min_t(unsigned int, PAGE_SIZE, got_bytes); - page_cache_release(page); + put_page(page); rdata->pages[i] = NULL; } kref_put(&rdata->refcount, cifs_readdata_release); @@ -3307,21 +3307,21 @@ cifs_readpages_read_into_pages(struct TCP_Server_Info *server, /* determine the eof that the server (probably) has */ eof = CIFS_I(rdata->mapping->host)->server_eof; - eof_index = eof ? (eof - 1) >> PAGE_CACHE_SHIFT : 0; + eof_index = eof ? (eof - 1) >> PAGE_SHIFT : 0; cifs_dbg(FYI, "eof=%llu eof_index=%lu\n", eof, eof_index); rdata->got_bytes = 0; - rdata->tailsz = PAGE_CACHE_SIZE; + rdata->tailsz = PAGE_SIZE; for (i = 0; i < nr_pages; i++) { struct page *page = rdata->pages[i]; - if (len >= PAGE_CACHE_SIZE) { + if (len >= PAGE_SIZE) { /* enough data to fill the page */ iov.iov_base = kmap(page); - iov.iov_len = PAGE_CACHE_SIZE; + iov.iov_len = PAGE_SIZE; cifs_dbg(FYI, "%u: idx=%lu iov_base=%p iov_len=%zu\n", i, page->index, iov.iov_base, iov.iov_len); - len -= PAGE_CACHE_SIZE; + len -= PAGE_SIZE; } else if (len > 0) { /* enough for partial page, fill and zero the rest */ iov.iov_base = kmap(page); @@ -3329,7 +3329,7 @@ cifs_readpages_read_into_pages(struct TCP_Server_Info *server, cifs_dbg(FYI, "%u: idx=%lu iov_base=%p iov_len=%zu\n", i, page->index, iov.iov_base, iov.iov_len); memset(iov.iov_base + len, - '\0', PAGE_CACHE_SIZE - len); + '\0', PAGE_SIZE - len); rdata->tailsz = len; len = 0; } else if (page->index > eof_index) { @@ -3341,12 +3341,12 @@ cifs_readpages_read_into_pages(struct TCP_Server_Info *server, * to prevent the VFS from repeatedly attempting to * fill them until the writes are flushed. */ - zero_user(page, 0, PAGE_CACHE_SIZE); + zero_user(page, 0, PAGE_SIZE); lru_cache_add_file(page); flush_dcache_page(page); SetPageUptodate(page); unlock_page(page); - page_cache_release(page); + put_page(page); rdata->pages[i] = NULL; rdata->nr_pages--; continue; @@ -3354,7 +3354,7 @@ cifs_readpages_read_into_pages(struct TCP_Server_Info *server, /* no need to hold page hostage */ lru_cache_add_file(page); unlock_page(page); - page_cache_release(page); + put_page(page); rdata->pages[i] = NULL; rdata->nr_pages--; continue; @@ -3402,8 +3402,8 @@ readpages_get_pages(struct address_space *mapping, struct list_head *page_list, } /* move first page to the tmplist */ - *offset = (loff_t)page->index << PAGE_CACHE_SHIFT; - *bytes = PAGE_CACHE_SIZE; + *offset = (loff_t)page->index << PAGE_SHIFT; + *bytes = PAGE_SIZE; *nr_pages = 1; list_move_tail(&page->lru, tmplist); @@ -3415,7 +3415,7 @@ readpages_get_pages(struct address_space *mapping, struct list_head *page_list, break; /* would this page push the read over the rsize? 
*/ - if (*bytes + PAGE_CACHE_SIZE > rsize) + if (*bytes + PAGE_SIZE > rsize) break; __SetPageLocked(page); @@ -3424,7 +3424,7 @@ readpages_get_pages(struct address_space *mapping, struct list_head *page_list, break; } list_move_tail(&page->lru, tmplist); - (*bytes) += PAGE_CACHE_SIZE; + (*bytes) += PAGE_SIZE; expected_index++; (*nr_pages)++; } @@ -3493,7 +3493,7 @@ static int cifs_readpages(struct file *file, struct address_space *mapping, * reach this point however since we set ra_pages to 0 when the * rsize is smaller than a cache page. */ - if (unlikely(rsize < PAGE_CACHE_SIZE)) { + if (unlikely(rsize < PAGE_SIZE)) { add_credits_and_wake_if(server, credits, 0); return 0; } @@ -3512,7 +3512,7 @@ static int cifs_readpages(struct file *file, struct address_space *mapping, list_del(&page->lru); lru_cache_add_file(page); unlock_page(page); - page_cache_release(page); + put_page(page); } rc = -ENOMEM; add_credits_and_wake_if(server, credits, 0); @@ -3524,7 +3524,7 @@ static int cifs_readpages(struct file *file, struct address_space *mapping, rdata->offset = offset; rdata->bytes = bytes; rdata->pid = pid; - rdata->pagesz = PAGE_CACHE_SIZE; + rdata->pagesz = PAGE_SIZE; rdata->read_into_pages = cifs_readpages_read_into_pages; rdata->credits = credits; @@ -3542,7 +3542,7 @@ static int cifs_readpages(struct file *file, struct address_space *mapping, page = rdata->pages[i]; lru_cache_add_file(page); unlock_page(page); - page_cache_release(page); + put_page(page); } /* Fallback to the readpage in error/reconnect cases */ kref_put(&rdata->refcount, cifs_readdata_release); @@ -3577,7 +3577,7 @@ static int cifs_readpage_worker(struct file *file, struct page *page, read_data = kmap(page); /* for reads over a certain size could initiate async read ahead */ - rc = cifs_read(file, read_data, PAGE_CACHE_SIZE, poffset); + rc = cifs_read(file, read_data, PAGE_SIZE, poffset); if (rc < 0) goto io_error; @@ -3587,8 +3587,8 @@ static int cifs_readpage_worker(struct file *file, struct page *page, file_inode(file)->i_atime = current_fs_time(file_inode(file)->i_sb); - if (PAGE_CACHE_SIZE > rc) - memset(read_data + rc, 0, PAGE_CACHE_SIZE - rc); + if (PAGE_SIZE > rc) + memset(read_data + rc, 0, PAGE_SIZE - rc); flush_dcache_page(page); SetPageUptodate(page); @@ -3608,7 +3608,7 @@ read_complete: static int cifs_readpage(struct file *file, struct page *page) { - loff_t offset = (loff_t)page->index << PAGE_CACHE_SHIFT; + loff_t offset = (loff_t)page->index << PAGE_SHIFT; int rc = -EACCES; unsigned int xid; @@ -3679,8 +3679,8 @@ static int cifs_write_begin(struct file *file, struct address_space *mapping, struct page **pagep, void **fsdata) { int oncethru = 0; - pgoff_t index = pos >> PAGE_CACHE_SHIFT; - loff_t offset = pos & (PAGE_CACHE_SIZE - 1); + pgoff_t index = pos >> PAGE_SHIFT; + loff_t offset = pos & (PAGE_SIZE - 1); loff_t page_start = pos & PAGE_MASK; loff_t i_size; struct page *page; @@ -3703,7 +3703,7 @@ start: * the server. If the write is short, we'll end up doing a sync write * instead. */ - if (len == PAGE_CACHE_SIZE) + if (len == PAGE_SIZE) goto out; /* @@ -3718,7 +3718,7 @@ start: (offset == 0 && (pos + len) >= i_size)) { zero_user_segments(page, 0, offset, offset + len, - PAGE_CACHE_SIZE); + PAGE_SIZE); /* * PageChecked means that the parts of the page * to which we're not writing are considered up @@ -3737,7 +3737,7 @@ start: * do a sync write instead since PG_uptodate isn't set. 
*/ cifs_readpage_worker(file, page, &page_start); - page_cache_release(page); + put_page(page); oncethru = 1; goto start; } else { @@ -3764,7 +3764,7 @@ static void cifs_invalidate_page(struct page *page, unsigned int offset, { struct cifsInodeInfo *cifsi = CIFS_I(page->mapping->host); - if (offset == 0 && length == PAGE_CACHE_SIZE) + if (offset == 0 && length == PAGE_SIZE) cifs_fscache_invalidate_page(page, &cifsi->vfs_inode); } @@ -3772,7 +3772,7 @@ static int cifs_launder_page(struct page *page) { int rc = 0; loff_t range_start = page_offset(page); - loff_t range_end = range_start + (loff_t)(PAGE_CACHE_SIZE - 1); + loff_t range_end = range_start + (loff_t)(PAGE_SIZE - 1); struct writeback_control wbc = { .sync_mode = WB_SYNC_ALL, .nr_to_write = 0, diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index aeb26dbfa1bf..5f9ad5c42180 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -59,7 +59,7 @@ static void cifs_set_ops(struct inode *inode) /* check if server can support readpages */ if (cifs_sb_master_tcon(cifs_sb)->ses->server->maxBuf < - PAGE_CACHE_SIZE + MAX_CIFS_HDR_SIZE) + PAGE_SIZE + MAX_CIFS_HDR_SIZE) inode->i_data.a_ops = &cifs_addr_ops_smallbuf; else inode->i_data.a_ops = &cifs_addr_ops; @@ -2019,8 +2019,8 @@ int cifs_getattr(struct vfsmount *mnt, struct dentry *dentry, static int cifs_truncate_page(struct address_space *mapping, loff_t from) { - pgoff_t index = from >> PAGE_CACHE_SHIFT; - unsigned offset = from & (PAGE_CACHE_SIZE - 1); + pgoff_t index = from >> PAGE_SHIFT; + unsigned offset = from & (PAGE_SIZE - 1); struct page *page; int rc = 0; @@ -2028,9 +2028,9 @@ static int cifs_truncate_page(struct address_space *mapping, loff_t from) if (!page) return -ENOMEM; - zero_user_segment(page, offset, PAGE_CACHE_SIZE); + zero_user_segment(page, offset, PAGE_SIZE); unlock_page(page); - page_cache_release(page); + put_page(page); return rc; } diff --git a/fs/configfs/mount.c b/fs/configfs/mount.c index a8f3b589a2df..cfd91320e869 100644 --- a/fs/configfs/mount.c +++ b/fs/configfs/mount.c @@ -71,8 +71,8 @@ static int configfs_fill_super(struct super_block *sb, void *data, int silent) struct inode *inode; struct dentry *root; - sb->s_blocksize = PAGE_CACHE_SIZE; - sb->s_blocksize_bits = PAGE_CACHE_SHIFT; + sb->s_blocksize = PAGE_SIZE; + sb->s_blocksize_bits = PAGE_SHIFT; sb->s_magic = CONFIGFS_MAGIC; sb->s_op = &configfs_ops; sb->s_time_gran = 1; diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c index b862bc219cd7..2096654dd26d 100644 --- a/fs/cramfs/inode.c +++ b/fs/cramfs/inode.c @@ -152,7 +152,7 @@ static struct inode *get_cramfs_inode(struct super_block *sb, */ #define BLKS_PER_BUF_SHIFT (2) #define BLKS_PER_BUF (1 << BLKS_PER_BUF_SHIFT) -#define BUFFER_SIZE (BLKS_PER_BUF*PAGE_CACHE_SIZE) +#define BUFFER_SIZE (BLKS_PER_BUF*PAGE_SIZE) static unsigned char read_buffers[READ_BUFFERS][BUFFER_SIZE]; static unsigned buffer_blocknr[READ_BUFFERS]; @@ -173,8 +173,8 @@ static void *cramfs_read(struct super_block *sb, unsigned int offset, unsigned i if (!len) return NULL; - blocknr = offset >> PAGE_CACHE_SHIFT; - offset &= PAGE_CACHE_SIZE - 1; + blocknr = offset >> PAGE_SHIFT; + offset &= PAGE_SIZE - 1; /* Check if an existing buffer already has the data.. 
*/ for (i = 0; i < READ_BUFFERS; i++) { @@ -184,14 +184,14 @@ static void *cramfs_read(struct super_block *sb, unsigned int offset, unsigned i continue; if (blocknr < buffer_blocknr[i]) continue; - blk_offset = (blocknr - buffer_blocknr[i]) << PAGE_CACHE_SHIFT; + blk_offset = (blocknr - buffer_blocknr[i]) << PAGE_SHIFT; blk_offset += offset; if (blk_offset + len > BUFFER_SIZE) continue; return read_buffers[i] + blk_offset; } - devsize = mapping->host->i_size >> PAGE_CACHE_SHIFT; + devsize = mapping->host->i_size >> PAGE_SHIFT; /* Ok, read in BLKS_PER_BUF pages completely first. */ for (i = 0; i < BLKS_PER_BUF; i++) { @@ -213,7 +213,7 @@ static void *cramfs_read(struct super_block *sb, unsigned int offset, unsigned i wait_on_page_locked(page); if (!PageUptodate(page)) { /* asynchronous error */ - page_cache_release(page); + put_page(page); pages[i] = NULL; } } @@ -229,12 +229,12 @@ static void *cramfs_read(struct super_block *sb, unsigned int offset, unsigned i struct page *page = pages[i]; if (page) { - memcpy(data, kmap(page), PAGE_CACHE_SIZE); + memcpy(data, kmap(page), PAGE_SIZE); kunmap(page); - page_cache_release(page); + put_page(page); } else - memset(data, 0, PAGE_CACHE_SIZE); - data += PAGE_CACHE_SIZE; + memset(data, 0, PAGE_SIZE); + data += PAGE_SIZE; } return read_buffers[buffer] + offset; } @@ -353,7 +353,7 @@ static int cramfs_statfs(struct dentry *dentry, struct kstatfs *buf) u64 id = huge_encode_dev(sb->s_bdev->bd_dev); buf->f_type = CRAMFS_MAGIC; - buf->f_bsize = PAGE_CACHE_SIZE; + buf->f_bsize = PAGE_SIZE; buf->f_blocks = CRAMFS_SB(sb)->blocks; buf->f_bfree = 0; buf->f_bavail = 0; @@ -496,7 +496,7 @@ static int cramfs_readpage(struct file *file, struct page *page) int bytes_filled; void *pgdata; - maxblock = (inode->i_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; + maxblock = (inode->i_size + PAGE_SIZE - 1) >> PAGE_SHIFT; bytes_filled = 0; pgdata = kmap(page); @@ -516,14 +516,14 @@ static int cramfs_readpage(struct file *file, struct page *page) if (compr_len == 0) ; /* hole */ - else if (unlikely(compr_len > (PAGE_CACHE_SIZE << 1))) { + else if (unlikely(compr_len > (PAGE_SIZE << 1))) { pr_err("bad compressed blocksize %u\n", compr_len); goto err; } else { mutex_lock(&read_mutex); bytes_filled = cramfs_uncompress_block(pgdata, - PAGE_CACHE_SIZE, + PAGE_SIZE, cramfs_read(sb, start_offset, compr_len), compr_len); mutex_unlock(&read_mutex); @@ -532,7 +532,7 @@ static int cramfs_readpage(struct file *file, struct page *page) } } - memset(pgdata + bytes_filled, 0, PAGE_CACHE_SIZE - bytes_filled); + memset(pgdata + bytes_filled, 0, PAGE_SIZE - bytes_filled); flush_dcache_page(page); kunmap(page); SetPageUptodate(page); diff --git a/fs/crypto/crypto.c b/fs/crypto/crypto.c index 06cd1a22240b..7f5804537d30 100644 --- a/fs/crypto/crypto.c +++ b/fs/crypto/crypto.c @@ -175,10 +175,10 @@ static int do_page_crypto(struct inode *inode, FS_XTS_TWEAK_SIZE - sizeof(index)); sg_init_table(&dst, 1); - sg_set_page(&dst, dest_page, PAGE_CACHE_SIZE, 0); + sg_set_page(&dst, dest_page, PAGE_SIZE, 0); sg_init_table(&src, 1); - sg_set_page(&src, src_page, PAGE_CACHE_SIZE, 0); - skcipher_request_set_crypt(req, &src, &dst, PAGE_CACHE_SIZE, + sg_set_page(&src, src_page, PAGE_SIZE, 0); + skcipher_request_set_crypt(req, &src, &dst, PAGE_SIZE, xts_tweak); if (rw == FS_DECRYPT) res = crypto_skcipher_decrypt(req); @@ -287,7 +287,7 @@ int fscrypt_zeroout_range(struct inode *inode, pgoff_t lblk, struct bio *bio; int ret, err = 0; - BUG_ON(inode->i_sb->s_blocksize != PAGE_CACHE_SIZE); + 
BUG_ON(inode->i_sb->s_blocksize != PAGE_SIZE); ctx = fscrypt_get_ctx(inode); if (IS_ERR(ctx)) diff --git a/fs/dax.c b/fs/dax.c index 90322eb7498c..1f954166704a 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -323,7 +323,7 @@ static int dax_load_hole(struct address_space *mapping, struct page *page, size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT; if (vmf->pgoff >= size) { unlock_page(page); - page_cache_release(page); + put_page(page); return VM_FAULT_SIGBUS; } @@ -351,7 +351,7 @@ static int copy_user_bh(struct page *to, struct inode *inode, } #define NO_SECTOR -1 -#define DAX_PMD_INDEX(page_index) (page_index & (PMD_MASK >> PAGE_CACHE_SHIFT)) +#define DAX_PMD_INDEX(page_index) (page_index & (PMD_MASK >> PAGE_SHIFT)) static int dax_radix_entry(struct address_space *mapping, pgoff_t index, sector_t sector, bool pmd_entry, bool dirty) @@ -506,8 +506,8 @@ int dax_writeback_mapping_range(struct address_space *mapping, if (!mapping->nrexceptional || wbc->sync_mode != WB_SYNC_ALL) return 0; - start_index = wbc->range_start >> PAGE_CACHE_SHIFT; - end_index = wbc->range_end >> PAGE_CACHE_SHIFT; + start_index = wbc->range_start >> PAGE_SHIFT; + end_index = wbc->range_end >> PAGE_SHIFT; pmd_index = DAX_PMD_INDEX(start_index); rcu_read_lock(); @@ -642,12 +642,12 @@ int __dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf, page = find_get_page(mapping, vmf->pgoff); if (page) { if (!lock_page_or_retry(page, vma->vm_mm, vmf->flags)) { - page_cache_release(page); + put_page(page); return VM_FAULT_RETRY; } if (unlikely(page->mapping != mapping)) { unlock_page(page); - page_cache_release(page); + put_page(page); goto repeat; } size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT; @@ -711,10 +711,10 @@ int __dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf, if (page) { unmap_mapping_range(mapping, vmf->pgoff << PAGE_SHIFT, - PAGE_CACHE_SIZE, 0); + PAGE_SIZE, 0); delete_from_page_cache(page); unlock_page(page); - page_cache_release(page); + put_page(page); page = NULL; } @@ -747,7 +747,7 @@ int __dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf, unlock_page: if (page) { unlock_page(page); - page_cache_release(page); + put_page(page); } goto out; } @@ -1104,18 +1104,18 @@ int dax_zero_page_range(struct inode *inode, loff_t from, unsigned length, get_block_t get_block) { struct buffer_head bh; - pgoff_t index = from >> PAGE_CACHE_SHIFT; - unsigned offset = from & (PAGE_CACHE_SIZE-1); + pgoff_t index = from >> PAGE_SHIFT; + unsigned offset = from & (PAGE_SIZE-1); int err; /* Block boundary? 
Nothing to do */ if (!length) return 0; - BUG_ON((offset + length) > PAGE_CACHE_SIZE); + BUG_ON((offset + length) > PAGE_SIZE); memset(&bh, 0, sizeof(bh)); bh.b_bdev = inode->i_sb->s_bdev; - bh.b_size = PAGE_CACHE_SIZE; + bh.b_size = PAGE_SIZE; err = get_block(inode, index, &bh, 0); if (err < 0) return err; @@ -1123,7 +1123,7 @@ int dax_zero_page_range(struct inode *inode, loff_t from, unsigned length, struct block_device *bdev = bh.b_bdev; struct blk_dax_ctl dax = { .sector = to_sector(&bh, inode), - .size = PAGE_CACHE_SIZE, + .size = PAGE_SIZE, }; if (dax_map_atomic(bdev, &dax) < 0) @@ -1154,7 +1154,7 @@ EXPORT_SYMBOL_GPL(dax_zero_page_range); */ int dax_truncate_page(struct inode *inode, loff_t from, get_block_t get_block) { - unsigned length = PAGE_CACHE_ALIGN(from) - from; + unsigned length = PAGE_ALIGN(from) - from; return dax_zero_page_range(inode, from, length, get_block); } EXPORT_SYMBOL_GPL(dax_truncate_page); diff --git a/fs/direct-io.c b/fs/direct-io.c index 476f1ecbd1f0..472037732daf 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -172,7 +172,7 @@ static inline int dio_refill_pages(struct dio *dio, struct dio_submit *sdio) */ if (dio->page_errors == 0) dio->page_errors = ret; - page_cache_get(page); + get_page(page); dio->pages[0] = page; sdio->head = 0; sdio->tail = 1; @@ -424,7 +424,7 @@ static inline void dio_bio_submit(struct dio *dio, struct dio_submit *sdio) static inline void dio_cleanup(struct dio *dio, struct dio_submit *sdio) { while (sdio->head < sdio->tail) - page_cache_release(dio->pages[sdio->head++]); + put_page(dio->pages[sdio->head++]); } /* @@ -487,7 +487,7 @@ static int dio_bio_complete(struct dio *dio, struct bio *bio) if (dio->rw == READ && !PageCompound(page) && dio->should_dirty) set_page_dirty_lock(page); - page_cache_release(page); + put_page(page); } err = bio->bi_error; bio_put(bio); @@ -696,7 +696,7 @@ static inline int dio_bio_add_page(struct dio_submit *sdio) */ if ((sdio->cur_page_len + sdio->cur_page_offset) == PAGE_SIZE) sdio->pages_in_io--; - page_cache_get(sdio->cur_page); + get_page(sdio->cur_page); sdio->final_block_in_bio = sdio->cur_page_block + (sdio->cur_page_len >> sdio->blkbits); ret = 0; @@ -810,13 +810,13 @@ submit_page_section(struct dio *dio, struct dio_submit *sdio, struct page *page, */ if (sdio->cur_page) { ret = dio_send_cur_page(dio, sdio, map_bh); - page_cache_release(sdio->cur_page); + put_page(sdio->cur_page); sdio->cur_page = NULL; if (ret) return ret; } - page_cache_get(page); /* It is in dio */ + get_page(page); /* It is in dio */ sdio->cur_page = page; sdio->cur_page_offset = offset; sdio->cur_page_len = len; @@ -830,7 +830,7 @@ out: if (sdio->boundary) { ret = dio_send_cur_page(dio, sdio, map_bh); dio_bio_submit(dio, sdio); - page_cache_release(sdio->cur_page); + put_page(sdio->cur_page); sdio->cur_page = NULL; } return ret; @@ -947,7 +947,7 @@ static int do_direct_IO(struct dio *dio, struct dio_submit *sdio, ret = get_more_blocks(dio, sdio, map_bh); if (ret) { - page_cache_release(page); + put_page(page); goto out; } if (!buffer_mapped(map_bh)) @@ -988,7 +988,7 @@ do_holes: /* AKPM: eargh, -ENOTBLK is a hack */ if (dio->rw & WRITE) { - page_cache_release(page); + put_page(page); return -ENOTBLK; } @@ -1001,7 +1001,7 @@ do_holes: if (sdio->block_in_file >= i_size_aligned >> blkbits) { /* We hit eof */ - page_cache_release(page); + put_page(page); goto out; } zero_user(page, from, 1 << blkbits); @@ -1041,7 +1041,7 @@ do_holes: sdio->next_block_for_io, map_bh); if (ret) { - page_cache_release(page); + 
put_page(page); goto out; } sdio->next_block_for_io += this_chunk_blocks; @@ -1057,7 +1057,7 @@ next_block: } /* Drop the ref which was taken in get_user_pages() */ - page_cache_release(page); + put_page(page); } out: return ret; @@ -1281,7 +1281,7 @@ do_blockdev_direct_IO(struct kiocb *iocb, struct inode *inode, ret2 = dio_send_cur_page(dio, &sdio, &map_bh); if (retval == 0) retval = ret2; - page_cache_release(sdio.cur_page); + put_page(sdio.cur_page); sdio.cur_page = NULL; } if (sdio.bio) diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c index 00640e70ed7a..1ab012a27d9f 100644 --- a/fs/dlm/lowcomms.c +++ b/fs/dlm/lowcomms.c @@ -640,7 +640,7 @@ static int receive_from_sock(struct connection *con) con->rx_page = alloc_page(GFP_ATOMIC); if (con->rx_page == NULL) goto out_resched; - cbuf_init(&con->cb, PAGE_CACHE_SIZE); + cbuf_init(&con->cb, PAGE_SIZE); } /* @@ -657,7 +657,7 @@ static int receive_from_sock(struct connection *con) * buffer and the start of the currently used section (cb.base) */ if (cbuf_data(&con->cb) >= con->cb.base) { - iov[0].iov_len = PAGE_CACHE_SIZE - cbuf_data(&con->cb); + iov[0].iov_len = PAGE_SIZE - cbuf_data(&con->cb); iov[1].iov_len = con->cb.base; iov[1].iov_base = page_address(con->rx_page); nvec = 2; @@ -675,7 +675,7 @@ static int receive_from_sock(struct connection *con) ret = dlm_process_incoming_buffer(con->nodeid, page_address(con->rx_page), con->cb.base, con->cb.len, - PAGE_CACHE_SIZE); + PAGE_SIZE); if (ret == -EBADMSG) { log_print("lowcomms: addr=%p, base=%u, len=%u, read=%d", page_address(con->rx_page), con->cb.base, @@ -1416,7 +1416,7 @@ void *dlm_lowcomms_get_buffer(int nodeid, int len, gfp_t allocation, char **ppc) spin_lock(&con->writequeue_lock); e = list_entry(con->writequeue.prev, struct writequeue_entry, list); if ((&e->list == &con->writequeue) || - (PAGE_CACHE_SIZE - e->end < len)) { + (PAGE_SIZE - e->end < len)) { e = NULL; } else { offset = e->end; diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c index 64026e53722a..d09cb4cdd09f 100644 --- a/fs/ecryptfs/crypto.c +++ b/fs/ecryptfs/crypto.c @@ -286,7 +286,7 @@ int virt_to_scatterlist(const void *addr, int size, struct scatterlist *sg, pg = virt_to_page(addr); offset = offset_in_page(addr); sg_set_page(&sg[i], pg, 0, offset); - remainder_of_page = PAGE_CACHE_SIZE - offset; + remainder_of_page = PAGE_SIZE - offset; if (size >= remainder_of_page) { sg[i].length = remainder_of_page; addr += remainder_of_page; @@ -400,7 +400,7 @@ static loff_t lower_offset_for_page(struct ecryptfs_crypt_stat *crypt_stat, struct page *page) { return ecryptfs_lower_header_size(crypt_stat) + - ((loff_t)page->index << PAGE_CACHE_SHIFT); + ((loff_t)page->index << PAGE_SHIFT); } /** @@ -428,7 +428,7 @@ static int crypt_extent(struct ecryptfs_crypt_stat *crypt_stat, size_t extent_size = crypt_stat->extent_size; int rc; - extent_base = (((loff_t)page_index) * (PAGE_CACHE_SIZE / extent_size)); + extent_base = (((loff_t)page_index) * (PAGE_SIZE / extent_size)); rc = ecryptfs_derive_iv(extent_iv, crypt_stat, (extent_base + extent_offset)); if (rc) { @@ -498,7 +498,7 @@ int ecryptfs_encrypt_page(struct page *page) } for (extent_offset = 0; - extent_offset < (PAGE_CACHE_SIZE / crypt_stat->extent_size); + extent_offset < (PAGE_SIZE / crypt_stat->extent_size); extent_offset++) { rc = crypt_extent(crypt_stat, enc_extent_page, page, extent_offset, ENCRYPT); @@ -512,7 +512,7 @@ int ecryptfs_encrypt_page(struct page *page) lower_offset = lower_offset_for_page(crypt_stat, page); enc_extent_virt = kmap(enc_extent_page); rc = 
ecryptfs_write_lower(ecryptfs_inode, enc_extent_virt, lower_offset, - PAGE_CACHE_SIZE); + PAGE_SIZE); kunmap(enc_extent_page); if (rc < 0) { ecryptfs_printk(KERN_ERR, @@ -560,7 +560,7 @@ int ecryptfs_decrypt_page(struct page *page) lower_offset = lower_offset_for_page(crypt_stat, page); page_virt = kmap(page); - rc = ecryptfs_read_lower(page_virt, lower_offset, PAGE_CACHE_SIZE, + rc = ecryptfs_read_lower(page_virt, lower_offset, PAGE_SIZE, ecryptfs_inode); kunmap(page); if (rc < 0) { @@ -571,7 +571,7 @@ int ecryptfs_decrypt_page(struct page *page) } for (extent_offset = 0; - extent_offset < (PAGE_CACHE_SIZE / crypt_stat->extent_size); + extent_offset < (PAGE_SIZE / crypt_stat->extent_size); extent_offset++) { rc = crypt_extent(crypt_stat, page, page, extent_offset, DECRYPT); @@ -659,11 +659,11 @@ void ecryptfs_set_default_sizes(struct ecryptfs_crypt_stat *crypt_stat) if (crypt_stat->flags & ECRYPTFS_METADATA_IN_XATTR) crypt_stat->metadata_size = ECRYPTFS_MINIMUM_HEADER_EXTENT_SIZE; else { - if (PAGE_CACHE_SIZE <= ECRYPTFS_MINIMUM_HEADER_EXTENT_SIZE) + if (PAGE_SIZE <= ECRYPTFS_MINIMUM_HEADER_EXTENT_SIZE) crypt_stat->metadata_size = ECRYPTFS_MINIMUM_HEADER_EXTENT_SIZE; else - crypt_stat->metadata_size = PAGE_CACHE_SIZE; + crypt_stat->metadata_size = PAGE_SIZE; } } @@ -1442,7 +1442,7 @@ int ecryptfs_read_metadata(struct dentry *ecryptfs_dentry) ECRYPTFS_VALIDATE_HEADER_SIZE); if (rc) { /* metadata is not in the file header, so try xattrs */ - memset(page_virt, 0, PAGE_CACHE_SIZE); + memset(page_virt, 0, PAGE_SIZE); rc = ecryptfs_read_xattr_region(page_virt, ecryptfs_inode); if (rc) { printk(KERN_DEBUG "Valid eCryptfs headers not found in " @@ -1475,7 +1475,7 @@ int ecryptfs_read_metadata(struct dentry *ecryptfs_dentry) } out: if (page_virt) { - memset(page_virt, 0, PAGE_CACHE_SIZE); + memset(page_virt, 0, PAGE_SIZE); kmem_cache_free(ecryptfs_header_cache, page_virt); } return rc; diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index 121114e9a464..2a988f3a0cdb 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c @@ -765,8 +765,8 @@ static int truncate_upper(struct dentry *dentry, struct iattr *ia, * in which ia->ia_size is located. Fill in the end of * that page from (ia->ia_size & ~PAGE_CACHE_MASK) to * PAGE_CACHE_SIZE with zeros. 
*/ - size_t num_zeros = (PAGE_CACHE_SIZE - - (ia->ia_size & ~PAGE_CACHE_MASK)); + size_t num_zeros = (PAGE_SIZE + - (ia->ia_size & ~PAGE_MASK)); if (!(crypt_stat->flags & ECRYPTFS_ENCRYPTED)) { truncate_setsize(inode, ia->ia_size); diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c index 9893d1538122..3cf1546dca82 100644 --- a/fs/ecryptfs/keystore.c +++ b/fs/ecryptfs/keystore.c @@ -1798,7 +1798,7 @@ int ecryptfs_parse_packet_set(struct ecryptfs_crypt_stat *crypt_stat, * added the our &auth_tok_list */ next_packet_is_auth_tok_packet = 1; while (next_packet_is_auth_tok_packet) { - size_t max_packet_size = ((PAGE_CACHE_SIZE - 8) - i); + size_t max_packet_size = ((PAGE_SIZE - 8) - i); switch (src[i]) { case ECRYPTFS_TAG_3_PACKET_TYPE: diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c index 8b0b4a73116d..1698132d0e57 100644 --- a/fs/ecryptfs/main.c +++ b/fs/ecryptfs/main.c @@ -695,12 +695,12 @@ static struct ecryptfs_cache_info { { .cache = &ecryptfs_header_cache, .name = "ecryptfs_headers", - .size = PAGE_CACHE_SIZE, + .size = PAGE_SIZE, }, { .cache = &ecryptfs_xattr_cache, .name = "ecryptfs_xattr_cache", - .size = PAGE_CACHE_SIZE, + .size = PAGE_SIZE, }, { .cache = &ecryptfs_key_record_cache, @@ -818,7 +818,7 @@ static int __init ecryptfs_init(void) { int rc; - if (ECRYPTFS_DEFAULT_EXTENT_SIZE > PAGE_CACHE_SIZE) { + if (ECRYPTFS_DEFAULT_EXTENT_SIZE > PAGE_SIZE) { rc = -EINVAL; ecryptfs_printk(KERN_ERR, "The eCryptfs extent size is " "larger than the host's page size, and so " @@ -826,7 +826,7 @@ static int __init ecryptfs_init(void) "default eCryptfs extent size is [%u] bytes; " "the page size is [%lu] bytes.\n", ECRYPTFS_DEFAULT_EXTENT_SIZE, - (unsigned long)PAGE_CACHE_SIZE); + (unsigned long)PAGE_SIZE); goto out; } rc = ecryptfs_init_kmem_caches(); diff --git a/fs/ecryptfs/mmap.c b/fs/ecryptfs/mmap.c index 1f5865263b3e..e6b1d80952b9 100644 --- a/fs/ecryptfs/mmap.c +++ b/fs/ecryptfs/mmap.c @@ -122,7 +122,7 @@ ecryptfs_copy_up_encrypted_with_header(struct page *page, struct ecryptfs_crypt_stat *crypt_stat) { loff_t extent_num_in_page = 0; - loff_t num_extents_per_page = (PAGE_CACHE_SIZE + loff_t num_extents_per_page = (PAGE_SIZE / crypt_stat->extent_size); int rc = 0; @@ -138,7 +138,7 @@ ecryptfs_copy_up_encrypted_with_header(struct page *page, char *page_virt; page_virt = kmap_atomic(page); - memset(page_virt, 0, PAGE_CACHE_SIZE); + memset(page_virt, 0, PAGE_SIZE); /* TODO: Support more than one header extent */ if (view_extent_num == 0) { size_t written; @@ -164,8 +164,8 @@ ecryptfs_copy_up_encrypted_with_header(struct page *page, - crypt_stat->metadata_size); rc = ecryptfs_read_lower_page_segment( - page, (lower_offset >> PAGE_CACHE_SHIFT), - (lower_offset & ~PAGE_CACHE_MASK), + page, (lower_offset >> PAGE_SHIFT), + (lower_offset & ~PAGE_MASK), crypt_stat->extent_size, page->mapping->host); if (rc) { printk(KERN_ERR "%s: Error attempting to read " @@ -198,7 +198,7 @@ static int ecryptfs_readpage(struct file *file, struct page *page) if (!crypt_stat || !(crypt_stat->flags & ECRYPTFS_ENCRYPTED)) { rc = ecryptfs_read_lower_page_segment(page, page->index, 0, - PAGE_CACHE_SIZE, + PAGE_SIZE, page->mapping->host); } else if (crypt_stat->flags & ECRYPTFS_VIEW_AS_ENCRYPTED) { if (crypt_stat->flags & ECRYPTFS_METADATA_IN_XATTR) { @@ -215,7 +215,7 @@ static int ecryptfs_readpage(struct file *file, struct page *page) } else { rc = ecryptfs_read_lower_page_segment( - page, page->index, 0, PAGE_CACHE_SIZE, + page, page->index, 0, PAGE_SIZE, page->mapping->host); if (rc) { printk(KERN_ERR 
"Error reading page; rc = " @@ -250,12 +250,12 @@ static int fill_zeros_to_end_of_page(struct page *page, unsigned int to) struct inode *inode = page->mapping->host; int end_byte_in_page; - if ((i_size_read(inode) / PAGE_CACHE_SIZE) != page->index) + if ((i_size_read(inode) / PAGE_SIZE) != page->index) goto out; - end_byte_in_page = i_size_read(inode) % PAGE_CACHE_SIZE; + end_byte_in_page = i_size_read(inode) % PAGE_SIZE; if (to > end_byte_in_page) end_byte_in_page = to; - zero_user_segment(page, end_byte_in_page, PAGE_CACHE_SIZE); + zero_user_segment(page, end_byte_in_page, PAGE_SIZE); out: return 0; } @@ -279,7 +279,7 @@ static int ecryptfs_write_begin(struct file *file, loff_t pos, unsigned len, unsigned flags, struct page **pagep, void **fsdata) { - pgoff_t index = pos >> PAGE_CACHE_SHIFT; + pgoff_t index = pos >> PAGE_SHIFT; struct page *page; loff_t prev_page_end_size; int rc = 0; @@ -289,14 +289,14 @@ static int ecryptfs_write_begin(struct file *file, return -ENOMEM; *pagep = page; - prev_page_end_size = ((loff_t)index << PAGE_CACHE_SHIFT); + prev_page_end_size = ((loff_t)index << PAGE_SHIFT); if (!PageUptodate(page)) { struct ecryptfs_crypt_stat *crypt_stat = &ecryptfs_inode_to_private(mapping->host)->crypt_stat; if (!(crypt_stat->flags & ECRYPTFS_ENCRYPTED)) { rc = ecryptfs_read_lower_page_segment( - page, index, 0, PAGE_CACHE_SIZE, mapping->host); + page, index, 0, PAGE_SIZE, mapping->host); if (rc) { printk(KERN_ERR "%s: Error attempting to read " "lower page segment; rc = [%d]\n", @@ -322,7 +322,7 @@ static int ecryptfs_write_begin(struct file *file, SetPageUptodate(page); } else { rc = ecryptfs_read_lower_page_segment( - page, index, 0, PAGE_CACHE_SIZE, + page, index, 0, PAGE_SIZE, mapping->host); if (rc) { printk(KERN_ERR "%s: Error reading " @@ -336,9 +336,9 @@ static int ecryptfs_write_begin(struct file *file, } else { if (prev_page_end_size >= i_size_read(page->mapping->host)) { - zero_user(page, 0, PAGE_CACHE_SIZE); + zero_user(page, 0, PAGE_SIZE); SetPageUptodate(page); - } else if (len < PAGE_CACHE_SIZE) { + } else if (len < PAGE_SIZE) { rc = ecryptfs_decrypt_page(page); if (rc) { printk(KERN_ERR "%s: Error decrypting " @@ -371,11 +371,11 @@ static int ecryptfs_write_begin(struct file *file, * of page? Zero it out. 
*/ if ((i_size_read(mapping->host) == prev_page_end_size) && (pos != 0)) - zero_user(page, 0, PAGE_CACHE_SIZE); + zero_user(page, 0, PAGE_SIZE); out: if (unlikely(rc)) { unlock_page(page); - page_cache_release(page); + put_page(page); *pagep = NULL; } return rc; @@ -437,7 +437,7 @@ static int ecryptfs_write_inode_size_to_xattr(struct inode *ecryptfs_inode) } inode_lock(lower_inode); size = lower_inode->i_op->getxattr(lower_dentry, ECRYPTFS_XATTR_NAME, - xattr_virt, PAGE_CACHE_SIZE); + xattr_virt, PAGE_SIZE); if (size < 0) size = 8; put_unaligned_be64(i_size_read(ecryptfs_inode), xattr_virt); @@ -479,8 +479,8 @@ static int ecryptfs_write_end(struct file *file, loff_t pos, unsigned len, unsigned copied, struct page *page, void *fsdata) { - pgoff_t index = pos >> PAGE_CACHE_SHIFT; - unsigned from = pos & (PAGE_CACHE_SIZE - 1); + pgoff_t index = pos >> PAGE_SHIFT; + unsigned from = pos & (PAGE_SIZE - 1); unsigned to = from + copied; struct inode *ecryptfs_inode = mapping->host; struct ecryptfs_crypt_stat *crypt_stat = @@ -500,7 +500,7 @@ static int ecryptfs_write_end(struct file *file, goto out; } if (!PageUptodate(page)) { - if (copied < PAGE_CACHE_SIZE) { + if (copied < PAGE_SIZE) { rc = 0; goto out; } @@ -533,7 +533,7 @@ static int ecryptfs_write_end(struct file *file, rc = copied; out: unlock_page(page); - page_cache_release(page); + put_page(page); return rc; } diff --git a/fs/ecryptfs/read_write.c b/fs/ecryptfs/read_write.c index 09fe622274e4..158a3a39f82d 100644 --- a/fs/ecryptfs/read_write.c +++ b/fs/ecryptfs/read_write.c @@ -74,7 +74,7 @@ int ecryptfs_write_lower_page_segment(struct inode *ecryptfs_inode, loff_t offset; int rc; - offset = ((((loff_t)page_for_lower->index) << PAGE_CACHE_SHIFT) + offset = ((((loff_t)page_for_lower->index) << PAGE_SHIFT) + offset_in_page); virt = kmap(page_for_lower); rc = ecryptfs_write_lower(ecryptfs_inode, virt, offset, size); @@ -123,9 +123,9 @@ int ecryptfs_write(struct inode *ecryptfs_inode, char *data, loff_t offset, else pos = offset; while (pos < (offset + size)) { - pgoff_t ecryptfs_page_idx = (pos >> PAGE_CACHE_SHIFT); - size_t start_offset_in_page = (pos & ~PAGE_CACHE_MASK); - size_t num_bytes = (PAGE_CACHE_SIZE - start_offset_in_page); + pgoff_t ecryptfs_page_idx = (pos >> PAGE_SHIFT); + size_t start_offset_in_page = (pos & ~PAGE_MASK); + size_t num_bytes = (PAGE_SIZE - start_offset_in_page); loff_t total_remaining_bytes = ((offset + size) - pos); if (fatal_signal_pending(current)) { @@ -165,7 +165,7 @@ int ecryptfs_write(struct inode *ecryptfs_inode, char *data, loff_t offset, * Fill in zero values to the end of the page */ memset(((char *)ecryptfs_page_virt + start_offset_in_page), 0, - PAGE_CACHE_SIZE - start_offset_in_page); + PAGE_SIZE - start_offset_in_page); } /* pos >= offset, we are now writing the data request */ @@ -186,7 +186,7 @@ int ecryptfs_write(struct inode *ecryptfs_inode, char *data, loff_t offset, ecryptfs_page, start_offset_in_page, data_offset); - page_cache_release(ecryptfs_page); + put_page(ecryptfs_page); if (rc) { printk(KERN_ERR "%s: Error encrypting " "page; rc = [%d]\n", __func__, rc); @@ -262,7 +262,7 @@ int ecryptfs_read_lower_page_segment(struct page *page_for_ecryptfs, loff_t offset; int rc; - offset = ((((loff_t)page_index) << PAGE_CACHE_SHIFT) + offset_in_page); + offset = ((((loff_t)page_index) << PAGE_SHIFT) + offset_in_page); virt = kmap(page_for_ecryptfs); rc = ecryptfs_read_lower(virt, offset, size, ecryptfs_inode); if (rc > 0) diff --git a/fs/efivarfs/super.c b/fs/efivarfs/super.c index 
dd029d13ea61..553c5d2db4a4 100644 --- a/fs/efivarfs/super.c +++ b/fs/efivarfs/super.c @@ -197,8 +197,8 @@ static int efivarfs_fill_super(struct super_block *sb, void *data, int silent) efivarfs_sb = sb; sb->s_maxbytes = MAX_LFS_FILESIZE; - sb->s_blocksize = PAGE_CACHE_SIZE; - sb->s_blocksize_bits = PAGE_CACHE_SHIFT; + sb->s_blocksize = PAGE_SIZE; + sb->s_blocksize_bits = PAGE_SHIFT; sb->s_magic = EFIVARFS_MAGIC; sb->s_op = &efivarfs_ops; sb->s_d_op = &efivarfs_d_ops; diff --git a/fs/exofs/dir.c b/fs/exofs/dir.c index e5bb2abf77f9..547b93cbea63 100644 --- a/fs/exofs/dir.c +++ b/fs/exofs/dir.c @@ -41,16 +41,16 @@ static inline unsigned exofs_chunk_size(struct inode *inode) static inline void exofs_put_page(struct page *page) { kunmap(page); - page_cache_release(page); + put_page(page); } static unsigned exofs_last_byte(struct inode *inode, unsigned long page_nr) { loff_t last_byte = inode->i_size; - last_byte -= page_nr << PAGE_CACHE_SHIFT; - if (last_byte > PAGE_CACHE_SIZE) - last_byte = PAGE_CACHE_SIZE; + last_byte -= page_nr << PAGE_SHIFT; + if (last_byte > PAGE_SIZE) + last_byte = PAGE_SIZE; return last_byte; } @@ -85,13 +85,13 @@ static void exofs_check_page(struct page *page) unsigned chunk_size = exofs_chunk_size(dir); char *kaddr = page_address(page); unsigned offs, rec_len; - unsigned limit = PAGE_CACHE_SIZE; + unsigned limit = PAGE_SIZE; struct exofs_dir_entry *p; char *error; /* if the page is the last one in the directory */ - if ((dir->i_size >> PAGE_CACHE_SHIFT) == page->index) { - limit = dir->i_size & ~PAGE_CACHE_MASK; + if ((dir->i_size >> PAGE_SHIFT) == page->index) { + limit = dir->i_size & ~PAGE_MASK; if (limit & (chunk_size - 1)) goto Ebadsize; if (!limit) @@ -138,7 +138,7 @@ bad_entry: EXOFS_ERR( "ERROR [exofs_check_page]: bad entry in directory(0x%lx): %s - " "offset=%lu, inode=0x%llu, rec_len=%d, name_len=%d\n", - dir->i_ino, error, (page->index<<PAGE_CACHE_SHIFT)+offs, + dir->i_ino, error, (page->index<<PAGE_SHIFT)+offs, _LLU(le64_to_cpu(p->inode_no)), rec_len, p->name_len); goto fail; @@ -147,7 +147,7 @@ Eend: EXOFS_ERR("ERROR [exofs_check_page]: " "entry in directory(0x%lx) spans the page boundary" "offset=%lu, inode=0x%llx\n", - dir->i_ino, (page->index<<PAGE_CACHE_SHIFT)+offs, + dir->i_ino, (page->index<<PAGE_SHIFT)+offs, _LLU(le64_to_cpu(p->inode_no))); fail: SetPageChecked(page); @@ -237,8 +237,8 @@ exofs_readdir(struct file *file, struct dir_context *ctx) { loff_t pos = ctx->pos; struct inode *inode = file_inode(file); - unsigned int offset = pos & ~PAGE_CACHE_MASK; - unsigned long n = pos >> PAGE_CACHE_SHIFT; + unsigned int offset = pos & ~PAGE_MASK; + unsigned long n = pos >> PAGE_SHIFT; unsigned long npages = dir_pages(inode); unsigned chunk_mask = ~(exofs_chunk_size(inode)-1); int need_revalidate = (file->f_version != inode->i_version); @@ -254,7 +254,7 @@ exofs_readdir(struct file *file, struct dir_context *ctx) if (IS_ERR(page)) { EXOFS_ERR("ERROR: bad page in directory(0x%lx)\n", inode->i_ino); - ctx->pos += PAGE_CACHE_SIZE - offset; + ctx->pos += PAGE_SIZE - offset; return PTR_ERR(page); } kaddr = page_address(page); @@ -262,7 +262,7 @@ exofs_readdir(struct file *file, struct dir_context *ctx) if (offset) { offset = exofs_validate_entry(kaddr, offset, chunk_mask); - ctx->pos = (n<<PAGE_CACHE_SHIFT) + offset; + ctx->pos = (n<<PAGE_SHIFT) + offset; } file->f_version = inode->i_version; need_revalidate = 0; @@ -449,7 +449,7 @@ int exofs_add_link(struct dentry *dentry, struct inode *inode) kaddr = page_address(page); dir_end = kaddr + exofs_last_byte(dir, n); de = (struct exofs_dir_entry *)kaddr; - kaddr += PAGE_CACHE_SIZE - reclen; + kaddr += PAGE_SIZE - reclen; while ((char *)de <= kaddr) { if ((char *)de == dir_end) { name_len = 0; @@ -602,7 +602,7 @@ int
exofs_make_empty(struct inode *inode, struct inode *parent) kunmap_atomic(kaddr); err = exofs_commit_chunk(page, 0, chunk_size); fail: - page_cache_release(page); + put_page(page); return err; } diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c index 9eaf595aeaf8..49e1bd00b4ec 100644 --- a/fs/exofs/inode.c +++ b/fs/exofs/inode.c @@ -317,7 +317,7 @@ static int read_exec(struct page_collect *pcol) if (!pcol->ios) { int ret = ore_get_rw_state(&pcol->sbi->layout, &oi->oc, true, - pcol->pg_first << PAGE_CACHE_SHIFT, + pcol->pg_first << PAGE_SHIFT, pcol->length, &pcol->ios); if (ret) @@ -383,7 +383,7 @@ static int readpage_strip(void *data, struct page *page) struct inode *inode = pcol->inode; struct exofs_i_info *oi = exofs_i(inode); loff_t i_size = i_size_read(inode); - pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT; + pgoff_t end_index = i_size >> PAGE_SHIFT; size_t len; int ret; @@ -397,9 +397,9 @@ static int readpage_strip(void *data, struct page *page) pcol->that_locked_page = page; if (page->index < end_index) - len = PAGE_CACHE_SIZE; + len = PAGE_SIZE; else if (page->index == end_index) - len = i_size & ~PAGE_CACHE_MASK; + len = i_size & ~PAGE_MASK; else len = 0; @@ -442,8 +442,8 @@ try_again: goto fail; } - if (len != PAGE_CACHE_SIZE) - zero_user(page, len, PAGE_CACHE_SIZE - len); + if (len != PAGE_SIZE) + zero_user(page, len, PAGE_SIZE - len); EXOFS_DBGMSG2(" readpage_strip(0x%lx, 0x%lx) len=0x%zx\n", inode->i_ino, page->index, len); @@ -609,7 +609,7 @@ static void __r4w_put_page(void *priv, struct page *page) if ((pcol->that_locked_page != page) && (ZERO_PAGE(0) != page)) { EXOFS_DBGMSG2("index=0x%lx\n", page->index); - page_cache_release(page); + put_page(page); return; } EXOFS_DBGMSG2("that_locked_page index=0x%lx\n", @@ -633,7 +633,7 @@ static int write_exec(struct page_collect *pcol) BUG_ON(pcol->ios); ret = ore_get_rw_state(&pcol->sbi->layout, &oi->oc, false, - pcol->pg_first << PAGE_CACHE_SHIFT, + pcol->pg_first << PAGE_SHIFT, pcol->length, &pcol->ios); if (unlikely(ret)) goto err; @@ -696,7 +696,7 @@ static int writepage_strip(struct page *page, struct inode *inode = pcol->inode; struct exofs_i_info *oi = exofs_i(inode); loff_t i_size = i_size_read(inode); - pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT; + pgoff_t end_index = i_size >> PAGE_SHIFT; size_t len; int ret; @@ -708,9 +708,9 @@ static int writepage_strip(struct page *page, if (page->index < end_index) /* in this case, the page is within the limits of the file */ - len = PAGE_CACHE_SIZE; + len = PAGE_SIZE; else { - len = i_size & ~PAGE_CACHE_MASK; + len = i_size & ~PAGE_MASK; if (page->index > end_index || !len) { /* in this case, the page is outside the limits @@ -790,10 +790,10 @@ static int exofs_writepages(struct address_space *mapping, long start, end, expected_pages; int ret; - start = wbc->range_start >> PAGE_CACHE_SHIFT; + start = wbc->range_start >> PAGE_SHIFT; end = (wbc->range_end == LLONG_MAX) ? 
start + mapping->nrpages : - wbc->range_end >> PAGE_CACHE_SHIFT; + wbc->range_end >> PAGE_SHIFT; if (start || end) expected_pages = end - start + 1; @@ -881,15 +881,15 @@ int exofs_write_begin(struct file *file, struct address_space *mapping, } /* read modify write */ - if (!PageUptodate(page) && (len != PAGE_CACHE_SIZE)) { + if (!PageUptodate(page) && (len != PAGE_SIZE)) { loff_t i_size = i_size_read(mapping->host); - pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT; + pgoff_t end_index = i_size >> PAGE_SHIFT; size_t rlen; if (page->index < end_index) - rlen = PAGE_CACHE_SIZE; + rlen = PAGE_SIZE; else if (page->index == end_index) - rlen = i_size & ~PAGE_CACHE_MASK; + rlen = i_size & ~PAGE_MASK; else rlen = 0; diff --git a/fs/exofs/namei.c b/fs/exofs/namei.c index c20d77df2679..622a686bb08b 100644 --- a/fs/exofs/namei.c +++ b/fs/exofs/namei.c @@ -292,11 +292,11 @@ static int exofs_rename(struct inode *old_dir, struct dentry *old_dentry, out_dir: if (dir_de) { kunmap(dir_page); - page_cache_release(dir_page); + put_page(dir_page); } out_old: kunmap(old_page); - page_cache_release(old_page); + put_page(old_page); out: return err; } diff --git a/fs/ext2/dir.c b/fs/ext2/dir.c index 0c6638b40f21..1b45694de316 100644 --- a/fs/ext2/dir.c +++ b/fs/ext2/dir.c @@ -67,7 +67,7 @@ static inline unsigned ext2_chunk_size(struct inode *inode) static inline void ext2_put_page(struct page *page) { kunmap(page); - page_cache_release(page); + put_page(page); } /* @@ -79,9 +79,9 @@ ext2_last_byte(struct inode *inode, unsigned long page_nr) { unsigned last_byte = inode->i_size; - last_byte -= page_nr << PAGE_CACHE_SHIFT; - if (last_byte > PAGE_CACHE_SIZE) - last_byte = PAGE_CACHE_SIZE; + last_byte -= page_nr << PAGE_SHIFT; + if (last_byte > PAGE_SIZE) + last_byte = PAGE_SIZE; return last_byte; } @@ -118,12 +118,12 @@ static void ext2_check_page(struct page *page, int quiet) char *kaddr = page_address(page); u32 max_inumber = le32_to_cpu(EXT2_SB(sb)->s_es->s_inodes_count); unsigned offs, rec_len; - unsigned limit = PAGE_CACHE_SIZE; + unsigned limit = PAGE_SIZE; ext2_dirent *p; char *error; - if ((dir->i_size >> PAGE_CACHE_SHIFT) == page->index) { - limit = dir->i_size & ~PAGE_CACHE_MASK; + if ((dir->i_size >> PAGE_SHIFT) == page->index) { + limit = dir->i_size & ~PAGE_MASK; if (limit & (chunk_size - 1)) goto Ebadsize; if (!limit) @@ -176,7 +176,7 @@ bad_entry: if (!quiet) ext2_error(sb, __func__, "bad entry in directory #%lu: : %s - " "offset=%lu, inode=%lu, rec_len=%d, name_len=%d", - dir->i_ino, error, (page->index<<PAGE_CACHE_SHIFT)+offs, + dir->i_ino, error, (page->index<<PAGE_SHIFT)+offs, (unsigned long) le32_to_cpu(p->inode), rec_len, p->name_len); goto fail; @@ -186,7 +186,7 @@ Eend: ext2_error(sb, "ext2_check_page", "entry in directory #%lu spans the page boundary" "offset=%lu, inode=%lu", - dir->i_ino, (page->index<<PAGE_CACHE_SHIFT)+offs, + dir->i_ino, (page->index<<PAGE_SHIFT)+offs, (unsigned long) le32_to_cpu(p->inode)); } fail: @@ -287,8 +287,8 @@ ext2_readdir(struct file *file, struct dir_context *ctx) loff_t pos = ctx->pos; struct inode *inode = file_inode(file); struct super_block *sb = inode->i_sb; - unsigned int offset = pos & ~PAGE_CACHE_MASK; - unsigned long n = pos >> PAGE_CACHE_SHIFT; + unsigned int offset = pos & ~PAGE_MASK; + unsigned long n = pos >> PAGE_SHIFT; unsigned long npages = dir_pages(inode); unsigned chunk_mask = ~(ext2_chunk_size(inode)-1); unsigned char *types = NULL; @@ -309,14 +309,14 @@ ext2_readdir(struct file *file, struct dir_context *ctx) ext2_error(sb, __func__, "bad page in #%lu", inode->i_ino); - ctx->pos += PAGE_CACHE_SIZE - offset; + ctx->pos += PAGE_SIZE - offset; return PTR_ERR(page); } kaddr = page_address(page); if
(unlikely(need_revalidate)) { if (offset) { offset = ext2_validate_entry(kaddr, offset, chunk_mask); - ctx->pos = (n<<PAGE_CACHE_SHIFT) + offset; + ctx->pos = (n<<PAGE_SHIFT) + offset; } file->f_version = inode->i_version; need_revalidate = 0; @@ -406,7 +406,7 @@ struct ext2_dir_entry_2 *ext2_find_entry (struct inode * dir, if (++n >= npages) n = 0; /* next page is past the blocks we've got */ - if (unlikely(n > (dir->i_blocks >> (PAGE_CACHE_SHIFT - 9)))) { + if (unlikely(n > (dir->i_blocks >> (PAGE_SHIFT - 9)))) { ext2_error(dir->i_sb, __func__, "dir %lu size %lld exceeds block count %llu", dir->i_ino, dir->i_size, @@ -511,7 +511,7 @@ int ext2_add_link (struct dentry *dentry, struct inode *inode) kaddr = page_address(page); dir_end = kaddr + ext2_last_byte(dir, n); de = (ext2_dirent *)kaddr; - kaddr += PAGE_CACHE_SIZE - reclen; + kaddr += PAGE_SIZE - reclen; while ((char *)de <= kaddr) { if ((char *)de == dir_end) { /* We hit i_size */ @@ -655,7 +655,7 @@ int ext2_make_empty(struct inode *inode, struct inode *parent) kunmap_atomic(kaddr); err = ext2_commit_chunk(page, 0, chunk_size); fail: - page_cache_release(page); + put_page(page); return err; } diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c index 7a2be8f7f3c3..d34843925b23 100644 --- a/fs/ext2/namei.c +++ b/fs/ext2/namei.c @@ -398,7 +398,7 @@ static int ext2_rename (struct inode * old_dir, struct dentry * old_dentry, ext2_set_link(old_inode, dir_de, dir_page, new_dir, 0); else { kunmap(dir_page); - page_cache_release(dir_page); + put_page(dir_page); } inode_dec_link_count(old_dir); } @@ -408,11 +408,11 @@ static int ext2_rename (struct inode * old_dir, struct dentry * old_dentry, out_dir: if (dir_de) { kunmap(dir_page); - page_cache_release(dir_page); + put_page(dir_page); } out_old: kunmap(old_page); - page_cache_release(old_page); + put_page(old_page); out: return err; } diff --git a/fs/ext4/crypto.c b/fs/ext4/crypto.c index edc053a81914..2580ef3346ca 100644 --- a/fs/ext4/crypto.c +++ b/fs/ext4/crypto.c @@ -283,10 +283,10 @@ static int ext4_page_crypto(struct inode *inode, EXT4_XTS_TWEAK_SIZE - sizeof(index)); sg_init_table(&dst, 1); - sg_set_page(&dst, dest_page, PAGE_CACHE_SIZE, 0); + sg_set_page(&dst, dest_page, PAGE_SIZE, 0); sg_init_table(&src, 1); - sg_set_page(&src, src_page, PAGE_CACHE_SIZE, 0); - skcipher_request_set_crypt(req, &src, &dst, PAGE_CACHE_SIZE, + sg_set_page(&src, src_page, PAGE_SIZE, 0); + skcipher_request_set_crypt(req, &src, &dst, PAGE_SIZE, xts_tweak); if (rw == EXT4_DECRYPT) res = crypto_skcipher_decrypt(req); @@ -396,7 +396,7 @@ int ext4_encrypted_zeroout(struct inode *inode, ext4_lblk_t lblk, (unsigned long) inode->i_ino, lblk, len); #endif - BUG_ON(inode->i_sb->s_blocksize != PAGE_CACHE_SIZE); + BUG_ON(inode->i_sb->s_blocksize != PAGE_SIZE); ctx = ext4_get_crypto_ctx(inode); if (IS_ERR(ctx)) diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c index 50ba27cbed03..561d7308b393 100644 --- a/fs/ext4/dir.c +++ b/fs/ext4/dir.c @@ -155,13 +155,13 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx) err = ext4_map_blocks(NULL, inode, &map, 0); if (err > 0) { pgoff_t index = map.m_pblk >> - (PAGE_CACHE_SHIFT - inode->i_blkbits); + (PAGE_SHIFT - inode->i_blkbits); if (!ra_has_index(&file->f_ra, index)) page_cache_sync_readahead( sb->s_bdev->bd_inode->i_mapping, &file->f_ra, file, index, 1); - file->f_ra.prev_pos = (loff_t)index << PAGE_CACHE_SHIFT; + file->f_ra.prev_pos = (loff_t)index << PAGE_SHIFT; bh = ext4_bread(NULL, inode, map.m_lblk, 0); if (IS_ERR(bh)) { err = PTR_ERR(bh); diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 6659e216385e..0caece398eb8 100644 ---
a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -428,8 +428,8 @@ static int ext4_find_unwritten_pgoff(struct inode *inode, lastoff = startoff; endoff = (loff_t)end_blk << blkbits; - index = startoff >> PAGE_CACHE_SHIFT; - end = endoff >> PAGE_CACHE_SHIFT; + index = startoff >> PAGE_SHIFT; + end = endoff >> PAGE_SHIFT; pagevec_init(&pvec, 0); do { diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c index 7cbdd3752ba5..7bc6c855cc18 100644 --- a/fs/ext4/inline.c +++ b/fs/ext4/inline.c @@ -482,7 +482,7 @@ static int ext4_read_inline_page(struct inode *inode, struct page *page) ret = ext4_read_inline_data(inode, kaddr, len, &iloc); flush_dcache_page(page); kunmap_atomic(kaddr); - zero_user_segment(page, len, PAGE_CACHE_SIZE); + zero_user_segment(page, len, PAGE_SIZE); SetPageUptodate(page); brelse(iloc.bh); @@ -507,7 +507,7 @@ int ext4_readpage_inline(struct inode *inode, struct page *page) if (!page->index) ret = ext4_read_inline_page(inode, page); else if (!PageUptodate(page)) { - zero_user_segment(page, 0, PAGE_CACHE_SIZE); + zero_user_segment(page, 0, PAGE_SIZE); SetPageUptodate(page); } @@ -595,7 +595,7 @@ retry: if (ret) { unlock_page(page); - page_cache_release(page); + put_page(page); page = NULL; ext4_orphan_add(handle, inode); up_write(&EXT4_I(inode)->xattr_sem); @@ -621,7 +621,7 @@ retry: out: if (page) { unlock_page(page); - page_cache_release(page); + put_page(page); } if (sem_held) up_write(&EXT4_I(inode)->xattr_sem); @@ -690,7 +690,7 @@ int ext4_try_to_write_inline_data(struct address_space *mapping, if (!ext4_has_inline_data(inode)) { ret = 0; unlock_page(page); - page_cache_release(page); + put_page(page); goto out_up_read; } @@ -815,7 +815,7 @@ static int ext4_da_convert_inline_data_to_extent(struct address_space *mapping, if (ret) { up_read(&EXT4_I(inode)->xattr_sem); unlock_page(page); - page_cache_release(page); + put_page(page); ext4_truncate_failed_write(inode); return ret; } @@ -829,7 +829,7 @@ out: up_read(&EXT4_I(inode)->xattr_sem); if (page) { unlock_page(page); - page_cache_release(page); + put_page(page); } return ret; } @@ -919,7 +919,7 @@ retry_journal: out_release_page: up_read(&EXT4_I(inode)->xattr_sem); unlock_page(page); - page_cache_release(page); + put_page(page); out_journal: ext4_journal_stop(handle); out: @@ -947,7 +947,7 @@ int ext4_da_write_inline_data_end(struct inode *inode, loff_t pos, i_size_changed = 1; } unlock_page(page); - page_cache_release(page); + put_page(page); /* * Don't mark the inode dirty under page lock. 
First, it unnecessarily diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index dab84a2530ff..8a43c683eef9 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -1057,7 +1057,7 @@ int do_journal_get_write_access(handle_t *handle, static int ext4_block_write_begin(struct page *page, loff_t pos, unsigned len, get_block_t *get_block) { - unsigned from = pos & (PAGE_CACHE_SIZE - 1); + unsigned from = pos & (PAGE_SIZE - 1); unsigned to = from + len; struct inode *inode = page->mapping->host; unsigned block_start, block_end; @@ -1069,15 +1069,15 @@ static int ext4_block_write_begin(struct page *page, loff_t pos, unsigned len, bool decrypt = false; BUG_ON(!PageLocked(page)); - BUG_ON(from > PAGE_CACHE_SIZE); - BUG_ON(to > PAGE_CACHE_SIZE); + BUG_ON(from > PAGE_SIZE); + BUG_ON(to > PAGE_SIZE); BUG_ON(from > to); if (!page_has_buffers(page)) create_empty_buffers(page, blocksize, 0); head = page_buffers(page); bbits = ilog2(blocksize); - block = (sector_t)page->index << (PAGE_CACHE_SHIFT - bbits); + block = (sector_t)page->index << (PAGE_SHIFT - bbits); for (bh = head, block_start = 0; bh != head || !block_start; block++, block_start = block_end, bh = bh->b_this_page) { @@ -1159,8 +1159,8 @@ static int ext4_write_begin(struct file *file, struct address_space *mapping, * we allocate blocks but write fails for some reason */ needed_blocks = ext4_writepage_trans_blocks(inode) + 1; - index = pos >> PAGE_CACHE_SHIFT; - from = pos & (PAGE_CACHE_SIZE - 1); + index = pos >> PAGE_SHIFT; + from = pos & (PAGE_SIZE - 1); to = from + len; if (ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA)) { @@ -1188,7 +1188,7 @@ retry_grab: retry_journal: handle = ext4_journal_start(inode, EXT4_HT_WRITE_PAGE, needed_blocks); if (IS_ERR(handle)) { - page_cache_release(page); + put_page(page); return PTR_ERR(handle); } @@ -1196,7 +1196,7 @@ retry_journal: if (page->mapping != mapping) { /* The page got truncated from under us */ unlock_page(page); - page_cache_release(page); + put_page(page); ext4_journal_stop(handle); goto retry_grab; } @@ -1252,7 +1252,7 @@ retry_journal: if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) goto retry_journal; - page_cache_release(page); + put_page(page); return ret; } *pagep = page; @@ -1295,7 +1295,7 @@ static int ext4_write_end(struct file *file, ret = ext4_jbd2_file_inode(handle, inode); if (ret) { unlock_page(page); - page_cache_release(page); + put_page(page); goto errout; } } @@ -1315,7 +1315,7 @@ static int ext4_write_end(struct file *file, */ i_size_changed = ext4_update_inode_size(inode, pos + copied); unlock_page(page); - page_cache_release(page); + put_page(page); if (old_size < pos) pagecache_isize_extended(inode, old_size, pos); @@ -1399,7 +1399,7 @@ static int ext4_journalled_write_end(struct file *file, int size_changed = 0; trace_ext4_journalled_write_end(inode, pos, len, copied); - from = pos & (PAGE_CACHE_SIZE - 1); + from = pos & (PAGE_SIZE - 1); to = from + len; BUG_ON(!ext4_handle_valid(handle)); @@ -1423,7 +1423,7 @@ static int ext4_journalled_write_end(struct file *file, ext4_set_inode_state(inode, EXT4_STATE_JDATA); EXT4_I(inode)->i_datasync_tid = handle->h_transaction->t_tid; unlock_page(page); - page_cache_release(page); + put_page(page); if (old_size < pos) pagecache_isize_extended(inode, old_size, pos); @@ -1537,7 +1537,7 @@ static void ext4_da_page_release_reservation(struct page *page, int num_clusters; ext4_fsblk_t lblk; - BUG_ON(stop > PAGE_CACHE_SIZE || stop < length); + BUG_ON(stop > PAGE_SIZE || stop < length); head = 
page_buffers(page); bh = head; @@ -1553,7 +1553,7 @@ static void ext4_da_page_release_reservation(struct page *page, clear_buffer_delay(bh); } else if (contiguous_blks) { lblk = page->index << - (PAGE_CACHE_SHIFT - inode->i_blkbits); + (PAGE_SHIFT - inode->i_blkbits); lblk += (curr_off >> inode->i_blkbits) - contiguous_blks; ext4_es_remove_extent(inode, lblk, contiguous_blks); @@ -1563,7 +1563,7 @@ static void ext4_da_page_release_reservation(struct page *page, } while ((bh = bh->b_this_page) != head); if (contiguous_blks) { - lblk = page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits); + lblk = page->index << (PAGE_SHIFT - inode->i_blkbits); lblk += (curr_off >> inode->i_blkbits) - contiguous_blks; ext4_es_remove_extent(inode, lblk, contiguous_blks); } @@ -1572,7 +1572,7 @@ static void ext4_da_page_release_reservation(struct page *page, * need to release the reserved space for that cluster. */ num_clusters = EXT4_NUM_B2C(sbi, to_release); while (num_clusters > 0) { - lblk = (page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits)) + + lblk = (page->index << (PAGE_SHIFT - inode->i_blkbits)) + ((num_clusters - 1) << sbi->s_cluster_bits); if (sbi->s_cluster_ratio == 1 || !ext4_find_delalloc_cluster(inode, lblk)) @@ -1619,8 +1619,8 @@ static void mpage_release_unused_pages(struct mpage_da_data *mpd, end = mpd->next_page - 1; if (invalidate) { ext4_lblk_t start, last; - start = index << (PAGE_CACHE_SHIFT - inode->i_blkbits); - last = end << (PAGE_CACHE_SHIFT - inode->i_blkbits); + start = index << (PAGE_SHIFT - inode->i_blkbits); + last = end << (PAGE_SHIFT - inode->i_blkbits); ext4_es_remove_extent(inode, start, last - start + 1); } @@ -1636,7 +1636,7 @@ static void mpage_release_unused_pages(struct mpage_da_data *mpd, BUG_ON(!PageLocked(page)); BUG_ON(PageWriteback(page)); if (invalidate) { - block_invalidatepage(page, 0, PAGE_CACHE_SIZE); + block_invalidatepage(page, 0, PAGE_SIZE); ClearPageUptodate(page); } unlock_page(page); @@ -2007,10 +2007,10 @@ static int ext4_writepage(struct page *page, trace_ext4_writepage(page); size = i_size_read(inode); - if (page->index == size >> PAGE_CACHE_SHIFT) - len = size & ~PAGE_CACHE_MASK; + if (page->index == size >> PAGE_SHIFT) + len = size & ~PAGE_MASK; else - len = PAGE_CACHE_SIZE; + len = PAGE_SIZE; page_bufs = page_buffers(page); /* @@ -2034,7 +2034,7 @@ static int ext4_writepage(struct page *page, ext4_bh_delay_or_unwritten)) { redirty_page_for_writepage(wbc, page); if ((current->flags & PF_MEMALLOC) || - (inode->i_sb->s_blocksize == PAGE_CACHE_SIZE)) { + (inode->i_sb->s_blocksize == PAGE_SIZE)) { /* * For memory cleaning there's no point in writing only * some buffers. So just bail out. 
Warn if we came here @@ -2076,10 +2076,10 @@ static int mpage_submit_page(struct mpage_da_data *mpd, struct page *page) int err; BUG_ON(page->index != mpd->first_page); - if (page->index == size >> PAGE_CACHE_SHIFT) - len = size & ~PAGE_CACHE_MASK; + if (page->index == size >> PAGE_SHIFT) + len = size & ~PAGE_MASK; else - len = PAGE_CACHE_SIZE; + len = PAGE_SIZE; clear_page_dirty_for_io(page); err = ext4_bio_write_page(&mpd->io_submit, page, len, mpd->wbc, false); if (!err) @@ -2213,7 +2213,7 @@ static int mpage_map_and_submit_buffers(struct mpage_da_data *mpd) int nr_pages, i; struct inode *inode = mpd->inode; struct buffer_head *head, *bh; - int bpp_bits = PAGE_CACHE_SHIFT - inode->i_blkbits; + int bpp_bits = PAGE_SHIFT - inode->i_blkbits; pgoff_t start, end; ext4_lblk_t lblk; sector_t pblock; @@ -2274,7 +2274,7 @@ static int mpage_map_and_submit_buffers(struct mpage_da_data *mpd) * supports blocksize < pagesize as we will try to * convert potentially unmapped parts of inode. */ - mpd->io_submit.io_end->size += PAGE_CACHE_SIZE; + mpd->io_submit.io_end->size += PAGE_SIZE; /* Page fully mapped - let IO run! */ err = mpage_submit_page(mpd, page); if (err < 0) { @@ -2426,7 +2426,7 @@ update_disksize: * Update on-disk size after IO is submitted. Races with * truncate are avoided by checking i_size under i_data_sem. */ - disksize = ((loff_t)mpd->first_page) << PAGE_CACHE_SHIFT; + disksize = ((loff_t)mpd->first_page) << PAGE_SHIFT; if (disksize > EXT4_I(inode)->i_disksize) { int err2; loff_t i_size; @@ -2562,7 +2562,7 @@ static int mpage_prepare_extent_to_map(struct mpage_da_data *mpd) mpd->next_page = page->index + 1; /* Add all dirty buffers to mpd */ lblk = ((ext4_lblk_t)page->index) << - (PAGE_CACHE_SHIFT - blkbits); + (PAGE_SHIFT - blkbits); head = page_buffers(page); err = mpage_process_page_bufs(mpd, head, head, lblk); if (err <= 0) @@ -2647,7 +2647,7 @@ static int ext4_writepages(struct address_space *mapping, * We may need to convert up to one extent per block in * the page and we may dirty the inode. 
*/ - rsv_blocks = 1 + (PAGE_CACHE_SIZE >> inode->i_blkbits); + rsv_blocks = 1 + (PAGE_SIZE >> inode->i_blkbits); } /* @@ -2678,8 +2678,8 @@ static int ext4_writepages(struct address_space *mapping, mpd.first_page = writeback_index; mpd.last_page = -1; } else { - mpd.first_page = wbc->range_start >> PAGE_CACHE_SHIFT; - mpd.last_page = wbc->range_end >> PAGE_CACHE_SHIFT; + mpd.first_page = wbc->range_start >> PAGE_SHIFT; + mpd.last_page = wbc->range_end >> PAGE_SHIFT; } mpd.inode = inode; @@ -2838,7 +2838,7 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping, struct inode *inode = mapping->host; handle_t *handle; - index = pos >> PAGE_CACHE_SHIFT; + index = pos >> PAGE_SHIFT; if (ext4_nonda_switch(inode->i_sb)) { *fsdata = (void *)FALL_BACK_TO_NONDELALLOC; @@ -2881,7 +2881,7 @@ retry_journal: handle = ext4_journal_start(inode, EXT4_HT_WRITE_PAGE, ext4_da_write_credits(inode, pos, len)); if (IS_ERR(handle)) { - page_cache_release(page); + put_page(page); return PTR_ERR(handle); } @@ -2889,7 +2889,7 @@ retry_journal: if (page->mapping != mapping) { /* The page got truncated from under us */ unlock_page(page); - page_cache_release(page); + put_page(page); ext4_journal_stop(handle); goto retry_grab; } @@ -2917,7 +2917,7 @@ retry_journal: ext4_should_retry_alloc(inode->i_sb, &retries)) goto retry_journal; - page_cache_release(page); + put_page(page); return ret; } @@ -2965,7 +2965,7 @@ static int ext4_da_write_end(struct file *file, len, copied, page, fsdata); trace_ext4_da_write_end(inode, pos, len, copied); - start = pos & (PAGE_CACHE_SIZE - 1); + start = pos & (PAGE_SIZE - 1); end = start + copied - 1; /* @@ -3187,7 +3187,7 @@ static int __ext4_journalled_invalidatepage(struct page *page, /* * If it's a full truncate we just forget about the pending dirtying */ - if (offset == 0 && length == PAGE_CACHE_SIZE) + if (offset == 0 && length == PAGE_SIZE) ClearPageChecked(page); return jbd2_journal_invalidatepage(journal, page, offset, length); @@ -3556,8 +3556,8 @@ void ext4_set_aops(struct inode *inode) static int __ext4_block_zero_page_range(handle_t *handle, struct address_space *mapping, loff_t from, loff_t length) { - ext4_fsblk_t index = from >> PAGE_CACHE_SHIFT; - unsigned offset = from & (PAGE_CACHE_SIZE-1); + ext4_fsblk_t index = from >> PAGE_SHIFT; + unsigned offset = from & (PAGE_SIZE-1); unsigned blocksize, pos; ext4_lblk_t iblock; struct inode *inode = mapping->host; @@ -3565,14 +3565,14 @@ static int __ext4_block_zero_page_range(handle_t *handle, struct page *page; int err = 0; - page = find_or_create_page(mapping, from >> PAGE_CACHE_SHIFT, + page = find_or_create_page(mapping, from >> PAGE_SHIFT, mapping_gfp_constraint(mapping, ~__GFP_FS)); if (!page) return -ENOMEM; blocksize = inode->i_sb->s_blocksize; - iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits); + iblock = index << (PAGE_SHIFT - inode->i_sb->s_blocksize_bits); if (!page_has_buffers(page)) create_empty_buffers(page, blocksize, 0); @@ -3614,7 +3614,7 @@ static int __ext4_block_zero_page_range(handle_t *handle, ext4_encrypted_inode(inode)) { /* We expect the key to be set. 
*/ BUG_ON(!ext4_has_encryption_key(inode)); - BUG_ON(blocksize != PAGE_CACHE_SIZE); + BUG_ON(blocksize != PAGE_SIZE); WARN_ON_ONCE(ext4_decrypt(page)); } } @@ -3638,7 +3638,7 @@ static int __ext4_block_zero_page_range(handle_t *handle, unlock: unlock_page(page); - page_cache_release(page); + put_page(page); return err; } @@ -3653,7 +3653,7 @@ static int ext4_block_zero_page_range(handle_t *handle, struct address_space *mapping, loff_t from, loff_t length) { struct inode *inode = mapping->host; - unsigned offset = from & (PAGE_CACHE_SIZE-1); + unsigned offset = from & (PAGE_SIZE-1); unsigned blocksize = inode->i_sb->s_blocksize; unsigned max = blocksize - (offset & (blocksize - 1)); @@ -3678,7 +3678,7 @@ static int ext4_block_zero_page_range(handle_t *handle, static int ext4_block_truncate_page(handle_t *handle, struct address_space *mapping, loff_t from) { - unsigned offset = from & (PAGE_CACHE_SIZE-1); + unsigned offset = from & (PAGE_SIZE-1); unsigned length; unsigned blocksize; struct inode *inode = mapping->host; @@ -3816,7 +3816,7 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length) */ if (offset + length > inode->i_size) { length = inode->i_size + - PAGE_CACHE_SIZE - (inode->i_size & (PAGE_CACHE_SIZE - 1)) - + PAGE_SIZE - (inode->i_size & (PAGE_SIZE - 1)) - offset; } @@ -4891,23 +4891,23 @@ static void ext4_wait_for_tail_page_commit(struct inode *inode) tid_t commit_tid = 0; int ret; - offset = inode->i_size & (PAGE_CACHE_SIZE - 1); + offset = inode->i_size & (PAGE_SIZE - 1); /* * All buffers in the last page remain valid? Then there's nothing to * do. We do the check mainly to optimize the common PAGE_CACHE_SIZE == * blocksize case */ - if (offset > PAGE_CACHE_SIZE - (1 << inode->i_blkbits)) + if (offset > PAGE_SIZE - (1 << inode->i_blkbits)) return; while (1) { page = find_lock_page(inode->i_mapping, - inode->i_size >> PAGE_CACHE_SHIFT); + inode->i_size >> PAGE_SHIFT); if (!page) return; ret = __ext4_journalled_invalidatepage(page, offset, - PAGE_CACHE_SIZE - offset); + PAGE_SIZE - offset); unlock_page(page); - page_cache_release(page); + put_page(page); if (ret != -EBUSY) return; commit_tid = 0; @@ -5546,10 +5546,10 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) goto out; } - if (page->index == size >> PAGE_CACHE_SHIFT) - len = size & ~PAGE_CACHE_MASK; + if (page->index == size >> PAGE_SHIFT) + len = size & ~PAGE_MASK; else - len = PAGE_CACHE_SIZE; + len = PAGE_SIZE; /* * Return if we have all the buffers mapped. 
This avoids the need to do * journal_start/journal_stop which can block and take a long time @@ -5580,7 +5580,7 @@ retry_alloc: ret = block_page_mkwrite(vma, vmf, get_block); if (!ret && ext4_should_journal_data(inode)) { if (ext4_walk_page_buffers(handle, page_buffers(page), 0, - PAGE_CACHE_SIZE, NULL, do_journal_get_write_access)) { + PAGE_SIZE, NULL, do_journal_get_write_access)) { unlock_page(page); ret = VM_FAULT_SIGBUS; ext4_journal_stop(handle); diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 50e05df28f66..c12174711ce2 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -839,7 +839,7 @@ static int ext4_mb_init_cache(struct page *page, char *incore, gfp_t gfp) sb = inode->i_sb; ngroups = ext4_get_groups_count(sb); blocksize = 1 << inode->i_blkbits; - blocks_per_page = PAGE_CACHE_SIZE / blocksize; + blocks_per_page = PAGE_SIZE / blocksize; groups_per_page = blocks_per_page >> 1; if (groups_per_page == 0) @@ -993,7 +993,7 @@ static int ext4_mb_get_buddy_page_lock(struct super_block *sb, e4b->bd_buddy_page = NULL; e4b->bd_bitmap_page = NULL; - blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize; + blocks_per_page = PAGE_SIZE / sb->s_blocksize; /* * the buddy cache inode stores the block bitmap * and buddy information in consecutive blocks. @@ -1028,11 +1028,11 @@ static void ext4_mb_put_buddy_page_lock(struct ext4_buddy *e4b) { if (e4b->bd_bitmap_page) { unlock_page(e4b->bd_bitmap_page); - page_cache_release(e4b->bd_bitmap_page); + put_page(e4b->bd_bitmap_page); } if (e4b->bd_buddy_page) { unlock_page(e4b->bd_buddy_page); - page_cache_release(e4b->bd_buddy_page); + put_page(e4b->bd_buddy_page); } } @@ -1125,7 +1125,7 @@ ext4_mb_load_buddy_gfp(struct super_block *sb, ext4_group_t group, might_sleep(); mb_debug(1, "load group %u\n", group); - blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize; + blocks_per_page = PAGE_SIZE / sb->s_blocksize; grp = ext4_get_group_info(sb, group); e4b->bd_blkbits = sb->s_blocksize_bits; @@ -1167,7 +1167,7 @@ ext4_mb_load_buddy_gfp(struct super_block *sb, ext4_group_t group, * is yet to initialize the same. So * wait for it to initialize. 
*/ - page_cache_release(page); + put_page(page); page = find_or_create_page(inode->i_mapping, pnum, gfp); if (page) { BUG_ON(page->mapping != inode->i_mapping); @@ -1203,7 +1203,7 @@ ext4_mb_load_buddy_gfp(struct super_block *sb, ext4_group_t group, page = find_get_page_flags(inode->i_mapping, pnum, FGP_ACCESSED); if (page == NULL || !PageUptodate(page)) { if (page) - page_cache_release(page); + put_page(page); page = find_or_create_page(inode->i_mapping, pnum, gfp); if (page) { BUG_ON(page->mapping != inode->i_mapping); @@ -1238,11 +1238,11 @@ ext4_mb_load_buddy_gfp(struct super_block *sb, ext4_group_t group, err: if (page) - page_cache_release(page); + put_page(page); if (e4b->bd_bitmap_page) - page_cache_release(e4b->bd_bitmap_page); + put_page(e4b->bd_bitmap_page); if (e4b->bd_buddy_page) - page_cache_release(e4b->bd_buddy_page); + put_page(e4b->bd_buddy_page); e4b->bd_buddy = NULL; e4b->bd_bitmap = NULL; return ret; @@ -1257,9 +1257,9 @@ static int ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group, static void ext4_mb_unload_buddy(struct ext4_buddy *e4b) { if (e4b->bd_bitmap_page) - page_cache_release(e4b->bd_bitmap_page); + put_page(e4b->bd_bitmap_page); if (e4b->bd_buddy_page) - page_cache_release(e4b->bd_buddy_page); + put_page(e4b->bd_buddy_page); } @@ -2833,8 +2833,8 @@ static void ext4_free_data_callback(struct super_block *sb, /* No more items in the per group rb tree * balance refcounts from ext4_mb_free_metadata() */ - page_cache_release(e4b.bd_buddy_page); - page_cache_release(e4b.bd_bitmap_page); + put_page(e4b.bd_buddy_page); + put_page(e4b.bd_bitmap_page); } ext4_unlock_group(sb, entry->efd_group); kmem_cache_free(ext4_free_data_cachep, entry); @@ -4385,9 +4385,9 @@ static int ext4_mb_release_context(struct ext4_allocation_context *ac) ext4_mb_put_pa(ac, ac->ac_sb, pa); } if (ac->ac_bitmap_page) - page_cache_release(ac->ac_bitmap_page); + put_page(ac->ac_bitmap_page); if (ac->ac_buddy_page) - page_cache_release(ac->ac_buddy_page); + put_page(ac->ac_buddy_page); if (ac->ac_flags & EXT4_MB_HINT_GROUP_ALLOC) mutex_unlock(&ac->ac_lg->lg_mutex); ext4_mb_collect_stats(ac); @@ -4599,8 +4599,8 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b, * otherwise we'll refresh it from * on-disk bitmap and lose not-yet-available * blocks */ - page_cache_get(e4b->bd_buddy_page); - page_cache_get(e4b->bd_bitmap_page); + get_page(e4b->bd_buddy_page); + get_page(e4b->bd_bitmap_page); } while (*n) { parent = *n; diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c index 4098acc701c3..675b67e5d5c2 100644 --- a/fs/ext4/move_extent.c +++ b/fs/ext4/move_extent.c @@ -156,7 +156,7 @@ mext_page_double_lock(struct inode *inode1, struct inode *inode2, page[1] = grab_cache_page_write_begin(mapping[1], index2, fl); if (!page[1]) { unlock_page(page[0]); - page_cache_release(page[0]); + put_page(page[0]); return -ENOMEM; } /* @@ -192,7 +192,7 @@ mext_page_mkuptodate(struct page *page, unsigned from, unsigned to) create_empty_buffers(page, blocksize, 0); head = page_buffers(page); - block = (sector_t)page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits); + block = (sector_t)page->index << (PAGE_SHIFT - inode->i_blkbits); for (bh = head, block_start = 0; bh != head || !block_start; block++, block_start = block_end, bh = bh->b_this_page) { block_end = block_start + blocksize; @@ -268,7 +268,7 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode, int i, err2, jblocks, retries = 0; int replaced_count = 0; int from = data_offset_in_page << orig_inode->i_blkbits; 
- int blocks_per_page = PAGE_CACHE_SIZE >> orig_inode->i_blkbits; + int blocks_per_page = PAGE_SIZE >> orig_inode->i_blkbits; struct super_block *sb = orig_inode->i_sb; struct buffer_head *bh = NULL; @@ -404,9 +404,9 @@ data_copy: unlock_pages: unlock_page(pagep[0]); - page_cache_release(pagep[0]); + put_page(pagep[0]); unlock_page(pagep[1]); - page_cache_release(pagep[1]); + put_page(pagep[1]); stop_journal: ext4_journal_stop(handle); if (*err == -ENOSPC && @@ -554,7 +554,7 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, __u64 orig_blk, struct inode *orig_inode = file_inode(o_filp); struct inode *donor_inode = file_inode(d_filp); struct ext4_ext_path *path = NULL; - int blocks_per_page = PAGE_CACHE_SIZE >> orig_inode->i_blkbits; + int blocks_per_page = PAGE_SIZE >> orig_inode->i_blkbits; ext4_lblk_t o_end, o_start = orig_blk; ext4_lblk_t d_start = donor_blk; int ret; @@ -648,9 +648,9 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, __u64 orig_blk, if (o_end - o_start < cur_len) cur_len = o_end - o_start; - orig_page_index = o_start >> (PAGE_CACHE_SHIFT - + orig_page_index = o_start >> (PAGE_SHIFT - orig_inode->i_blkbits); - donor_page_index = d_start >> (PAGE_CACHE_SHIFT - + donor_page_index = d_start >> (PAGE_SHIFT - donor_inode->i_blkbits); offset_in_page = o_start % blocks_per_page; if (cur_len > blocks_per_page- offset_in_page) diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c index d77d15f4b674..93ad0acf704c 100644 --- a/fs/ext4/page-io.c +++ b/fs/ext4/page-io.c @@ -432,8 +432,8 @@ int ext4_bio_write_page(struct ext4_io_submit *io, * the page size, the remaining memory is zeroed when mapped, and * writes to that region are not written out to the file." */ - if (len < PAGE_CACHE_SIZE) - zero_user_segment(page, len, PAGE_CACHE_SIZE); + if (len < PAGE_SIZE) + zero_user_segment(page, len, PAGE_SIZE); /* * In the first loop we prepare and mark buffers to submit. 
We have to * mark all buffers in the page before submitting so that diff --git a/fs/ext4/readpage.c b/fs/ext4/readpage.c index 5dc5e95063de..ea27aa1a778c 100644 --- a/fs/ext4/readpage.c +++ b/fs/ext4/readpage.c @@ -140,7 +140,7 @@ int ext4_mpage_readpages(struct address_space *mapping, struct inode *inode = mapping->host; const unsigned blkbits = inode->i_blkbits; - const unsigned blocks_per_page = PAGE_CACHE_SIZE >> blkbits; + const unsigned blocks_per_page = PAGE_SIZE >> blkbits; const unsigned blocksize = 1 << blkbits; sector_t block_in_file; sector_t last_block; @@ -173,7 +173,7 @@ int ext4_mpage_readpages(struct address_space *mapping, if (page_has_buffers(page)) goto confused; - block_in_file = (sector_t)page->index << (PAGE_CACHE_SHIFT - blkbits); + block_in_file = (sector_t)page->index << (PAGE_SHIFT - blkbits); last_block = block_in_file + nr_pages * blocks_per_page; last_block_in_file = (i_size_read(inode) + blocksize - 1) >> blkbits; if (last_block > last_block_in_file) @@ -217,7 +217,7 @@ int ext4_mpage_readpages(struct address_space *mapping, set_error_page: SetPageError(page); zero_user_segment(page, 0, - PAGE_CACHE_SIZE); + PAGE_SIZE); unlock_page(page); goto next_page; } @@ -250,7 +250,7 @@ int ext4_mpage_readpages(struct address_space *mapping, } if (first_hole != blocks_per_page) { zero_user_segment(page, first_hole << blkbits, - PAGE_CACHE_SIZE); + PAGE_SIZE); if (first_hole == 0) { SetPageUptodate(page); unlock_page(page); @@ -319,7 +319,7 @@ int ext4_mpage_readpages(struct address_space *mapping, unlock_page(page); next_page: if (pages) - page_cache_release(page); + put_page(page); } BUG_ON(pages && !list_empty(pages)); if (bio) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 539297515896..0bb74aacb8c0 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -1784,7 +1784,7 @@ static int parse_options(char *options, struct super_block *sb, int blocksize = BLOCK_SIZE << le32_to_cpu(sbi->s_es->s_log_block_size); - if (blocksize < PAGE_CACHE_SIZE) { + if (blocksize < PAGE_SIZE) { ext4_msg(sb, KERN_ERR, "can't mount with " "dioread_nolock if block size != PAGE_SIZE"); return 0; @@ -3808,7 +3808,7 @@ no_journal: } if ((DUMMY_ENCRYPTION_ENABLED(sbi) || ext4_has_feature_encrypt(sb)) && - (blocksize != PAGE_CACHE_SIZE)) { + (blocksize != PAGE_SIZE)) { ext4_msg(sb, KERN_ERR, "Unsupported blocksize for fs encryption"); goto failed_mount_wq; diff --git a/fs/ext4/symlink.c b/fs/ext4/symlink.c index 6f7ee30a89ce..75ed5c2f0c16 100644 --- a/fs/ext4/symlink.c +++ b/fs/ext4/symlink.c @@ -80,12 +80,12 @@ static const char *ext4_encrypted_get_link(struct dentry *dentry, if (res <= plen) paddr[res] = '\0'; if (cpage) - page_cache_release(cpage); + put_page(cpage); set_delayed_call(done, kfree_link, paddr); return paddr; errout: if (cpage) - page_cache_release(cpage); + put_page(cpage); kfree(paddr); return ERR_PTR(res); } diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index e5c762b37239..53fec0872e60 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -223,7 +223,7 @@ int f2fs_submit_page_bio(struct f2fs_io_info *fio) /* Allocate a new bio */ bio = __bio_alloc(fio->sbi, fio->new_blkaddr, 1, is_read_io(fio->rw)); - if (bio_add_page(bio, page, PAGE_CACHE_SIZE, 0) < PAGE_CACHE_SIZE) { + if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) { bio_put(bio); return -EFAULT; } @@ -265,8 +265,8 @@ alloc_new: bio_page = fio->encrypted_page ? 
fio->encrypted_page : fio->page; - if (bio_add_page(io->bio, bio_page, PAGE_CACHE_SIZE, 0) < - PAGE_CACHE_SIZE) { + if (bio_add_page(io->bio, bio_page, PAGE_SIZE, 0) < + PAGE_SIZE) { __submit_merged_bio(io); goto alloc_new; } @@ -406,7 +406,7 @@ got_it: * see, f2fs_add_link -> get_new_data_page -> init_inode_metadata. */ if (dn.data_blkaddr == NEW_ADDR) { - zero_user_segment(page, 0, PAGE_CACHE_SIZE); + zero_user_segment(page, 0, PAGE_SIZE); SetPageUptodate(page); unlock_page(page); return page; @@ -517,7 +517,7 @@ struct page *get_new_data_page(struct inode *inode, goto got_it; if (dn.data_blkaddr == NEW_ADDR) { - zero_user_segment(page, 0, PAGE_CACHE_SIZE); + zero_user_segment(page, 0, PAGE_SIZE); SetPageUptodate(page); } else { f2fs_put_page(page, 1); @@ -530,8 +530,8 @@ struct page *get_new_data_page(struct inode *inode, } got_it: if (new_i_size && i_size_read(inode) < - ((loff_t)(index + 1) << PAGE_CACHE_SHIFT)) { - i_size_write(inode, ((loff_t)(index + 1) << PAGE_CACHE_SHIFT)); + ((loff_t)(index + 1) << PAGE_SHIFT)) { + i_size_write(inode, ((loff_t)(index + 1) << PAGE_SHIFT)); /* Only the directory inode sets new_i_size */ set_inode_flag(F2FS_I(inode), FI_UPDATE_DIR); } @@ -570,9 +570,9 @@ alloc: /* update i_size */ fofs = start_bidx_of_node(ofs_of_node(dn->node_page), dn->inode) + dn->ofs_in_node; - if (i_size_read(dn->inode) < ((loff_t)(fofs + 1) << PAGE_CACHE_SHIFT)) + if (i_size_read(dn->inode) < ((loff_t)(fofs + 1) << PAGE_SHIFT)) i_size_write(dn->inode, - ((loff_t)(fofs + 1) << PAGE_CACHE_SHIFT)); + ((loff_t)(fofs + 1) << PAGE_SHIFT)); return 0; } @@ -971,7 +971,7 @@ got_it: goto confused; } } else { - zero_user_segment(page, 0, PAGE_CACHE_SIZE); + zero_user_segment(page, 0, PAGE_SIZE); SetPageUptodate(page); unlock_page(page); goto next_page; @@ -1021,7 +1021,7 @@ submit_and_realloc: goto next_page; set_error_page: SetPageError(page); - zero_user_segment(page, 0, PAGE_CACHE_SIZE); + zero_user_segment(page, 0, PAGE_SIZE); unlock_page(page); goto next_page; confused: @@ -1032,7 +1032,7 @@ confused: unlock_page(page); next_page: if (pages) - page_cache_release(page); + put_page(page); } BUG_ON(pages && !list_empty(pages)); if (bio) @@ -1136,7 +1136,7 @@ static int f2fs_write_data_page(struct page *page, struct f2fs_sb_info *sbi = F2FS_I_SB(inode); loff_t i_size = i_size_read(inode); const pgoff_t end_index = ((unsigned long long) i_size) - >> PAGE_CACHE_SHIFT; + >> PAGE_SHIFT; unsigned offset = 0; bool need_balance_fs = false; int err = 0; @@ -1157,11 +1157,11 @@ static int f2fs_write_data_page(struct page *page, * If the offset is out-of-range of file size, * this page does not have to be written to disk. */ - offset = i_size & (PAGE_CACHE_SIZE - 1); + offset = i_size & (PAGE_SIZE - 1); if ((page->index >= end_index + 1) || !offset) goto out; - zero_user_segment(page, offset, PAGE_CACHE_SIZE); + zero_user_segment(page, offset, PAGE_SIZE); write: if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING))) goto redirty_out; @@ -1267,8 +1267,8 @@ next: cycled = 0; end = -1; } else { - index = wbc->range_start >> PAGE_CACHE_SHIFT; - end = wbc->range_end >> PAGE_CACHE_SHIFT; + index = wbc->range_start >> PAGE_SHIFT; + end = wbc->range_end >> PAGE_SHIFT; if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) range_whole = 1; cycled = 1; /* ignore range_cyclic tests */ @@ -1448,11 +1448,11 @@ static int prepare_write_begin(struct f2fs_sb_info *sbi, * the block addresses when there is no need to fill the page. 
*/ if (!f2fs_has_inline_data(inode) && !f2fs_encrypted_inode(inode) && - len == PAGE_CACHE_SIZE) + len == PAGE_SIZE) return 0; if (f2fs_has_inline_data(inode) || - (pos & PAGE_CACHE_MASK) >= i_size_read(inode)) { + (pos & PAGE_MASK) >= i_size_read(inode)) { f2fs_lock_op(sbi); locked = true; } @@ -1513,7 +1513,7 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping, struct inode *inode = mapping->host; struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct page *page = NULL; - pgoff_t index = ((unsigned long long) pos) >> PAGE_CACHE_SHIFT; + pgoff_t index = ((unsigned long long) pos) >> PAGE_SHIFT; bool need_balance = false; block_t blkaddr = NULL_ADDR; int err = 0; @@ -1561,22 +1561,22 @@ repeat: if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode)) f2fs_wait_on_encrypted_page_writeback(sbi, blkaddr); - if (len == PAGE_CACHE_SIZE) + if (len == PAGE_SIZE) goto out_update; if (PageUptodate(page)) goto out_clear; - if ((pos & PAGE_CACHE_MASK) >= i_size_read(inode)) { - unsigned start = pos & (PAGE_CACHE_SIZE - 1); + if ((pos & PAGE_MASK) >= i_size_read(inode)) { + unsigned start = pos & (PAGE_SIZE - 1); unsigned end = start + len; /* Reading beyond i_size is simple: memset to zero */ - zero_user_segments(page, 0, start, end, PAGE_CACHE_SIZE); + zero_user_segments(page, 0, start, end, PAGE_SIZE); goto out_update; } if (blkaddr == NEW_ADDR) { - zero_user_segment(page, 0, PAGE_CACHE_SIZE); + zero_user_segment(page, 0, PAGE_SIZE); } else { struct f2fs_io_info fio = { .sbi = sbi, @@ -1688,7 +1688,7 @@ void f2fs_invalidate_page(struct page *page, unsigned int offset, struct f2fs_sb_info *sbi = F2FS_I_SB(inode); if (inode->i_ino >= F2FS_ROOT_INO(sbi) && - (offset % PAGE_CACHE_SIZE || length != PAGE_CACHE_SIZE)) + (offset % PAGE_SIZE || length != PAGE_SIZE)) return; if (PageDirty(page)) { diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index 4fb6ef88a34f..f4a61a5ff79f 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -164,7 +164,7 @@ static void update_mem_info(struct f2fs_sb_info *sbi) /* build curseg */ si->base_mem += sizeof(struct curseg_info) * NR_CURSEG_TYPE; - si->base_mem += PAGE_CACHE_SIZE * NR_CURSEG_TYPE; + si->base_mem += PAGE_SIZE * NR_CURSEG_TYPE; /* build dirty segmap */ si->base_mem += sizeof(struct dirty_seglist_info); @@ -201,9 +201,9 @@ get_cache: si->page_mem = 0; npages = NODE_MAPPING(sbi)->nrpages; - si->page_mem += (unsigned long long)npages << PAGE_CACHE_SHIFT; + si->page_mem += (unsigned long long)npages << PAGE_SHIFT; npages = META_MAPPING(sbi)->nrpages; - si->page_mem += (unsigned long long)npages << PAGE_CACHE_SHIFT; + si->page_mem += (unsigned long long)npages << PAGE_SHIFT; } static int stat_show(struct seq_file *s, void *v) diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 80641ad82745..af819571bce7 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -17,8 +17,8 @@ static unsigned long dir_blocks(struct inode *inode) { - return ((unsigned long long) (i_size_read(inode) + PAGE_CACHE_SIZE - 1)) - >> PAGE_CACHE_SHIFT; + return ((unsigned long long) (i_size_read(inode) + PAGE_SIZE - 1)) + >> PAGE_SHIFT; } static unsigned int dir_buckets(unsigned int level, int dir_level) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index bbe2cd1265d0..7a4558d17f36 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1294,7 +1294,7 @@ static inline void f2fs_put_page(struct page *page, int unlock) f2fs_bug_on(F2FS_P_SB(page), !PageLocked(page)); unlock_page(page); } - page_cache_release(page); + put_page(page); } static inline void f2fs_put_dnode(struct 
dnode_of_data *dn) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index b41c3579ea9e..443e07705c2a 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -74,11 +74,11 @@ static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma, goto mapped; /* page is wholly or partially inside EOF */ - if (((loff_t)(page->index + 1) << PAGE_CACHE_SHIFT) > + if (((loff_t)(page->index + 1) << PAGE_SHIFT) > i_size_read(inode)) { unsigned offset; - offset = i_size_read(inode) & ~PAGE_CACHE_MASK; - zero_user_segment(page, offset, PAGE_CACHE_SIZE); + offset = i_size_read(inode) & ~PAGE_MASK; + zero_user_segment(page, offset, PAGE_SIZE); } set_page_dirty(page); SetPageUptodate(page); @@ -346,11 +346,11 @@ static loff_t f2fs_seek_block(struct file *file, loff_t offset, int whence) goto found; } - pgofs = (pgoff_t)(offset >> PAGE_CACHE_SHIFT); + pgofs = (pgoff_t)(offset >> PAGE_SHIFT); dirty = __get_first_dirty_index(inode->i_mapping, pgofs, whence); - for (; data_ofs < isize; data_ofs = (loff_t)pgofs << PAGE_CACHE_SHIFT) { + for (; data_ofs < isize; data_ofs = (loff_t)pgofs << PAGE_SHIFT) { set_new_dnode(&dn, inode, NULL, NULL, 0); err = get_dnode_of_data(&dn, pgofs, LOOKUP_NODE_RA); if (err && err != -ENOENT) { @@ -370,7 +370,7 @@ static loff_t f2fs_seek_block(struct file *file, loff_t offset, int whence) /* find data/hole in dnode block */ for (; dn.ofs_in_node < end_offset; dn.ofs_in_node++, pgofs++, - data_ofs = (loff_t)pgofs << PAGE_CACHE_SHIFT) { + data_ofs = (loff_t)pgofs << PAGE_SHIFT) { block_t blkaddr; blkaddr = datablock_addr(dn.node_page, dn.ofs_in_node); @@ -508,8 +508,8 @@ void truncate_data_blocks(struct dnode_of_data *dn) static int truncate_partial_data_page(struct inode *inode, u64 from, bool cache_only) { - unsigned offset = from & (PAGE_CACHE_SIZE - 1); - pgoff_t index = from >> PAGE_CACHE_SHIFT; + unsigned offset = from & (PAGE_SIZE - 1); + pgoff_t index = from >> PAGE_SHIFT; struct address_space *mapping = inode->i_mapping; struct page *page; @@ -529,7 +529,7 @@ static int truncate_partial_data_page(struct inode *inode, u64 from, return 0; truncate_out: f2fs_wait_on_page_writeback(page, DATA, true); - zero_user(page, offset, PAGE_CACHE_SIZE - offset); + zero_user(page, offset, PAGE_SIZE - offset); if (!cache_only || !f2fs_encrypted_inode(inode) || !S_ISREG(inode->i_mode)) set_page_dirty(page); @@ -799,11 +799,11 @@ static int punch_hole(struct inode *inode, loff_t offset, loff_t len) if (ret) return ret; - pg_start = ((unsigned long long) offset) >> PAGE_CACHE_SHIFT; - pg_end = ((unsigned long long) offset + len) >> PAGE_CACHE_SHIFT; + pg_start = ((unsigned long long) offset) >> PAGE_SHIFT; + pg_end = ((unsigned long long) offset + len) >> PAGE_SHIFT; - off_start = offset & (PAGE_CACHE_SIZE - 1); - off_end = (offset + len) & (PAGE_CACHE_SIZE - 1); + off_start = offset & (PAGE_SIZE - 1); + off_end = (offset + len) & (PAGE_SIZE - 1); if (pg_start == pg_end) { ret = fill_zero(inode, pg_start, off_start, @@ -813,7 +813,7 @@ static int punch_hole(struct inode *inode, loff_t offset, loff_t len) } else { if (off_start) { ret = fill_zero(inode, pg_start++, off_start, - PAGE_CACHE_SIZE - off_start); + PAGE_SIZE - off_start); if (ret) return ret; } @@ -830,8 +830,8 @@ static int punch_hole(struct inode *inode, loff_t offset, loff_t len) f2fs_balance_fs(sbi, true); - blk_start = (loff_t)pg_start << PAGE_CACHE_SHIFT; - blk_end = (loff_t)pg_end << PAGE_CACHE_SHIFT; + blk_start = (loff_t)pg_start << PAGE_SHIFT; + blk_end = (loff_t)pg_end << PAGE_SHIFT; truncate_inode_pages_range(mapping, blk_start, blk_end 
- 1); @@ -954,8 +954,8 @@ static int f2fs_collapse_range(struct inode *inode, loff_t offset, loff_t len) if (ret) return ret; - pg_start = offset >> PAGE_CACHE_SHIFT; - pg_end = (offset + len) >> PAGE_CACHE_SHIFT; + pg_start = offset >> PAGE_SHIFT; + pg_end = (offset + len) >> PAGE_SHIFT; /* write out all dirty pages from offset */ ret = filemap_write_and_wait_range(inode->i_mapping, offset, LLONG_MAX); @@ -1006,11 +1006,11 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len, truncate_pagecache_range(inode, offset, offset + len - 1); - pg_start = ((unsigned long long) offset) >> PAGE_CACHE_SHIFT; - pg_end = ((unsigned long long) offset + len) >> PAGE_CACHE_SHIFT; + pg_start = ((unsigned long long) offset) >> PAGE_SHIFT; + pg_end = ((unsigned long long) offset + len) >> PAGE_SHIFT; - off_start = offset & (PAGE_CACHE_SIZE - 1); - off_end = (offset + len) & (PAGE_CACHE_SIZE - 1); + off_start = offset & (PAGE_SIZE - 1); + off_end = (offset + len) & (PAGE_SIZE - 1); if (pg_start == pg_end) { ret = fill_zero(inode, pg_start, off_start, @@ -1024,12 +1024,12 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len, } else { if (off_start) { ret = fill_zero(inode, pg_start++, off_start, - PAGE_CACHE_SIZE - off_start); + PAGE_SIZE - off_start); if (ret) return ret; new_size = max_t(loff_t, new_size, - (loff_t)pg_start << PAGE_CACHE_SHIFT); + (loff_t)pg_start << PAGE_SHIFT); } for (index = pg_start; index < pg_end; index++) { @@ -1060,7 +1060,7 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len, f2fs_unlock_op(sbi); new_size = max_t(loff_t, new_size, - (loff_t)(index + 1) << PAGE_CACHE_SHIFT); + (loff_t)(index + 1) << PAGE_SHIFT); } if (off_end) { @@ -1117,8 +1117,8 @@ static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len) truncate_pagecache(inode, offset); - pg_start = offset >> PAGE_CACHE_SHIFT; - pg_end = (offset + len) >> PAGE_CACHE_SHIFT; + pg_start = offset >> PAGE_SHIFT; + pg_end = (offset + len) >> PAGE_SHIFT; delta = pg_end - pg_start; nrpages = (i_size_read(inode) + PAGE_SIZE - 1) / PAGE_SIZE; @@ -1158,11 +1158,11 @@ static int expand_inode_data(struct inode *inode, loff_t offset, f2fs_balance_fs(sbi, true); - pg_start = ((unsigned long long) offset) >> PAGE_CACHE_SHIFT; - pg_end = ((unsigned long long) offset + len) >> PAGE_CACHE_SHIFT; + pg_start = ((unsigned long long) offset) >> PAGE_SHIFT; + pg_end = ((unsigned long long) offset + len) >> PAGE_SHIFT; - off_start = offset & (PAGE_CACHE_SIZE - 1); - off_end = (offset + len) & (PAGE_CACHE_SIZE - 1); + off_start = offset & (PAGE_SIZE - 1); + off_end = (offset + len) & (PAGE_SIZE - 1); f2fs_lock_op(sbi); @@ -1180,12 +1180,12 @@ noalloc: if (pg_start == pg_end) new_size = offset + len; else if (index == pg_start && off_start) - new_size = (loff_t)(index + 1) << PAGE_CACHE_SHIFT; + new_size = (loff_t)(index + 1) << PAGE_SHIFT; else if (index == pg_end) - new_size = ((loff_t)index << PAGE_CACHE_SHIFT) + + new_size = ((loff_t)index << PAGE_SHIFT) + off_end; else - new_size += PAGE_CACHE_SIZE; + new_size += PAGE_SIZE; } if (!(mode & FALLOC_FL_KEEP_SIZE) && @@ -1652,8 +1652,8 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi, if (need_inplace_update(inode)) return -EINVAL; - pg_start = range->start >> PAGE_CACHE_SHIFT; - pg_end = (range->start + range->len) >> PAGE_CACHE_SHIFT; + pg_start = range->start >> PAGE_SHIFT; + pg_end = (range->start + range->len) >> PAGE_SHIFT; f2fs_balance_fs(sbi, true); @@ -1770,7 +1770,7 @@ clear_out: out: 
inode_unlock(inode); if (!err) - range->len = (u64)total << PAGE_CACHE_SHIFT; + range->len = (u64)total << PAGE_SHIFT; return err; } diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index 358214e9f707..a2fbe6f427d3 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -51,7 +51,7 @@ void read_inline_data(struct page *page, struct page *ipage) f2fs_bug_on(F2FS_P_SB(page), page->index); - zero_user_segment(page, MAX_INLINE_DATA, PAGE_CACHE_SIZE); + zero_user_segment(page, MAX_INLINE_DATA, PAGE_SIZE); /* Copy the whole inline data block */ src_addr = inline_data_addr(ipage); @@ -93,7 +93,7 @@ int f2fs_read_inline_data(struct inode *inode, struct page *page) } if (page->index) - zero_user_segment(page, 0, PAGE_CACHE_SIZE); + zero_user_segment(page, 0, PAGE_SIZE); else read_inline_data(page, ipage); @@ -375,7 +375,7 @@ static int f2fs_convert_inline_dir(struct inode *dir, struct page *ipage, goto out; f2fs_wait_on_page_writeback(page, DATA, true); - zero_user_segment(page, MAX_INLINE_DATA, PAGE_CACHE_SIZE); + zero_user_segment(page, MAX_INLINE_DATA, PAGE_SIZE); dentry_blk = kmap_atomic(page); @@ -405,8 +405,8 @@ static int f2fs_convert_inline_dir(struct inode *dir, struct page *ipage, stat_dec_inline_dir(dir); clear_inode_flag(F2FS_I(dir), FI_INLINE_DENTRY); - if (i_size_read(dir) < PAGE_CACHE_SIZE) { - i_size_write(dir, PAGE_CACHE_SIZE); + if (i_size_read(dir) < PAGE_SIZE) { + i_size_write(dir, PAGE_SIZE); set_inode_flag(F2FS_I(dir), FI_UPDATE_DIR); } diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 7876f1052101..c1d9e9d2cb37 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -1051,12 +1051,12 @@ static const char *f2fs_encrypted_get_link(struct dentry *dentry, /* Null-terminate the name */ paddr[res] = '\0'; - page_cache_release(cpage); + put_page(cpage); set_delayed_call(done, kfree_link, paddr); return paddr; errout: fscrypt_fname_free_buffer(&pstr); - page_cache_release(cpage); + put_page(cpage); return ERR_PTR(res); } diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 118321bd1a7f..1a33de9d84b1 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -46,11 +46,11 @@ bool available_free_memory(struct f2fs_sb_info *sbi, int type) */ if (type == FREE_NIDS) { mem_size = (nm_i->fcnt * sizeof(struct free_nid)) >> - PAGE_CACHE_SHIFT; + PAGE_SHIFT; res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 2); } else if (type == NAT_ENTRIES) { mem_size = (nm_i->nat_cnt * sizeof(struct nat_entry)) >> - PAGE_CACHE_SHIFT; + PAGE_SHIFT; res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 2); } else if (type == DIRTY_DENTS) { if (sbi->sb->s_bdi->wb.dirty_exceeded) @@ -62,13 +62,13 @@ bool available_free_memory(struct f2fs_sb_info *sbi, int type) for (i = 0; i <= UPDATE_INO; i++) mem_size += (sbi->im[i].ino_num * - sizeof(struct ino_entry)) >> PAGE_CACHE_SHIFT; + sizeof(struct ino_entry)) >> PAGE_SHIFT; res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 1); } else if (type == EXTENT_CACHE) { mem_size = (atomic_read(&sbi->total_ext_tree) * sizeof(struct extent_tree) + atomic_read(&sbi->total_ext_node) * - sizeof(struct extent_node)) >> PAGE_CACHE_SHIFT; + sizeof(struct extent_node)) >> PAGE_SHIFT; res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 1); } else { if (!sbi->sb->s_bdi->wb.dirty_exceeded) @@ -121,7 +121,7 @@ static struct page *get_next_nat_page(struct f2fs_sb_info *sbi, nid_t nid) src_addr = page_address(src_page); dst_addr = page_address(dst_page); - memcpy(dst_addr, src_addr, PAGE_CACHE_SIZE); + memcpy(dst_addr, src_addr, PAGE_SIZE); set_page_dirty(dst_page); 
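/*
 * Illustrative aside, not part of the patch: why every hunk in this
 * series is behavior-preserving. Before the rename,
 * include/linux/pagemap.h defined the PAGE_CACHE_* names as
 * one-to-one aliases of the page primitives, roughly:
 *
 *   #define PAGE_CACHE_SHIFT         PAGE_SHIFT
 *   #define PAGE_CACHE_SIZE          PAGE_SIZE
 *   #define PAGE_CACHE_MASK          PAGE_MASK
 *   #define page_cache_get(page)     get_page(page)
 *   #define page_cache_release(page) put_page(page)
 *
 * Each substitution therefore swaps a name for the value or call it
 * already expanded to; no arithmetic or reference counting changes.
 */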
f2fs_put_page(src_page, 1); diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 0b30cd2aeebd..011942f94d64 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -591,7 +591,7 @@ out: /* truncate meta pages to be used by the recovery */ truncate_inode_pages_range(META_MAPPING(sbi), - (loff_t)MAIN_BLKADDR(sbi) << PAGE_CACHE_SHIFT, -1); + (loff_t)MAIN_BLKADDR(sbi) << PAGE_SHIFT, -1); if (err) { truncate_inode_pages_final(NODE_MAPPING(sbi)); diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 6f16b39f0b52..540669d6978e 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -885,12 +885,12 @@ int npages_for_summary_flush(struct f2fs_sb_info *sbi, bool for_ra) } } - sum_in_page = (PAGE_CACHE_SIZE - 2 * SUM_JOURNAL_SIZE - + sum_in_page = (PAGE_SIZE - 2 * SUM_JOURNAL_SIZE - SUM_FOOTER_SIZE) / SUMMARY_SIZE; if (valid_sum_count <= sum_in_page) return 1; else if ((valid_sum_count - sum_in_page) <= - (PAGE_CACHE_SIZE - SUM_FOOTER_SIZE) / SUMMARY_SIZE) + (PAGE_SIZE - SUM_FOOTER_SIZE) / SUMMARY_SIZE) return 2; return 3; } @@ -909,9 +909,9 @@ void update_meta_page(struct f2fs_sb_info *sbi, void *src, block_t blk_addr) void *dst = page_address(page); if (src) - memcpy(dst, src, PAGE_CACHE_SIZE); + memcpy(dst, src, PAGE_SIZE); else - memset(dst, 0, PAGE_CACHE_SIZE); + memset(dst, 0, PAGE_SIZE); set_page_dirty(page); f2fs_put_page(page, 1); } @@ -1596,7 +1596,7 @@ static int read_compacted_summaries(struct f2fs_sb_info *sbi) s = (struct f2fs_summary *)(kaddr + offset); seg_i->sum_blk->entries[j] = *s; offset += SUMMARY_SIZE; - if (offset + SUMMARY_SIZE <= PAGE_CACHE_SIZE - + if (offset + SUMMARY_SIZE <= PAGE_SIZE - SUM_FOOTER_SIZE) continue; @@ -1757,7 +1757,7 @@ static void write_compacted_summaries(struct f2fs_sb_info *sbi, block_t blkaddr) *summary = seg_i->sum_blk->entries[j]; written_size += SUMMARY_SIZE; - if (written_size + SUMMARY_SIZE <= PAGE_CACHE_SIZE - + if (written_size + SUMMARY_SIZE <= PAGE_SIZE - SUM_FOOTER_SIZE) continue; @@ -1844,7 +1844,7 @@ static struct page *get_next_sit_page(struct f2fs_sb_info *sbi, src_addr = page_address(src_page); dst_addr = page_address(dst_page); - memcpy(dst_addr, src_addr, PAGE_CACHE_SIZE); + memcpy(dst_addr, src_addr, PAGE_SIZE); set_page_dirty(dst_page); f2fs_put_page(src_page, 1); @@ -2171,7 +2171,7 @@ static int build_curseg(struct f2fs_sb_info *sbi) for (i = 0; i < NR_CURSEG_TYPE; i++) { mutex_init(&array[i].curseg_mutex); - array[i].sum_blk = kzalloc(PAGE_CACHE_SIZE, GFP_KERNEL); + array[i].sum_blk = kzalloc(PAGE_SIZE, GFP_KERNEL); if (!array[i].sum_blk) return -ENOMEM; init_rwsem(&array[i].journal_rwsem); diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 15bb81f8dac2..53b766cdabf2 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1070,10 +1070,10 @@ static int sanity_check_raw_super(struct super_block *sb, } /* Currently, support only 4KB page cache size */ - if (F2FS_BLKSIZE != PAGE_CACHE_SIZE) { + if (F2FS_BLKSIZE != PAGE_SIZE) { f2fs_msg(sb, KERN_INFO, "Invalid page_cache_size (%lu), supports only 4KB\n", - PAGE_CACHE_SIZE); + PAGE_SIZE); return 1; } diff --git a/fs/freevxfs/vxfs_immed.c b/fs/freevxfs/vxfs_immed.c index cb84f0fcc72a..bfc780c682fb 100644 --- a/fs/freevxfs/vxfs_immed.c +++ b/fs/freevxfs/vxfs_immed.c @@ -66,11 +66,11 @@ static int vxfs_immed_readpage(struct file *fp, struct page *pp) { struct vxfs_inode_info *vip = VXFS_INO(pp->mapping->host); - u_int64_t offset = (u_int64_t)pp->index << PAGE_CACHE_SHIFT; + u_int64_t offset = (u_int64_t)pp->index << PAGE_SHIFT; caddr_t kaddr; kaddr = kmap(pp); - 
memcpy(kaddr, vip->vii_immed.vi_immed + offset, PAGE_CACHE_SIZE); + memcpy(kaddr, vip->vii_immed.vi_immed + offset, PAGE_SIZE); kunmap(pp); flush_dcache_page(pp); diff --git a/fs/freevxfs/vxfs_lookup.c b/fs/freevxfs/vxfs_lookup.c index 1cff72df0389..a49e0cfbb686 100644 --- a/fs/freevxfs/vxfs_lookup.c +++ b/fs/freevxfs/vxfs_lookup.c @@ -45,7 +45,7 @@ /* * Number of VxFS blocks per page. */ -#define VXFS_BLOCK_PER_PAGE(sbp) ((PAGE_CACHE_SIZE / (sbp)->s_blocksize)) +#define VXFS_BLOCK_PER_PAGE(sbp) ((PAGE_SIZE / (sbp)->s_blocksize)) static struct dentry * vxfs_lookup(struct inode *, struct dentry *, unsigned int); @@ -175,7 +175,7 @@ vxfs_inode_by_name(struct inode *dip, struct dentry *dp) if (de) { ino = de->d_ino; kunmap(pp); - page_cache_release(pp); + put_page(pp); } return (ino); @@ -255,8 +255,8 @@ vxfs_readdir(struct file *fp, struct dir_context *ctx) nblocks = dir_blocks(ip); pblocks = VXFS_BLOCK_PER_PAGE(sbp); - page = pos >> PAGE_CACHE_SHIFT; - offset = pos & ~PAGE_CACHE_MASK; + page = pos >> PAGE_SHIFT; + offset = pos & ~PAGE_MASK; block = (u_long)(pos >> sbp->s_blocksize_bits) % pblocks; for (; page < npages; page++, block = 0) { @@ -289,7 +289,7 @@ vxfs_readdir(struct file *fp, struct dir_context *ctx) continue; offset = (char *)de - kaddr; - ctx->pos = ((page << PAGE_CACHE_SHIFT) | offset) + 2; + ctx->pos = ((page << PAGE_SHIFT) | offset) + 2; if (!dir_emit(ctx, de->d_name, de->d_namelen, de->d_ino, DT_UNKNOWN)) { vxfs_put_page(pp); @@ -301,6 +301,6 @@ vxfs_readdir(struct file *fp, struct dir_context *ctx) vxfs_put_page(pp); offset = 0; } - ctx->pos = ((page << PAGE_CACHE_SHIFT) | offset) + 2; + ctx->pos = ((page << PAGE_SHIFT) | offset) + 2; return 0; } diff --git a/fs/freevxfs/vxfs_subr.c b/fs/freevxfs/vxfs_subr.c index 5d318c44f855..e806694d4145 100644 --- a/fs/freevxfs/vxfs_subr.c +++ b/fs/freevxfs/vxfs_subr.c @@ -50,7 +50,7 @@ inline void vxfs_put_page(struct page *pp) { kunmap(pp); - page_cache_release(pp); + put_page(pp); } /** diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index fee81e8768c9..592cea54cea0 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -33,7 +33,7 @@ /* * 4MB minimal write chunk size */ -#define MIN_WRITEBACK_PAGES (4096UL >> (PAGE_CACHE_SHIFT - 10)) +#define MIN_WRITEBACK_PAGES (4096UL >> (PAGE_SHIFT - 10)) struct wb_completion { atomic_t cnt; diff --git a/fs/fscache/page.c b/fs/fscache/page.c index 6b35fc4860a0..3078b679fcd1 100644 --- a/fs/fscache/page.c +++ b/fs/fscache/page.c @@ -113,7 +113,7 @@ try_again: wake_up_bit(&cookie->flags, 0); if (xpage) - page_cache_release(xpage); + put_page(xpage); __fscache_uncache_page(cookie, page); return true; @@ -164,7 +164,7 @@ static void fscache_end_page_write(struct fscache_object *object, } spin_unlock(&object->lock); if (xpage) - page_cache_release(xpage); + put_page(xpage); } /* @@ -884,7 +884,7 @@ void fscache_invalidate_writes(struct fscache_cookie *cookie) spin_unlock(&cookie->stores_lock); for (i = n - 1; i >= 0; i--) - page_cache_release(results[i]); + put_page(results[i]); } _leave(""); @@ -982,7 +982,7 @@ int __fscache_write_page(struct fscache_cookie *cookie, radix_tree_tag_set(&cookie->stores, page->index, FSCACHE_COOKIE_PENDING_TAG); - page_cache_get(page); + get_page(page); /* we only want one writer at a time, but we do need to queue new * writers after exclusive ops */ @@ -1026,7 +1026,7 @@ submit_failed: radix_tree_delete(&cookie->stores, page->index); spin_unlock(&cookie->stores_lock); wake_cookie = __fscache_unuse_cookie(cookie); - page_cache_release(page); + put_page(page); 
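/*
 * Illustrative aside, not part of the patch: get_page()/put_page()
 * pin and unpin a struct page by reference count, exactly what the
 * old page_cache_get()/page_cache_release() aliases did. The
 * __fscache_write_page() hunk above shows the usual ownership
 * pattern: take a reference before handing the page to asynchronous
 * write-out, then drop it on the failure path (here) or from the
 * completion side (fscache_end_page_write(), earlier in this file).
 * A minimal sketch of that pattern, with queue_store() as a
 * hypothetical stand-in for the real store machinery:
 *
 *   get_page(page);                // pin: page must outlive this call
 *   if (queue_store(cookie, page)) // hypothetical helper
 *       put_page(page);            // failure: drop the reference we took
 *   // on success, the completion handler calls put_page()
 */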
ret = -ENOBUFS; goto nobufs; diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index ebb5e37455a0..cbece1221417 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -897,7 +897,7 @@ static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep) return err; } - page_cache_get(newpage); + get_page(newpage); if (!(buf->flags & PIPE_BUF_FLAG_LRU)) lru_cache_add_file(newpage); @@ -912,12 +912,12 @@ static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep) if (err) { unlock_page(newpage); - page_cache_release(newpage); + put_page(newpage); return err; } unlock_page(oldpage); - page_cache_release(oldpage); + put_page(oldpage); cs->len = 0; return 0; @@ -951,7 +951,7 @@ static int fuse_ref_page(struct fuse_copy_state *cs, struct page *page, fuse_copy_finish(cs); buf = cs->pipebufs; - page_cache_get(page); + get_page(page); buf->page = page; buf->offset = offset; buf->len = count; @@ -1435,7 +1435,7 @@ out_unlock: out: for (; page_nr < cs.nr_segs; page_nr++) - page_cache_release(bufs[page_nr].page); + put_page(bufs[page_nr].page); kfree(bufs); return ret; @@ -1632,8 +1632,8 @@ static int fuse_notify_store(struct fuse_conn *fc, unsigned int size, goto out_up_killsb; mapping = inode->i_mapping; - index = outarg.offset >> PAGE_CACHE_SHIFT; - offset = outarg.offset & ~PAGE_CACHE_MASK; + index = outarg.offset >> PAGE_SHIFT; + offset = outarg.offset & ~PAGE_MASK; file_size = i_size_read(inode); end = outarg.offset + outarg.size; if (end > file_size) { @@ -1652,13 +1652,13 @@ static int fuse_notify_store(struct fuse_conn *fc, unsigned int size, if (!page) goto out_iput; - this_num = min_t(unsigned, num, PAGE_CACHE_SIZE - offset); + this_num = min_t(unsigned, num, PAGE_SIZE - offset); err = fuse_copy_page(cs, &page, offset, this_num, 0); if (!err && offset == 0 && - (this_num == PAGE_CACHE_SIZE || file_size == end)) + (this_num == PAGE_SIZE || file_size == end)) SetPageUptodate(page); unlock_page(page); - page_cache_release(page); + put_page(page); if (err) goto out_iput; @@ -1697,7 +1697,7 @@ static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode, size_t total_len = 0; int num_pages; - offset = outarg->offset & ~PAGE_CACHE_MASK; + offset = outarg->offset & ~PAGE_MASK; file_size = i_size_read(inode); num = outarg->size; @@ -1720,7 +1720,7 @@ static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode, req->page_descs[0].offset = offset; req->end = fuse_retrieve_end; - index = outarg->offset >> PAGE_CACHE_SHIFT; + index = outarg->offset >> PAGE_SHIFT; while (num && req->num_pages < num_pages) { struct page *page; @@ -1730,7 +1730,7 @@ static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode, if (!page) break; - this_num = min_t(unsigned, num, PAGE_CACHE_SIZE - offset); + this_num = min_t(unsigned, num, PAGE_SIZE - offset); req->pages[req->num_pages] = page; req->page_descs[req->num_pages].length = this_num; req->num_pages++; diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 9dde38f12c07..719924d6c706 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -348,7 +348,7 @@ static bool fuse_range_is_writeback(struct inode *inode, pgoff_t idx_from, pgoff_t curr_index; BUG_ON(req->inode != inode); - curr_index = req->misc.write.in.offset >> PAGE_CACHE_SHIFT; + curr_index = req->misc.write.in.offset >> PAGE_SHIFT; if (idx_from < curr_index + req->num_pages && curr_index <= idx_to) { found = true; @@ -683,11 +683,11 @@ static void fuse_short_read(struct fuse_req *req, struct inode *inode, * present there. 
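 *
 * (Illustrative aside, not in the original source: the lines below
 * split the short-read length num_read into the first page index to
 * zero, num_read >> PAGE_SHIFT, and the offset of the first stale
 * byte inside that page, num_read & (PAGE_SIZE - 1). Every page from
 * start_idx onwards is then zeroed from off up to PAGE_SIZE, with off
 * reset to 0 after the first page, so only bytes actually read
 * survive.)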
*/ int i; - int start_idx = num_read >> PAGE_CACHE_SHIFT; - size_t off = num_read & (PAGE_CACHE_SIZE - 1); + int start_idx = num_read >> PAGE_SHIFT; + size_t off = num_read & (PAGE_SIZE - 1); for (i = start_idx; i < req->num_pages; i++) { - zero_user_segment(req->pages[i], off, PAGE_CACHE_SIZE); + zero_user_segment(req->pages[i], off, PAGE_SIZE); off = 0; } } else { @@ -704,7 +704,7 @@ static int fuse_do_readpage(struct file *file, struct page *page) struct fuse_req *req; size_t num_read; loff_t pos = page_offset(page); - size_t count = PAGE_CACHE_SIZE; + size_t count = PAGE_SIZE; u64 attr_ver; int err; @@ -789,7 +789,7 @@ static void fuse_readpages_end(struct fuse_conn *fc, struct fuse_req *req) else SetPageError(page); unlock_page(page); - page_cache_release(page); + put_page(page); } if (req->ff) fuse_file_put(req->ff, false); @@ -800,7 +800,7 @@ static void fuse_send_readpages(struct fuse_req *req, struct file *file) struct fuse_file *ff = file->private_data; struct fuse_conn *fc = ff->fc; loff_t pos = page_offset(req->pages[0]); - size_t count = req->num_pages << PAGE_CACHE_SHIFT; + size_t count = req->num_pages << PAGE_SHIFT; req->out.argpages = 1; req->out.page_zeroing = 1; @@ -836,7 +836,7 @@ static int fuse_readpages_fill(void *_data, struct page *page) if (req->num_pages && (req->num_pages == FUSE_MAX_PAGES_PER_REQ || - (req->num_pages + 1) * PAGE_CACHE_SIZE > fc->max_read || + (req->num_pages + 1) * PAGE_SIZE > fc->max_read || req->pages[req->num_pages - 1]->index + 1 != page->index)) { int nr_alloc = min_t(unsigned, data->nr_pages, FUSE_MAX_PAGES_PER_REQ); @@ -858,7 +858,7 @@ static int fuse_readpages_fill(void *_data, struct page *page) return -EIO; } - page_cache_get(page); + get_page(page); req->pages[req->num_pages] = page; req->page_descs[req->num_pages].length = PAGE_SIZE; req->num_pages++; @@ -1003,17 +1003,17 @@ static size_t fuse_send_write_pages(struct fuse_req *req, struct file *file, for (i = 0; i < req->num_pages; i++) { struct page *page = req->pages[i]; - if (!req->out.h.error && !offset && count >= PAGE_CACHE_SIZE) + if (!req->out.h.error && !offset && count >= PAGE_SIZE) SetPageUptodate(page); - if (count > PAGE_CACHE_SIZE - offset) - count -= PAGE_CACHE_SIZE - offset; + if (count > PAGE_SIZE - offset) + count -= PAGE_SIZE - offset; else count = 0; offset = 0; unlock_page(page); - page_cache_release(page); + put_page(page); } return res; @@ -1024,7 +1024,7 @@ static ssize_t fuse_fill_write_pages(struct fuse_req *req, struct iov_iter *ii, loff_t pos) { struct fuse_conn *fc = get_fuse_conn(mapping->host); - unsigned offset = pos & (PAGE_CACHE_SIZE - 1); + unsigned offset = pos & (PAGE_SIZE - 1); size_t count = 0; int err; @@ -1034,8 +1034,8 @@ static ssize_t fuse_fill_write_pages(struct fuse_req *req, do { size_t tmp; struct page *page; - pgoff_t index = pos >> PAGE_CACHE_SHIFT; - size_t bytes = min_t(size_t, PAGE_CACHE_SIZE - offset, + pgoff_t index = pos >> PAGE_SHIFT; + size_t bytes = min_t(size_t, PAGE_SIZE - offset, iov_iter_count(ii)); bytes = min_t(size_t, bytes, fc->max_write - count); @@ -1059,7 +1059,7 @@ static ssize_t fuse_fill_write_pages(struct fuse_req *req, iov_iter_advance(ii, tmp); if (!tmp) { unlock_page(page); - page_cache_release(page); + put_page(page); bytes = min(bytes, iov_iter_single_seg_count(ii)); goto again; } @@ -1072,7 +1072,7 @@ static ssize_t fuse_fill_write_pages(struct fuse_req *req, count += tmp; pos += tmp; offset += tmp; - if (offset == PAGE_CACHE_SIZE) + if (offset == PAGE_SIZE) offset = 0; if (!fc->big_writes) @@ -1086,8 
+1086,8 @@ static ssize_t fuse_fill_write_pages(struct fuse_req *req, static inline unsigned fuse_wr_pages(loff_t pos, size_t len) { return min_t(unsigned, - ((pos + len - 1) >> PAGE_CACHE_SHIFT) - - (pos >> PAGE_CACHE_SHIFT) + 1, + ((pos + len - 1) >> PAGE_SHIFT) - + (pos >> PAGE_SHIFT) + 1, FUSE_MAX_PAGES_PER_REQ); } @@ -1205,8 +1205,8 @@ static ssize_t fuse_file_write_iter(struct kiocb *iocb, struct iov_iter *from) goto out; invalidate_mapping_pages(file->f_mapping, - pos >> PAGE_CACHE_SHIFT, - endbyte >> PAGE_CACHE_SHIFT); + pos >> PAGE_SHIFT, + endbyte >> PAGE_SHIFT); written += written_buffered; iocb->ki_pos = pos + written_buffered; @@ -1315,8 +1315,8 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter, size_t nmax = write ? fc->max_write : fc->max_read; loff_t pos = *ppos; size_t count = iov_iter_count(iter); - pgoff_t idx_from = pos >> PAGE_CACHE_SHIFT; - pgoff_t idx_to = (pos + count - 1) >> PAGE_CACHE_SHIFT; + pgoff_t idx_from = pos >> PAGE_SHIFT; + pgoff_t idx_to = (pos + count - 1) >> PAGE_SHIFT; ssize_t res = 0; struct fuse_req *req; int err = 0; @@ -1466,7 +1466,7 @@ __acquires(fc->lock) { struct fuse_inode *fi = get_fuse_inode(req->inode); struct fuse_write_in *inarg = &req->misc.write.in; - __u64 data_size = req->num_pages * PAGE_CACHE_SIZE; + __u64 data_size = req->num_pages * PAGE_SIZE; if (!fc->connected) goto out_free; @@ -1727,7 +1727,7 @@ static bool fuse_writepage_in_flight(struct fuse_req *new_req, list_del(&new_req->writepages_entry); list_for_each_entry(old_req, &fi->writepages, writepages_entry) { BUG_ON(old_req->inode != new_req->inode); - curr_index = old_req->misc.write.in.offset >> PAGE_CACHE_SHIFT; + curr_index = old_req->misc.write.in.offset >> PAGE_SHIFT; if (curr_index <= page->index && page->index < curr_index + old_req->num_pages) { found = true; @@ -1742,7 +1742,7 @@ static bool fuse_writepage_in_flight(struct fuse_req *new_req, new_req->num_pages = 1; for (tmp = old_req; tmp != NULL; tmp = tmp->misc.write.next) { BUG_ON(tmp->inode != new_req->inode); - curr_index = tmp->misc.write.in.offset >> PAGE_CACHE_SHIFT; + curr_index = tmp->misc.write.in.offset >> PAGE_SHIFT; if (tmp->num_pages == 1 && curr_index == page->index) { old_req = tmp; @@ -1799,7 +1799,7 @@ static int fuse_writepages_fill(struct page *page, if (req && req->num_pages && (is_writeback || req->num_pages == FUSE_MAX_PAGES_PER_REQ || - (req->num_pages + 1) * PAGE_CACHE_SIZE > fc->max_write || + (req->num_pages + 1) * PAGE_SIZE > fc->max_write || data->orig_pages[req->num_pages - 1]->index + 1 != page->index)) { fuse_writepages_send(data); data->req = NULL; @@ -1924,7 +1924,7 @@ static int fuse_write_begin(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned flags, struct page **pagep, void **fsdata) { - pgoff_t index = pos >> PAGE_CACHE_SHIFT; + pgoff_t index = pos >> PAGE_SHIFT; struct fuse_conn *fc = get_fuse_conn(file_inode(file)); struct page *page; loff_t fsize; @@ -1938,15 +1938,15 @@ static int fuse_write_begin(struct file *file, struct address_space *mapping, fuse_wait_on_page_writeback(mapping->host, page->index); - if (PageUptodate(page) || len == PAGE_CACHE_SIZE) + if (PageUptodate(page) || len == PAGE_SIZE) goto success; /* * Check if the start this page comes after the end of file, in which * case the readpage can be optimized away. 
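 *
 * (Illustrative aside, not in the original source: pos & PAGE_MASK is
 * the page-aligned start of this write and pos & ~PAGE_MASK the
 * offset within the page; PAGE_MASK is a drop-in for the identical
 * PAGE_CACHE_MASK. When i_size is at or below the page-aligned
 * start, no file data backs this page, so zeroing the head of the
 * page up to the write offset replaces the readpage.)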
*/ fsize = i_size_read(mapping->host); - if (fsize <= (pos & PAGE_CACHE_MASK)) { - size_t off = pos & ~PAGE_CACHE_MASK; + if (fsize <= (pos & PAGE_MASK)) { + size_t off = pos & ~PAGE_MASK; if (off) zero_user_segment(page, 0, off); goto success; @@ -1960,7 +1960,7 @@ success: cleanup: unlock_page(page); - page_cache_release(page); + put_page(page); error: return err; } @@ -1973,16 +1973,16 @@ static int fuse_write_end(struct file *file, struct address_space *mapping, if (!PageUptodate(page)) { /* Zero any unwritten bytes at the end of the page */ - size_t endoff = (pos + copied) & ~PAGE_CACHE_MASK; + size_t endoff = (pos + copied) & ~PAGE_MASK; if (endoff) - zero_user_segment(page, endoff, PAGE_CACHE_SIZE); + zero_user_segment(page, endoff, PAGE_SIZE); SetPageUptodate(page); } fuse_write_update_size(inode, pos + copied); set_page_dirty(page); unlock_page(page); - page_cache_release(page); + put_page(page); return copied; } diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 4d69d5c0bedc..1ce67668a8e1 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -339,11 +339,11 @@ int fuse_reverse_inval_inode(struct super_block *sb, u64 nodeid, fuse_invalidate_attr(inode); if (offset >= 0) { - pg_start = offset >> PAGE_CACHE_SHIFT; + pg_start = offset >> PAGE_SHIFT; if (len <= 0) pg_end = -1; else - pg_end = (offset + len - 1) >> PAGE_CACHE_SHIFT; + pg_end = (offset + len - 1) >> PAGE_SHIFT; invalidate_inode_pages2_range(inode->i_mapping, pg_start, pg_end); } @@ -864,7 +864,7 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req) process_init_limits(fc, arg); if (arg->minor >= 6) { - ra_pages = arg->max_readahead / PAGE_CACHE_SIZE; + ra_pages = arg->max_readahead / PAGE_SIZE; if (arg->flags & FUSE_ASYNC_READ) fc->async_read = 1; if (!(arg->flags & FUSE_POSIX_LOCKS)) @@ -901,7 +901,7 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req) if (arg->time_gran && arg->time_gran <= 1000000000) fc->sb->s_time_gran = arg->time_gran; } else { - ra_pages = fc->max_read / PAGE_CACHE_SIZE; + ra_pages = fc->max_read / PAGE_SIZE; fc->no_lock = 1; fc->no_flock = 1; } @@ -922,7 +922,7 @@ static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req) arg->major = FUSE_KERNEL_VERSION; arg->minor = FUSE_KERNEL_MINOR_VERSION; - arg->max_readahead = fc->bdi.ra_pages * PAGE_CACHE_SIZE; + arg->max_readahead = fc->bdi.ra_pages * PAGE_SIZE; arg->flags |= FUSE_ASYNC_READ | FUSE_POSIX_LOCKS | FUSE_ATOMIC_O_TRUNC | FUSE_EXPORT_SUPPORT | FUSE_BIG_WRITES | FUSE_DONT_MASK | FUSE_SPLICE_WRITE | FUSE_SPLICE_MOVE | FUSE_SPLICE_READ | @@ -955,7 +955,7 @@ static int fuse_bdi_init(struct fuse_conn *fc, struct super_block *sb) int err; fc->bdi.name = "fuse"; - fc->bdi.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE; + fc->bdi.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_SIZE; /* fuse does it's own writeback accounting */ fc->bdi.capabilities = BDI_CAP_NO_ACCT_WB | BDI_CAP_STRICTLIMIT; @@ -1053,8 +1053,8 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent) goto err; #endif } else { - sb->s_blocksize = PAGE_CACHE_SIZE; - sb->s_blocksize_bits = PAGE_CACHE_SHIFT; + sb->s_blocksize = PAGE_SIZE; + sb->s_blocksize_bits = PAGE_SHIFT; } sb->s_magic = FUSE_SUPER_MAGIC; sb->s_op = &fuse_super_operations; diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c index aa016e4b8bec..1bbbee945f46 100644 --- a/fs/gfs2/aops.c +++ b/fs/gfs2/aops.c @@ -101,7 +101,7 @@ static int gfs2_writepage_common(struct page *page, struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_sbd *sdp = 
GFS2_SB(inode); loff_t i_size = i_size_read(inode); - pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT; + pgoff_t end_index = i_size >> PAGE_SHIFT; unsigned offset; if (gfs2_assert_withdraw(sdp, gfs2_glock_is_held_excl(ip->i_gl))) @@ -109,9 +109,9 @@ static int gfs2_writepage_common(struct page *page, if (current->journal_info) goto redirty; /* Is the page fully outside i_size? (truncate in progress) */ - offset = i_size & (PAGE_CACHE_SIZE-1); + offset = i_size & (PAGE_SIZE-1); if (page->index > end_index || (page->index == end_index && !offset)) { - page->mapping->a_ops->invalidatepage(page, 0, PAGE_CACHE_SIZE); + page->mapping->a_ops->invalidatepage(page, 0, PAGE_SIZE); goto out; } return 1; @@ -238,7 +238,7 @@ static int gfs2_write_jdata_pagevec(struct address_space *mapping, { struct inode *inode = mapping->host; struct gfs2_sbd *sdp = GFS2_SB(inode); - unsigned nrblocks = nr_pages * (PAGE_CACHE_SIZE/inode->i_sb->s_blocksize); + unsigned nrblocks = nr_pages * (PAGE_SIZE/inode->i_sb->s_blocksize); int i; int ret; @@ -366,8 +366,8 @@ static int gfs2_write_cache_jdata(struct address_space *mapping, cycled = 0; end = -1; } else { - index = wbc->range_start >> PAGE_CACHE_SHIFT; - end = wbc->range_end >> PAGE_CACHE_SHIFT; + index = wbc->range_start >> PAGE_SHIFT; + end = wbc->range_end >> PAGE_SHIFT; if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) range_whole = 1; cycled = 1; /* ignore range_cyclic tests */ @@ -458,7 +458,7 @@ static int stuffed_readpage(struct gfs2_inode *ip, struct page *page) * so we need to supply one here. It doesn't happen often. */ if (unlikely(page->index)) { - zero_user(page, 0, PAGE_CACHE_SIZE); + zero_user(page, 0, PAGE_SIZE); SetPageUptodate(page); return 0; } @@ -471,7 +471,7 @@ static int stuffed_readpage(struct gfs2_inode *ip, struct page *page) if (dsize > (dibh->b_size - sizeof(struct gfs2_dinode))) dsize = (dibh->b_size - sizeof(struct gfs2_dinode)); memcpy(kaddr, dibh->b_data + sizeof(struct gfs2_dinode), dsize); - memset(kaddr + dsize, 0, PAGE_CACHE_SIZE - dsize); + memset(kaddr + dsize, 0, PAGE_SIZE - dsize); kunmap_atomic(kaddr); flush_dcache_page(page); brelse(dibh); @@ -560,8 +560,8 @@ int gfs2_internal_read(struct gfs2_inode *ip, char *buf, loff_t *pos, unsigned size) { struct address_space *mapping = ip->i_inode.i_mapping; - unsigned long index = *pos / PAGE_CACHE_SIZE; - unsigned offset = *pos & (PAGE_CACHE_SIZE - 1); + unsigned long index = *pos / PAGE_SIZE; + unsigned offset = *pos & (PAGE_SIZE - 1); unsigned copied = 0; unsigned amt; struct page *page; @@ -569,15 +569,15 @@ int gfs2_internal_read(struct gfs2_inode *ip, char *buf, loff_t *pos, do { amt = size - copied; - if (offset + size > PAGE_CACHE_SIZE) - amt = PAGE_CACHE_SIZE - offset; + if (offset + size > PAGE_SIZE) + amt = PAGE_SIZE - offset; page = read_cache_page(mapping, index, __gfs2_readpage, NULL); if (IS_ERR(page)) return PTR_ERR(page); p = kmap_atomic(page); memcpy(buf + copied, p + offset, amt); kunmap_atomic(p); - page_cache_release(page); + put_page(page); copied += amt; index++; offset = 0; @@ -651,8 +651,8 @@ static int gfs2_write_begin(struct file *file, struct address_space *mapping, unsigned requested = 0; int alloc_required; int error = 0; - pgoff_t index = pos >> PAGE_CACHE_SHIFT; - unsigned from = pos & (PAGE_CACHE_SIZE - 1); + pgoff_t index = pos >> PAGE_SHIFT; + unsigned from = pos & (PAGE_SIZE - 1); struct page *page; gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &ip->i_gh); @@ -697,7 +697,7 @@ static int gfs2_write_begin(struct file *file, struct 
address_space *mapping, rblocks += gfs2_rg_blocks(ip, requested); error = gfs2_trans_begin(sdp, rblocks, - PAGE_CACHE_SIZE/sdp->sd_sb.sb_bsize); + PAGE_SIZE/sdp->sd_sb.sb_bsize); if (error) goto out_trans_fail; @@ -727,7 +727,7 @@ out: return 0; unlock_page(page); - page_cache_release(page); + put_page(page); gfs2_trans_end(sdp); if (pos + len > ip->i_inode.i_size) @@ -827,7 +827,7 @@ static int gfs2_stuffed_write_end(struct inode *inode, struct buffer_head *dibh, if (!PageUptodate(page)) SetPageUptodate(page); unlock_page(page); - page_cache_release(page); + put_page(page); if (copied) { if (inode->i_size < to) @@ -877,7 +877,7 @@ static int gfs2_write_end(struct file *file, struct address_space *mapping, struct gfs2_sbd *sdp = GFS2_SB(inode); struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode); struct buffer_head *dibh; - unsigned int from = pos & (PAGE_CACHE_SIZE - 1); + unsigned int from = pos & (PAGE_SIZE - 1); unsigned int to = from + len; int ret; struct gfs2_trans *tr = current->journal_info; @@ -888,7 +888,7 @@ static int gfs2_write_end(struct file *file, struct address_space *mapping, ret = gfs2_meta_inode_buffer(ip, &dibh); if (unlikely(ret)) { unlock_page(page); - page_cache_release(page); + put_page(page); goto failed; } @@ -992,7 +992,7 @@ static void gfs2_invalidatepage(struct page *page, unsigned int offset, { struct gfs2_sbd *sdp = GFS2_SB(page->mapping->host); unsigned int stop = offset + length; - int partial_page = (offset || length < PAGE_CACHE_SIZE); + int partial_page = (offset || length < PAGE_SIZE); struct buffer_head *bh, *head; unsigned long pos = 0; @@ -1082,7 +1082,7 @@ static ssize_t gfs2_direct_IO(struct kiocb *iocb, struct iov_iter *iter, * the first place, mapping->nr_pages will always be zero. */ if (mapping->nrpages) { - loff_t lstart = offset & ~(PAGE_CACHE_SIZE - 1); + loff_t lstart = offset & ~(PAGE_SIZE - 1); loff_t len = iov_iter_count(iter); loff_t end = PAGE_ALIGN(offset + len) - 1; diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index 0860f0b5b3f1..24ce1cdd434a 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c @@ -75,7 +75,7 @@ static int gfs2_unstuffer_page(struct gfs2_inode *ip, struct buffer_head *dibh, dsize = dibh->b_size - sizeof(struct gfs2_dinode); memcpy(kaddr, dibh->b_data + sizeof(struct gfs2_dinode), dsize); - memset(kaddr + dsize, 0, PAGE_CACHE_SIZE - dsize); + memset(kaddr + dsize, 0, PAGE_SIZE - dsize); kunmap(page); SetPageUptodate(page); @@ -98,7 +98,7 @@ static int gfs2_unstuffer_page(struct gfs2_inode *ip, struct buffer_head *dibh, if (release) { unlock_page(page); - page_cache_release(page); + put_page(page); } return 0; @@ -932,8 +932,8 @@ static int gfs2_block_truncate_page(struct address_space *mapping, loff_t from) { struct inode *inode = mapping->host; struct gfs2_inode *ip = GFS2_I(inode); - unsigned long index = from >> PAGE_CACHE_SHIFT; - unsigned offset = from & (PAGE_CACHE_SIZE-1); + unsigned long index = from >> PAGE_SHIFT; + unsigned offset = from & (PAGE_SIZE-1); unsigned blocksize, iblock, length, pos; struct buffer_head *bh; struct page *page; @@ -945,7 +945,7 @@ static int gfs2_block_truncate_page(struct address_space *mapping, loff_t from) blocksize = inode->i_sb->s_blocksize; length = blocksize - (offset & (blocksize - 1)); - iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits); + iblock = index << (PAGE_SHIFT - inode->i_sb->s_blocksize_bits); if (!page_has_buffers(page)) create_empty_buffers(page, blocksize, 0); @@ -989,7 +989,7 @@ static int gfs2_block_truncate_page(struct address_space 
*mapping, loff_t from) mark_buffer_dirty(bh); unlock: unlock_page(page); - page_cache_release(page); + put_page(page); return err; } diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index c9384f932975..208efc70ad49 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -354,8 +354,8 @@ static int gfs2_allocate_page_backing(struct page *page) { struct inode *inode = page->mapping->host; struct buffer_head bh; - unsigned long size = PAGE_CACHE_SIZE; - u64 lblock = page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits); + unsigned long size = PAGE_SIZE; + u64 lblock = page->index << (PAGE_SHIFT - inode->i_blkbits); do { bh.b_state = 0; @@ -386,7 +386,7 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) struct gfs2_sbd *sdp = GFS2_SB(inode); struct gfs2_alloc_parms ap = { .aflags = 0, }; unsigned long last_index; - u64 pos = page->index << PAGE_CACHE_SHIFT; + u64 pos = page->index << PAGE_SHIFT; unsigned int data_blocks, ind_blocks, rblocks; struct gfs2_holder gh; loff_t size; @@ -401,7 +401,7 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) if (ret) goto out; - gfs2_size_hint(vma->vm_file, pos, PAGE_CACHE_SIZE); + gfs2_size_hint(vma->vm_file, pos, PAGE_SIZE); gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); ret = gfs2_glock_nq(&gh); @@ -411,7 +411,7 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) set_bit(GLF_DIRTY, &ip->i_gl->gl_flags); set_bit(GIF_SW_PAGED, &ip->i_flags); - if (!gfs2_write_alloc_required(ip, pos, PAGE_CACHE_SIZE)) { + if (!gfs2_write_alloc_required(ip, pos, PAGE_SIZE)) { lock_page(page); if (!PageUptodate(page) || page->mapping != inode->i_mapping) { ret = -EAGAIN; @@ -424,7 +424,7 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) if (ret) goto out_unlock; - gfs2_write_calc_reserv(ip, PAGE_CACHE_SIZE, &data_blocks, &ind_blocks); + gfs2_write_calc_reserv(ip, PAGE_SIZE, &data_blocks, &ind_blocks); ap.target = data_blocks + ind_blocks; ret = gfs2_quota_lock_check(ip, &ap); if (ret) @@ -447,7 +447,7 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) lock_page(page); ret = -EINVAL; size = i_size_read(inode); - last_index = (size - 1) >> PAGE_CACHE_SHIFT; + last_index = (size - 1) >> PAGE_SHIFT; /* Check page index against inode size */ if (size == 0 || (page->index > last_index)) goto out_trans_end; @@ -873,7 +873,7 @@ static long __gfs2_fallocate(struct file *file, int mode, loff_t offset, loff_t rblocks += data_blocks ? 
data_blocks : 1; error = gfs2_trans_begin(sdp, rblocks, - PAGE_CACHE_SIZE/sdp->sd_sb.sb_bsize); + PAGE_SIZE/sdp->sd_sb.sb_bsize); if (error) goto out_trans_fail; diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c index e137d96f1b17..0448524c11bc 100644 --- a/fs/gfs2/meta_io.c +++ b/fs/gfs2/meta_io.c @@ -124,7 +124,7 @@ struct buffer_head *gfs2_getbuf(struct gfs2_glock *gl, u64 blkno, int create) if (mapping == NULL) mapping = &sdp->sd_aspace; - shift = PAGE_CACHE_SHIFT - sdp->sd_sb.sb_bsize_shift; + shift = PAGE_SHIFT - sdp->sd_sb.sb_bsize_shift; index = blkno >> shift; /* convert block to page */ bufnum = blkno - (index << shift); /* block buf index within page */ @@ -154,7 +154,7 @@ struct buffer_head *gfs2_getbuf(struct gfs2_glock *gl, u64 blkno, int create) map_bh(bh, sdp->sd_vfs, blkno); unlock_page(page); - page_cache_release(page); + put_page(page); return bh; } diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index a39891344259..ce7d69a2fdc0 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -701,7 +701,7 @@ static int gfs2_write_buf_to_page(struct gfs2_inode *ip, unsigned long index, unsigned to_write = bytes, pg_off = off; int done = 0; - blk = index << (PAGE_CACHE_SHIFT - sdp->sd_sb.sb_bsize_shift); + blk = index << (PAGE_SHIFT - sdp->sd_sb.sb_bsize_shift); boff = off % bsize; page = find_or_create_page(mapping, index, GFP_NOFS); @@ -753,13 +753,13 @@ static int gfs2_write_buf_to_page(struct gfs2_inode *ip, unsigned long index, flush_dcache_page(page); kunmap_atomic(kaddr); unlock_page(page); - page_cache_release(page); + put_page(page); return 0; unlock_out: unlock_page(page); - page_cache_release(page); + put_page(page); return -EIO; } @@ -773,13 +773,13 @@ static int gfs2_write_disk_quota(struct gfs2_inode *ip, struct gfs2_quota *qp, nbytes = sizeof(struct gfs2_quota); - pg_beg = loc >> PAGE_CACHE_SHIFT; - pg_off = loc % PAGE_CACHE_SIZE; + pg_beg = loc >> PAGE_SHIFT; + pg_off = loc % PAGE_SIZE; /* If the quota straddles a page boundary, split the write in two */ - if ((pg_off + nbytes) > PAGE_CACHE_SIZE) { + if ((pg_off + nbytes) > PAGE_SIZE) { pg_oflow = 1; - overflow = (pg_off + nbytes) - PAGE_CACHE_SIZE; + overflow = (pg_off + nbytes) - PAGE_SIZE; } ptr = qp; diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 07c0265aa195..99a0bdac8796 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -918,9 +918,8 @@ static int read_rindex_entry(struct gfs2_inode *ip) goto fail; rgd->rd_gl->gl_object = rgd; - rgd->rd_gl->gl_vm.start = (rgd->rd_addr * bsize) & PAGE_CACHE_MASK; - rgd->rd_gl->gl_vm.end = PAGE_CACHE_ALIGN((rgd->rd_addr + - rgd->rd_length) * bsize) - 1; + rgd->rd_gl->gl_vm.start = (rgd->rd_addr * bsize) & PAGE_MASK; + rgd->rd_gl->gl_vm.end = PAGE_ALIGN((rgd->rd_addr + rgd->rd_length) * bsize) - 1; rgd->rd_rgl = (struct gfs2_rgrp_lvb *)rgd->rd_gl->gl_lksb.sb_lvbptr; rgd->rd_flags &= ~(GFS2_RDF_UPTODATE | GFS2_RDF_PREFERRED); if (rgd->rd_data > sdp->sd_max_rg_data) diff --git a/fs/hfs/bnode.c b/fs/hfs/bnode.c index 221719eac5de..d77d844b668b 100644 --- a/fs/hfs/bnode.c +++ b/fs/hfs/bnode.c @@ -278,14 +278,14 @@ static struct hfs_bnode *__hfs_bnode_create(struct hfs_btree *tree, u32 cnid) mapping = tree->inode->i_mapping; off = (loff_t)cnid * tree->node_size; - block = off >> PAGE_CACHE_SHIFT; - node->page_offset = off & ~PAGE_CACHE_MASK; + block = off >> PAGE_SHIFT; + node->page_offset = off & ~PAGE_MASK; for (i = 0; i < tree->pages_per_bnode; i++) { page = read_mapping_page(mapping, block++, NULL); if (IS_ERR(page)) goto fail; if (PageError(page)) { - 
page_cache_release(page); + put_page(page); goto fail; } node->page[i] = page; @@ -401,7 +401,7 @@ void hfs_bnode_free(struct hfs_bnode *node) for (i = 0; i < node->tree->pages_per_bnode; i++) if (node->page[i]) - page_cache_release(node->page[i]); + put_page(node->page[i]); kfree(node); } @@ -429,11 +429,11 @@ struct hfs_bnode *hfs_bnode_create(struct hfs_btree *tree, u32 num) pagep = node->page; memset(kmap(*pagep) + node->page_offset, 0, - min((int)PAGE_CACHE_SIZE, (int)tree->node_size)); + min((int)PAGE_SIZE, (int)tree->node_size)); set_page_dirty(*pagep); kunmap(*pagep); for (i = 1; i < tree->pages_per_bnode; i++) { - memset(kmap(*++pagep), 0, PAGE_CACHE_SIZE); + memset(kmap(*++pagep), 0, PAGE_SIZE); set_page_dirty(*pagep); kunmap(*pagep); } diff --git a/fs/hfs/btree.c b/fs/hfs/btree.c index 1ab19e660e69..37cdd955eceb 100644 --- a/fs/hfs/btree.c +++ b/fs/hfs/btree.c @@ -116,14 +116,14 @@ struct hfs_btree *hfs_btree_open(struct super_block *sb, u32 id, btree_keycmp ke } tree->node_size_shift = ffs(size) - 1; - tree->pages_per_bnode = (tree->node_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; + tree->pages_per_bnode = (tree->node_size + PAGE_SIZE - 1) >> PAGE_SHIFT; kunmap(page); - page_cache_release(page); + put_page(page); return tree; fail_page: - page_cache_release(page); + put_page(page); free_inode: tree->inode->i_mapping->a_ops = &hfs_aops; iput(tree->inode); @@ -257,9 +257,9 @@ struct hfs_bnode *hfs_bmap_alloc(struct hfs_btree *tree) off = off16; off += node->page_offset; - pagep = node->page + (off >> PAGE_CACHE_SHIFT); + pagep = node->page + (off >> PAGE_SHIFT); data = kmap(*pagep); - off &= ~PAGE_CACHE_MASK; + off &= ~PAGE_MASK; idx = 0; for (;;) { @@ -279,7 +279,7 @@ struct hfs_bnode *hfs_bmap_alloc(struct hfs_btree *tree) } } } - if (++off >= PAGE_CACHE_SIZE) { + if (++off >= PAGE_SIZE) { kunmap(*pagep); data = kmap(*++pagep); off = 0; @@ -302,9 +302,9 @@ struct hfs_bnode *hfs_bmap_alloc(struct hfs_btree *tree) len = hfs_brec_lenoff(node, 0, &off16); off = off16; off += node->page_offset; - pagep = node->page + (off >> PAGE_CACHE_SHIFT); + pagep = node->page + (off >> PAGE_SHIFT); data = kmap(*pagep); - off &= ~PAGE_CACHE_MASK; + off &= ~PAGE_MASK; } } @@ -348,9 +348,9 @@ void hfs_bmap_free(struct hfs_bnode *node) len = hfs_brec_lenoff(node, 0, &off); } off += node->page_offset + nidx / 8; - page = node->page[off >> PAGE_CACHE_SHIFT]; + page = node->page[off >> PAGE_SHIFT]; data = kmap(page); - off &= ~PAGE_CACHE_MASK; + off &= ~PAGE_MASK; m = 1 << (~nidx & 7); byte = data[off]; if (!(byte & m)) { diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c index 6686bf39a5b5..cb1e5faa2fb7 100644 --- a/fs/hfs/inode.c +++ b/fs/hfs/inode.c @@ -91,8 +91,8 @@ static int hfs_releasepage(struct page *page, gfp_t mask) if (!tree) return 0; - if (tree->node_size >= PAGE_CACHE_SIZE) { - nidx = page->index >> (tree->node_size_shift - PAGE_CACHE_SHIFT); + if (tree->node_size >= PAGE_SIZE) { + nidx = page->index >> (tree->node_size_shift - PAGE_SHIFT); spin_lock(&tree->hash_lock); node = hfs_bnode_findhash(tree, nidx); if (!node) @@ -105,8 +105,8 @@ static int hfs_releasepage(struct page *page, gfp_t mask) } spin_unlock(&tree->hash_lock); } else { - nidx = page->index << (PAGE_CACHE_SHIFT - tree->node_size_shift); - i = 1 << (PAGE_CACHE_SHIFT - tree->node_size_shift); + nidx = page->index << (PAGE_SHIFT - tree->node_size_shift); + i = 1 << (PAGE_SHIFT - tree->node_size_shift); spin_lock(&tree->hash_lock); do { node = hfs_bnode_findhash(tree, nidx++); diff --git a/fs/hfsplus/bitmap.c 
b/fs/hfsplus/bitmap.c index d2954451519e..c0ae274c0a22 100644 --- a/fs/hfsplus/bitmap.c +++ b/fs/hfsplus/bitmap.c @@ -13,7 +13,7 @@ #include "hfsplus_fs.h" #include "hfsplus_raw.h" -#define PAGE_CACHE_BITS (PAGE_CACHE_SIZE * 8) +#define PAGE_CACHE_BITS (PAGE_SIZE * 8) int hfsplus_block_allocate(struct super_block *sb, u32 size, u32 offset, u32 *max) diff --git a/fs/hfsplus/bnode.c b/fs/hfsplus/bnode.c index 63924662aaf3..ce014ceb89ef 100644 --- a/fs/hfsplus/bnode.c +++ b/fs/hfsplus/bnode.c @@ -24,16 +24,16 @@ void hfs_bnode_read(struct hfs_bnode *node, void *buf, int off, int len) int l; off += node->page_offset; - pagep = node->page + (off >> PAGE_CACHE_SHIFT); - off &= ~PAGE_CACHE_MASK; + pagep = node->page + (off >> PAGE_SHIFT); + off &= ~PAGE_MASK; - l = min_t(int, len, PAGE_CACHE_SIZE - off); + l = min_t(int, len, PAGE_SIZE - off); memcpy(buf, kmap(*pagep) + off, l); kunmap(*pagep); while ((len -= l) != 0) { buf += l; - l = min_t(int, len, PAGE_CACHE_SIZE); + l = min_t(int, len, PAGE_SIZE); memcpy(buf, kmap(*++pagep), l); kunmap(*pagep); } @@ -77,17 +77,17 @@ void hfs_bnode_write(struct hfs_bnode *node, void *buf, int off, int len) int l; off += node->page_offset; - pagep = node->page + (off >> PAGE_CACHE_SHIFT); - off &= ~PAGE_CACHE_MASK; + pagep = node->page + (off >> PAGE_SHIFT); + off &= ~PAGE_MASK; - l = min_t(int, len, PAGE_CACHE_SIZE - off); + l = min_t(int, len, PAGE_SIZE - off); memcpy(kmap(*pagep) + off, buf, l); set_page_dirty(*pagep); kunmap(*pagep); while ((len -= l) != 0) { buf += l; - l = min_t(int, len, PAGE_CACHE_SIZE); + l = min_t(int, len, PAGE_SIZE); memcpy(kmap(*++pagep), buf, l); set_page_dirty(*pagep); kunmap(*pagep); @@ -107,16 +107,16 @@ void hfs_bnode_clear(struct hfs_bnode *node, int off, int len) int l; off += node->page_offset; - pagep = node->page + (off >> PAGE_CACHE_SHIFT); - off &= ~PAGE_CACHE_MASK; + pagep = node->page + (off >> PAGE_SHIFT); + off &= ~PAGE_MASK; - l = min_t(int, len, PAGE_CACHE_SIZE - off); + l = min_t(int, len, PAGE_SIZE - off); memset(kmap(*pagep) + off, 0, l); set_page_dirty(*pagep); kunmap(*pagep); while ((len -= l) != 0) { - l = min_t(int, len, PAGE_CACHE_SIZE); + l = min_t(int, len, PAGE_SIZE); memset(kmap(*++pagep), 0, l); set_page_dirty(*pagep); kunmap(*pagep); @@ -136,20 +136,20 @@ void hfs_bnode_copy(struct hfs_bnode *dst_node, int dst, tree = src_node->tree; src += src_node->page_offset; dst += dst_node->page_offset; - src_page = src_node->page + (src >> PAGE_CACHE_SHIFT); - src &= ~PAGE_CACHE_MASK; - dst_page = dst_node->page + (dst >> PAGE_CACHE_SHIFT); - dst &= ~PAGE_CACHE_MASK; + src_page = src_node->page + (src >> PAGE_SHIFT); + src &= ~PAGE_MASK; + dst_page = dst_node->page + (dst >> PAGE_SHIFT); + dst &= ~PAGE_MASK; if (src == dst) { - l = min_t(int, len, PAGE_CACHE_SIZE - src); + l = min_t(int, len, PAGE_SIZE - src); memcpy(kmap(*dst_page) + src, kmap(*src_page) + src, l); kunmap(*src_page); set_page_dirty(*dst_page); kunmap(*dst_page); while ((len -= l) != 0) { - l = min_t(int, len, PAGE_CACHE_SIZE); + l = min_t(int, len, PAGE_SIZE); memcpy(kmap(*++dst_page), kmap(*++src_page), l); kunmap(*src_page); set_page_dirty(*dst_page); @@ -161,12 +161,12 @@ void hfs_bnode_copy(struct hfs_bnode *dst_node, int dst, do { src_ptr = kmap(*src_page) + src; dst_ptr = kmap(*dst_page) + dst; - if (PAGE_CACHE_SIZE - src < PAGE_CACHE_SIZE - dst) { - l = PAGE_CACHE_SIZE - src; + if (PAGE_SIZE - src < PAGE_SIZE - dst) { + l = PAGE_SIZE - src; src = 0; dst += l; } else { - l = PAGE_CACHE_SIZE - dst; + l = PAGE_SIZE - dst; src += l; dst = 
0; } @@ -195,11 +195,11 @@ void hfs_bnode_move(struct hfs_bnode *node, int dst, int src, int len) dst += node->page_offset; if (dst > src) { src += len - 1; - src_page = node->page + (src >> PAGE_CACHE_SHIFT); - src = (src & ~PAGE_CACHE_MASK) + 1; + src_page = node->page + (src >> PAGE_SHIFT); + src = (src & ~PAGE_MASK) + 1; dst += len - 1; - dst_page = node->page + (dst >> PAGE_CACHE_SHIFT); - dst = (dst & ~PAGE_CACHE_MASK) + 1; + dst_page = node->page + (dst >> PAGE_SHIFT); + dst = (dst & ~PAGE_MASK) + 1; if (src == dst) { while (src < len) { @@ -208,7 +208,7 @@ void hfs_bnode_move(struct hfs_bnode *node, int dst, int src, int len) set_page_dirty(*dst_page); kunmap(*dst_page); len -= src; - src = PAGE_CACHE_SIZE; + src = PAGE_SIZE; src_page--; dst_page--; } @@ -226,32 +226,32 @@ void hfs_bnode_move(struct hfs_bnode *node, int dst, int src, int len) dst_ptr = kmap(*dst_page) + dst; if (src < dst) { l = src; - src = PAGE_CACHE_SIZE; + src = PAGE_SIZE; dst -= l; } else { l = dst; src -= l; - dst = PAGE_CACHE_SIZE; + dst = PAGE_SIZE; } l = min(len, l); memmove(dst_ptr - l, src_ptr - l, l); kunmap(*src_page); set_page_dirty(*dst_page); kunmap(*dst_page); - if (dst == PAGE_CACHE_SIZE) + if (dst == PAGE_SIZE) dst_page--; else src_page--; } while ((len -= l)); } } else { - src_page = node->page + (src >> PAGE_CACHE_SHIFT); - src &= ~PAGE_CACHE_MASK; - dst_page = node->page + (dst >> PAGE_CACHE_SHIFT); - dst &= ~PAGE_CACHE_MASK; + src_page = node->page + (src >> PAGE_SHIFT); + src &= ~PAGE_MASK; + dst_page = node->page + (dst >> PAGE_SHIFT); + dst &= ~PAGE_MASK; if (src == dst) { - l = min_t(int, len, PAGE_CACHE_SIZE - src); + l = min_t(int, len, PAGE_SIZE - src); memmove(kmap(*dst_page) + src, kmap(*src_page) + src, l); kunmap(*src_page); @@ -259,7 +259,7 @@ void hfs_bnode_move(struct hfs_bnode *node, int dst, int src, int len) kunmap(*dst_page); while ((len -= l) != 0) { - l = min_t(int, len, PAGE_CACHE_SIZE); + l = min_t(int, len, PAGE_SIZE); memmove(kmap(*++dst_page), kmap(*++src_page), l); kunmap(*src_page); @@ -272,13 +272,13 @@ void hfs_bnode_move(struct hfs_bnode *node, int dst, int src, int len) do { src_ptr = kmap(*src_page) + src; dst_ptr = kmap(*dst_page) + dst; - if (PAGE_CACHE_SIZE - src < - PAGE_CACHE_SIZE - dst) { - l = PAGE_CACHE_SIZE - src; + if (PAGE_SIZE - src < + PAGE_SIZE - dst) { + l = PAGE_SIZE - src; src = 0; dst += l; } else { - l = PAGE_CACHE_SIZE - dst; + l = PAGE_SIZE - dst; src += l; dst = 0; } @@ -444,14 +444,14 @@ static struct hfs_bnode *__hfs_bnode_create(struct hfs_btree *tree, u32 cnid) mapping = tree->inode->i_mapping; off = (loff_t)cnid << tree->node_size_shift; - block = off >> PAGE_CACHE_SHIFT; - node->page_offset = off & ~PAGE_CACHE_MASK; + block = off >> PAGE_SHIFT; + node->page_offset = off & ~PAGE_MASK; for (i = 0; i < tree->pages_per_bnode; block++, i++) { page = read_mapping_page(mapping, block, NULL); if (IS_ERR(page)) goto fail; if (PageError(page)) { - page_cache_release(page); + put_page(page); goto fail; } node->page[i] = page; @@ -569,7 +569,7 @@ void hfs_bnode_free(struct hfs_bnode *node) for (i = 0; i < node->tree->pages_per_bnode; i++) if (node->page[i]) - page_cache_release(node->page[i]); + put_page(node->page[i]); kfree(node); } @@ -597,11 +597,11 @@ struct hfs_bnode *hfs_bnode_create(struct hfs_btree *tree, u32 num) pagep = node->page; memset(kmap(*pagep) + node->page_offset, 0, - min_t(int, PAGE_CACHE_SIZE, tree->node_size)); + min_t(int, PAGE_SIZE, tree->node_size)); set_page_dirty(*pagep); kunmap(*pagep); for (i = 1; i < 
tree->pages_per_bnode; i++) { - memset(kmap(*++pagep), 0, PAGE_CACHE_SIZE); + memset(kmap(*++pagep), 0, PAGE_SIZE); set_page_dirty(*pagep); kunmap(*pagep); } diff --git a/fs/hfsplus/btree.c b/fs/hfsplus/btree.c index 3345c7553edc..d9d1a36ba826 100644 --- a/fs/hfsplus/btree.c +++ b/fs/hfsplus/btree.c @@ -236,15 +236,15 @@ struct hfs_btree *hfs_btree_open(struct super_block *sb, u32 id) tree->node_size_shift = ffs(size) - 1; tree->pages_per_bnode = - (tree->node_size + PAGE_CACHE_SIZE - 1) >> - PAGE_CACHE_SHIFT; + (tree->node_size + PAGE_SIZE - 1) >> + PAGE_SHIFT; kunmap(page); - page_cache_release(page); + put_page(page); return tree; fail_page: - page_cache_release(page); + put_page(page); free_inode: tree->inode->i_mapping->a_ops = &hfsplus_aops; iput(tree->inode); @@ -380,9 +380,9 @@ struct hfs_bnode *hfs_bmap_alloc(struct hfs_btree *tree) off = off16; off += node->page_offset; - pagep = node->page + (off >> PAGE_CACHE_SHIFT); + pagep = node->page + (off >> PAGE_SHIFT); data = kmap(*pagep); - off &= ~PAGE_CACHE_MASK; + off &= ~PAGE_MASK; idx = 0; for (;;) { @@ -403,7 +403,7 @@ struct hfs_bnode *hfs_bmap_alloc(struct hfs_btree *tree) } } } - if (++off >= PAGE_CACHE_SIZE) { + if (++off >= PAGE_SIZE) { kunmap(*pagep); data = kmap(*++pagep); off = 0; @@ -426,9 +426,9 @@ struct hfs_bnode *hfs_bmap_alloc(struct hfs_btree *tree) len = hfs_brec_lenoff(node, 0, &off16); off = off16; off += node->page_offset; - pagep = node->page + (off >> PAGE_CACHE_SHIFT); + pagep = node->page + (off >> PAGE_SHIFT); data = kmap(*pagep); - off &= ~PAGE_CACHE_MASK; + off &= ~PAGE_MASK; } } @@ -475,9 +475,9 @@ void hfs_bmap_free(struct hfs_bnode *node) len = hfs_brec_lenoff(node, 0, &off); } off += node->page_offset + nidx / 8; - page = node->page[off >> PAGE_CACHE_SHIFT]; + page = node->page[off >> PAGE_SHIFT]; data = kmap(page); - off &= ~PAGE_CACHE_MASK; + off &= ~PAGE_MASK; m = 1 << (~nidx & 7); byte = data[off]; if (!(byte & m)) { diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c index 1a6394cdb54e..b28f39865c3a 100644 --- a/fs/hfsplus/inode.c +++ b/fs/hfsplus/inode.c @@ -87,9 +87,9 @@ static int hfsplus_releasepage(struct page *page, gfp_t mask) } if (!tree) return 0; - if (tree->node_size >= PAGE_CACHE_SIZE) { + if (tree->node_size >= PAGE_SIZE) { nidx = page->index >> - (tree->node_size_shift - PAGE_CACHE_SHIFT); + (tree->node_size_shift - PAGE_SHIFT); spin_lock(&tree->hash_lock); node = hfs_bnode_findhash(tree, nidx); if (!node) @@ -103,8 +103,8 @@ static int hfsplus_releasepage(struct page *page, gfp_t mask) spin_unlock(&tree->hash_lock); } else { nidx = page->index << - (PAGE_CACHE_SHIFT - tree->node_size_shift); - i = 1 << (PAGE_CACHE_SHIFT - tree->node_size_shift); + (PAGE_SHIFT - tree->node_size_shift); + i = 1 << (PAGE_SHIFT - tree->node_size_shift); spin_lock(&tree->hash_lock); do { node = hfs_bnode_findhash(tree, nidx++); diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c index 5d54490a136d..c35911362ff9 100644 --- a/fs/hfsplus/super.c +++ b/fs/hfsplus/super.c @@ -438,7 +438,7 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent) err = -EFBIG; last_fs_block = sbi->total_blocks - 1; last_fs_page = (last_fs_block << sbi->alloc_blksz_shift) >> - PAGE_CACHE_SHIFT; + PAGE_SHIFT; if ((last_fs_block > (sector_t)(~0ULL) >> (sbi->alloc_blksz_shift - 9)) || (last_fs_page > (pgoff_t)(~0ULL))) { diff --git a/fs/hfsplus/xattr.c b/fs/hfsplus/xattr.c index ab01530b4930..70e445ff0cff 100644 --- a/fs/hfsplus/xattr.c +++ b/fs/hfsplus/xattr.c @@ -220,7 +220,7 @@ 
check_attr_tree_state_again: index = 0; written = 0; - for (; written < node_size; index++, written += PAGE_CACHE_SIZE) { + for (; written < node_size; index++, written += PAGE_SIZE) { void *kaddr; page = read_mapping_page(mapping, index, NULL); @@ -231,11 +231,11 @@ check_attr_tree_state_again: kaddr = kmap_atomic(page); memcpy(kaddr, buf + written, - min_t(size_t, PAGE_CACHE_SIZE, node_size - written)); + min_t(size_t, PAGE_SIZE, node_size - written)); kunmap_atomic(kaddr); set_page_dirty(page); - page_cache_release(page); + put_page(page); } hfsplus_mark_inode_dirty(attr_file, HFSPLUS_I_ATTR_DIRTY); diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c index d1abbee281d1..7016653f3e41 100644 --- a/fs/hostfs/hostfs_kern.c +++ b/fs/hostfs/hostfs_kern.c @@ -410,12 +410,12 @@ static int hostfs_writepage(struct page *page, struct writeback_control *wbc) struct inode *inode = mapping->host; char *buffer; loff_t base = page_offset(page); - int count = PAGE_CACHE_SIZE; - int end_index = inode->i_size >> PAGE_CACHE_SHIFT; + int count = PAGE_SIZE; + int end_index = inode->i_size >> PAGE_SHIFT; int err; if (page->index >= end_index) - count = inode->i_size & (PAGE_CACHE_SIZE-1); + count = inode->i_size & (PAGE_SIZE-1); buffer = kmap(page); @@ -447,7 +447,7 @@ static int hostfs_readpage(struct file *file, struct page *page) buffer = kmap(page); bytes_read = read_file(FILE_HOSTFS_I(file)->fd, &start, buffer, - PAGE_CACHE_SIZE); + PAGE_SIZE); if (bytes_read < 0) { ClearPageUptodate(page); SetPageError(page); @@ -455,7 +455,7 @@ static int hostfs_readpage(struct file *file, struct page *page) goto out; } - memset(buffer + bytes_read, 0, PAGE_CACHE_SIZE - bytes_read); + memset(buffer + bytes_read, 0, PAGE_SIZE - bytes_read); ClearPageError(page); SetPageUptodate(page); @@ -471,7 +471,7 @@ static int hostfs_write_begin(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned flags, struct page **pagep, void **fsdata) { - pgoff_t index = pos >> PAGE_CACHE_SHIFT; + pgoff_t index = pos >> PAGE_SHIFT; *pagep = grab_cache_page_write_begin(mapping, index, flags); if (!*pagep) @@ -485,14 +485,14 @@ static int hostfs_write_end(struct file *file, struct address_space *mapping, { struct inode *inode = mapping->host; void *buffer; - unsigned from = pos & (PAGE_CACHE_SIZE - 1); + unsigned from = pos & (PAGE_SIZE - 1); int err; buffer = kmap(page); err = write_file(FILE_HOSTFS_I(file)->fd, &pos, buffer + from, copied); kunmap(page); - if (!PageUptodate(page) && err == PAGE_CACHE_SIZE) + if (!PageUptodate(page) && err == PAGE_SIZE) SetPageUptodate(page); /* @@ -502,7 +502,7 @@ static int hostfs_write_end(struct file *file, struct address_space *mapping, if (err > 0 && (pos > inode->i_size)) inode->i_size = pos; unlock_page(page); - page_cache_release(page); + put_page(page); return err; } diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index e1f465a389d5..afb7c7f05de5 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -213,12 +213,12 @@ hugetlbfs_read_actor(struct page *page, unsigned long offset, int i, chunksize; /* Find which 4k chunk and offset with in that chunk */ - i = offset >> PAGE_CACHE_SHIFT; - offset = offset & ~PAGE_CACHE_MASK; + i = offset >> PAGE_SHIFT; + offset = offset & ~PAGE_MASK; while (size) { size_t n; - chunksize = PAGE_CACHE_SIZE; + chunksize = PAGE_SIZE; if (offset) chunksize -= offset; if (chunksize > size) @@ -285,7 +285,7 @@ static ssize_t hugetlbfs_read_iter(struct kiocb *iocb, struct iov_iter *to) * We have the page, copy it to 
user space buffer. */ copied = hugetlbfs_read_actor(page, offset, to, nr); - page_cache_release(page); + put_page(page); } offset += copied; retval += copied; diff --git a/fs/isofs/compress.c b/fs/isofs/compress.c index f311bf084015..2e4e834d1a98 100644 --- a/fs/isofs/compress.c +++ b/fs/isofs/compress.c @@ -26,7 +26,7 @@ #include "zisofs.h" /* This should probably be global. */ -static char zisofs_sink_page[PAGE_CACHE_SIZE]; +static char zisofs_sink_page[PAGE_SIZE]; /* * This contains the zlib memory allocation and the mutex for the @@ -70,11 +70,11 @@ static loff_t zisofs_uncompress_block(struct inode *inode, loff_t block_start, for ( i = 0 ; i < pcount ; i++ ) { if (!pages[i]) continue; - memset(page_address(pages[i]), 0, PAGE_CACHE_SIZE); + memset(page_address(pages[i]), 0, PAGE_SIZE); flush_dcache_page(pages[i]); SetPageUptodate(pages[i]); } - return ((loff_t)pcount) << PAGE_CACHE_SHIFT; + return ((loff_t)pcount) << PAGE_SHIFT; } /* Because zlib is not thread-safe, do all the I/O at the top. */ @@ -121,11 +121,11 @@ static loff_t zisofs_uncompress_block(struct inode *inode, loff_t block_start, if (pages[curpage]) { stream.next_out = page_address(pages[curpage]) + poffset; - stream.avail_out = PAGE_CACHE_SIZE - poffset; + stream.avail_out = PAGE_SIZE - poffset; poffset = 0; } else { stream.next_out = (void *)&zisofs_sink_page; - stream.avail_out = PAGE_CACHE_SIZE; + stream.avail_out = PAGE_SIZE; } } if (!stream.avail_in) { @@ -220,14 +220,14 @@ static int zisofs_fill_pages(struct inode *inode, int full_page, int pcount, * pages with the data we have anyway... */ start_off = page_offset(pages[full_page]); - end_off = min_t(loff_t, start_off + PAGE_CACHE_SIZE, inode->i_size); + end_off = min_t(loff_t, start_off + PAGE_SIZE, inode->i_size); cstart_block = start_off >> zisofs_block_shift; cend_block = (end_off + (1 << zisofs_block_shift) - 1) >> zisofs_block_shift; - WARN_ON(start_off - (full_page << PAGE_CACHE_SHIFT) != - ((cstart_block << zisofs_block_shift) & PAGE_CACHE_MASK)); + WARN_ON(start_off - (full_page << PAGE_SHIFT) != + ((cstart_block << zisofs_block_shift) & PAGE_MASK)); /* Find the pointer to this specific chunk */ /* Note: we're not using isonum_731() here because the data is known aligned */ @@ -260,10 +260,10 @@ static int zisofs_fill_pages(struct inode *inode, int full_page, int pcount, ret = zisofs_uncompress_block(inode, block_start, block_end, pcount, pages, poffset, &err); poffset += ret; - pages += poffset >> PAGE_CACHE_SHIFT; - pcount -= poffset >> PAGE_CACHE_SHIFT; - full_page -= poffset >> PAGE_CACHE_SHIFT; - poffset &= ~PAGE_CACHE_MASK; + pages += poffset >> PAGE_SHIFT; + pcount -= poffset >> PAGE_SHIFT; + full_page -= poffset >> PAGE_SHIFT; + poffset &= ~PAGE_MASK; if (err) { brelse(bh); @@ -282,7 +282,7 @@ static int zisofs_fill_pages(struct inode *inode, int full_page, int pcount, if (poffset && *pages) { memset(page_address(*pages) + poffset, 0, - PAGE_CACHE_SIZE - poffset); + PAGE_SIZE - poffset); flush_dcache_page(*pages); SetPageUptodate(*pages); } @@ -302,12 +302,12 @@ static int zisofs_readpage(struct file *file, struct page *page) int i, pcount, full_page; unsigned int zisofs_block_shift = ISOFS_I(inode)->i_format_parm[1]; unsigned int zisofs_pages_per_cblock = - PAGE_CACHE_SHIFT <= zisofs_block_shift ? - (1 << (zisofs_block_shift - PAGE_CACHE_SHIFT)) : 0; + PAGE_SHIFT <= zisofs_block_shift ? 
+ (1 << (zisofs_block_shift - PAGE_SHIFT)) : 0; struct page *pages[max_t(unsigned, zisofs_pages_per_cblock, 1)]; pgoff_t index = page->index, end_index; - end_index = (inode->i_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; + end_index = (inode->i_size + PAGE_SIZE - 1) >> PAGE_SHIFT; /* * If this page is wholly outside i_size we just return zero; * do_generic_file_read() will handle this for us @@ -318,7 +318,7 @@ static int zisofs_readpage(struct file *file, struct page *page) return 0; } - if (PAGE_CACHE_SHIFT <= zisofs_block_shift) { + if (PAGE_SHIFT <= zisofs_block_shift) { /* We have already been given one page, this is the one we must do. */ full_page = index & (zisofs_pages_per_cblock - 1); @@ -351,7 +351,7 @@ static int zisofs_readpage(struct file *file, struct page *page) kunmap(pages[i]); unlock_page(pages[i]); if (i != full_page) - page_cache_release(pages[i]); + put_page(pages[i]); } } diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c index bcd2d41b318a..131dedc920d8 100644 --- a/fs/isofs/inode.c +++ b/fs/isofs/inode.c @@ -1021,7 +1021,7 @@ int isofs_get_blocks(struct inode *inode, sector_t iblock, * the page with useless information without generating any * I/O errors. */ - if (b_off > ((inode->i_size + PAGE_CACHE_SIZE - 1) >> ISOFS_BUFFER_BITS(inode))) { + if (b_off > ((inode->i_size + PAGE_SIZE - 1) >> ISOFS_BUFFER_BITS(inode))) { printk(KERN_DEBUG "%s: block >= EOF (%lu, %llu)\n", __func__, b_off, (unsigned long long)inode->i_size); diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index 517f2de784cf..2ad98d6e19f4 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -81,11 +81,11 @@ static void release_buffer_page(struct buffer_head *bh) if (!trylock_page(page)) goto nope; - page_cache_get(page); + get_page(page); __brelse(bh); try_to_free_buffers(page); unlock_page(page); - page_cache_release(page); + put_page(page); return; nope: diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index de73a9516a54..435f0b26ac20 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -2221,7 +2221,7 @@ void jbd2_journal_ack_err(journal_t *journal) int jbd2_journal_blocks_per_page(struct inode *inode) { - return 1 << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits); + return 1 << (PAGE_SHIFT - inode->i_sb->s_blocksize_bits); } /* diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index 01e4652d88f6..67c103867bf8 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c @@ -2263,7 +2263,7 @@ int jbd2_journal_invalidatepage(journal_t *journal, struct buffer_head *head, *bh, *next; unsigned int stop = offset + length; unsigned int curr_off = 0; - int partial_page = (offset || length < PAGE_CACHE_SIZE); + int partial_page = (offset || length < PAGE_SIZE); int may_free = 1; int ret = 0; @@ -2272,7 +2272,7 @@ int jbd2_journal_invalidatepage(journal_t *journal, if (!page_has_buffers(page)) return 0; - BUG_ON(stop > PAGE_CACHE_SIZE || stop < length); + BUG_ON(stop > PAGE_SIZE || stop < length); /* We will potentially be playing with lists other than just the * data lists (especially for journaled data mode), so be diff --git a/fs/jffs2/debug.c b/fs/jffs2/debug.c index 1090eb64b90d..9d26b1b9fc01 100644 --- a/fs/jffs2/debug.c +++ b/fs/jffs2/debug.c @@ -95,15 +95,15 @@ __jffs2_dbg_fragtree_paranoia_check_nolock(struct jffs2_inode_info *f) rather than mucking around with actually reading the node and checking the compression type, which is the real way to tell a hole node. 
*/ - if (frag->ofs & (PAGE_CACHE_SIZE-1) && frag_prev(frag) - && frag_prev(frag)->size < PAGE_CACHE_SIZE && frag_prev(frag)->node) { + if (frag->ofs & (PAGE_SIZE-1) && frag_prev(frag) + && frag_prev(frag)->size < PAGE_SIZE && frag_prev(frag)->node) { JFFS2_ERROR("REF_PRISTINE node at 0x%08x had a previous non-hole frag in the same page. Tell dwmw2.\n", ref_offset(fn->raw)); bitched = 1; } - if ((frag->ofs+frag->size) & (PAGE_CACHE_SIZE-1) && frag_next(frag) - && frag_next(frag)->size < PAGE_CACHE_SIZE && frag_next(frag)->node) { + if ((frag->ofs+frag->size) & (PAGE_SIZE-1) && frag_next(frag) + && frag_next(frag)->size < PAGE_SIZE && frag_next(frag)->node) { JFFS2_ERROR("REF_PRISTINE node at 0x%08x (%08x-%08x) had a following non-hole frag in the same page. Tell dwmw2.\n", ref_offset(fn->raw), frag->ofs, frag->ofs+frag->size); bitched = 1; diff --git a/fs/jffs2/file.c b/fs/jffs2/file.c index cad86bac3453..0e62dec3effc 100644 --- a/fs/jffs2/file.c +++ b/fs/jffs2/file.c @@ -87,14 +87,15 @@ static int jffs2_do_readpage_nolock (struct inode *inode, struct page *pg) int ret; jffs2_dbg(2, "%s(): ino #%lu, page at offset 0x%lx\n", - __func__, inode->i_ino, pg->index << PAGE_CACHE_SHIFT); + __func__, inode->i_ino, pg->index << PAGE_SHIFT); BUG_ON(!PageLocked(pg)); pg_buf = kmap(pg); /* FIXME: Can kmap fail? */ - ret = jffs2_read_inode_range(c, f, pg_buf, pg->index << PAGE_CACHE_SHIFT, PAGE_CACHE_SIZE); + ret = jffs2_read_inode_range(c, f, pg_buf, pg->index << PAGE_SHIFT, + PAGE_SIZE); if (ret) { ClearPageUptodate(pg); @@ -137,8 +138,8 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping, struct page *pg; struct inode *inode = mapping->host; struct jffs2_inode_info *f = JFFS2_INODE_INFO(inode); - pgoff_t index = pos >> PAGE_CACHE_SHIFT; - uint32_t pageofs = index << PAGE_CACHE_SHIFT; + pgoff_t index = pos >> PAGE_SHIFT; + uint32_t pageofs = index << PAGE_SHIFT; int ret = 0; pg = grab_cache_page_write_begin(mapping, index, flags); @@ -230,7 +231,7 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping, out_page: unlock_page(pg); - page_cache_release(pg); + put_page(pg); return ret; } @@ -245,14 +246,14 @@ static int jffs2_write_end(struct file *filp, struct address_space *mapping, struct jffs2_inode_info *f = JFFS2_INODE_INFO(inode); struct jffs2_sb_info *c = JFFS2_SB_INFO(inode->i_sb); struct jffs2_raw_inode *ri; - unsigned start = pos & (PAGE_CACHE_SIZE - 1); + unsigned start = pos & (PAGE_SIZE - 1); unsigned end = start + copied; unsigned aligned_start = start & ~3; int ret = 0; uint32_t writtenlen = 0; jffs2_dbg(1, "%s(): ino #%lu, page at 0x%lx, range %d-%d, flags %lx\n", - __func__, inode->i_ino, pg->index << PAGE_CACHE_SHIFT, + __func__, inode->i_ino, pg->index << PAGE_SHIFT, start, end, pg->flags); /* We need to avoid deadlock with page_cache_read() in @@ -261,7 +262,7 @@ static int jffs2_write_end(struct file *filp, struct address_space *mapping, to re-lock it. */ BUG_ON(!PageUptodate(pg)); - if (end == PAGE_CACHE_SIZE) { + if (end == PAGE_SIZE) { /* When writing out the end of a page, write out the _whole_ page. 
This helps to reduce the number of nodes in files which have many short writes, like @@ -275,7 +276,7 @@ static int jffs2_write_end(struct file *filp, struct address_space *mapping, jffs2_dbg(1, "%s(): Allocation of raw inode failed\n", __func__); unlock_page(pg); - page_cache_release(pg); + put_page(pg); return -ENOMEM; } @@ -292,7 +293,7 @@ static int jffs2_write_end(struct file *filp, struct address_space *mapping, kmap(pg); ret = jffs2_write_inode_range(c, f, ri, page_address(pg) + aligned_start, - (pg->index << PAGE_CACHE_SHIFT) + aligned_start, + (pg->index << PAGE_SHIFT) + aligned_start, end - aligned_start, &writtenlen); kunmap(pg); @@ -329,6 +330,6 @@ static int jffs2_write_end(struct file *filp, struct address_space *mapping, jffs2_dbg(1, "%s() returning %d\n", __func__, writtenlen > 0 ? writtenlen : ret); unlock_page(pg); - page_cache_release(pg); + put_page(pg); return writtenlen > 0 ? writtenlen : ret; } diff --git a/fs/jffs2/fs.c b/fs/jffs2/fs.c index bead25ae8fe4..ae2ebb26b446 100644 --- a/fs/jffs2/fs.c +++ b/fs/jffs2/fs.c @@ -586,8 +586,8 @@ int jffs2_do_fill_super(struct super_block *sb, void *data, int silent) goto out_root; sb->s_maxbytes = 0xFFFFFFFF; - sb->s_blocksize = PAGE_CACHE_SIZE; - sb->s_blocksize_bits = PAGE_CACHE_SHIFT; + sb->s_blocksize = PAGE_SIZE; + sb->s_blocksize_bits = PAGE_SHIFT; sb->s_magic = JFFS2_SUPER_MAGIC; if (!(sb->s_flags & MS_RDONLY)) jffs2_start_garbage_collect_thread(c); @@ -685,7 +685,7 @@ unsigned char *jffs2_gc_fetch_page(struct jffs2_sb_info *c, struct inode *inode = OFNI_EDONI_2SFFJ(f); struct page *pg; - pg = read_cache_page(inode->i_mapping, offset >> PAGE_CACHE_SHIFT, + pg = read_cache_page(inode->i_mapping, offset >> PAGE_SHIFT, (void *)jffs2_do_readpage_unlock, inode); if (IS_ERR(pg)) return (void *)pg; @@ -701,7 +701,7 @@ void jffs2_gc_release_page(struct jffs2_sb_info *c, struct page *pg = (void *)*priv; kunmap(pg); - page_cache_release(pg); + put_page(pg); } static int jffs2_flash_setup(struct jffs2_sb_info *c) { diff --git a/fs/jffs2/gc.c b/fs/jffs2/gc.c index 7e553f286775..9ed0f26cf023 100644 --- a/fs/jffs2/gc.c +++ b/fs/jffs2/gc.c @@ -552,7 +552,7 @@ static int jffs2_garbage_collect_live(struct jffs2_sb_info *c, struct jffs2_era goto upnout; } /* We found a datanode. Do the GC */ - if((start >> PAGE_CACHE_SHIFT) < ((end-1) >> PAGE_CACHE_SHIFT)) { + if((start >> PAGE_SHIFT) < ((end-1) >> PAGE_SHIFT)) { /* It crosses a page boundary. Therefore, it must be a hole. 
*/ ret = jffs2_garbage_collect_hole(c, jeb, f, fn, start, end); } else { @@ -1192,8 +1192,8 @@ static int jffs2_garbage_collect_dnode(struct jffs2_sb_info *c, struct jffs2_era struct jffs2_node_frag *frag; uint32_t min, max; - min = start & ~(PAGE_CACHE_SIZE-1); - max = min + PAGE_CACHE_SIZE; + min = start & ~(PAGE_SIZE-1); + max = min + PAGE_SIZE; frag = jffs2_lookup_node_frag(&f->fragtree, start); @@ -1351,7 +1351,7 @@ static int jffs2_garbage_collect_dnode(struct jffs2_sb_info *c, struct jffs2_era cdatalen = min_t(uint32_t, alloclen - sizeof(ri), end - offset); datalen = end - offset; - writebuf = pg_ptr + (offset & (PAGE_CACHE_SIZE -1)); + writebuf = pg_ptr + (offset & (PAGE_SIZE -1)); comprtype = jffs2_compress(c, f, writebuf, &comprbuf, &datalen, &cdatalen); diff --git a/fs/jffs2/nodelist.c b/fs/jffs2/nodelist.c index 9a5449bc3afb..b86c78d178c6 100644 --- a/fs/jffs2/nodelist.c +++ b/fs/jffs2/nodelist.c @@ -90,7 +90,7 @@ uint32_t jffs2_truncate_fragtree(struct jffs2_sb_info *c, struct rb_root *list, /* If the last fragment starts at the RAM page boundary, it is * REF_PRISTINE irrespective of its size. */ - if (frag->node && (frag->ofs & (PAGE_CACHE_SIZE - 1)) == 0) { + if (frag->node && (frag->ofs & (PAGE_SIZE - 1)) == 0) { dbg_fragtree2("marking the last fragment 0x%08x-0x%08x REF_PRISTINE.\n", frag->ofs, frag->ofs + frag->size); frag->node->raw->flash_offset = ref_offset(frag->node->raw) | REF_PRISTINE; @@ -237,7 +237,7 @@ static int jffs2_add_frag_to_fragtree(struct jffs2_sb_info *c, struct rb_root *r If so, both 'this' and the new node get marked REF_NORMAL so the GC can take a look. */ - if (lastend && (lastend-1) >> PAGE_CACHE_SHIFT == newfrag->ofs >> PAGE_CACHE_SHIFT) { + if (lastend && (lastend-1) >> PAGE_SHIFT == newfrag->ofs >> PAGE_SHIFT) { if (this->node) mark_ref_normal(this->node->raw); mark_ref_normal(newfrag->node->raw); @@ -382,7 +382,7 @@ int jffs2_add_full_dnode_to_inode(struct jffs2_sb_info *c, struct jffs2_inode_in /* If we now share a page with other nodes, mark either previous or next node REF_NORMAL, as appropriate. */ - if (newfrag->ofs & (PAGE_CACHE_SIZE-1)) { + if (newfrag->ofs & (PAGE_SIZE-1)) { struct jffs2_node_frag *prev = frag_prev(newfrag); mark_ref_normal(fn->raw); @@ -391,7 +391,7 @@ int jffs2_add_full_dnode_to_inode(struct jffs2_sb_info *c, struct jffs2_inode_in mark_ref_normal(prev->node->raw); } - if ((newfrag->ofs+newfrag->size) & (PAGE_CACHE_SIZE-1)) { + if ((newfrag->ofs+newfrag->size) & (PAGE_SIZE-1)) { struct jffs2_node_frag *next = frag_next(newfrag); if (next) { diff --git a/fs/jffs2/write.c b/fs/jffs2/write.c index b634de4c8101..7fb187ab2682 100644 --- a/fs/jffs2/write.c +++ b/fs/jffs2/write.c @@ -172,8 +172,8 @@ struct jffs2_full_dnode *jffs2_write_dnode(struct jffs2_sb_info *c, struct jffs2 beginning of a page and runs to the end of the file, or if it's a hole node, mark it REF_PRISTINE, else REF_NORMAL. 
*/ - if ((je32_to_cpu(ri->dsize) >= PAGE_CACHE_SIZE) || - ( ((je32_to_cpu(ri->offset)&(PAGE_CACHE_SIZE-1))==0) && + if ((je32_to_cpu(ri->dsize) >= PAGE_SIZE) || + ( ((je32_to_cpu(ri->offset)&(PAGE_SIZE-1))==0) && (je32_to_cpu(ri->dsize)+je32_to_cpu(ri->offset) == je32_to_cpu(ri->isize)))) { flash_ofs |= REF_PRISTINE; } else { @@ -366,7 +366,8 @@ int jffs2_write_inode_range(struct jffs2_sb_info *c, struct jffs2_inode_info *f, break; } mutex_lock(&f->sem); - datalen = min_t(uint32_t, writelen, PAGE_CACHE_SIZE - (offset & (PAGE_CACHE_SIZE-1))); + datalen = min_t(uint32_t, writelen, + PAGE_SIZE - (offset & (PAGE_SIZE-1))); cdatalen = min_t(uint32_t, alloclen - sizeof(*ri), datalen); comprtype = jffs2_compress(c, f, buf, &comprbuf, &datalen, &cdatalen); diff --git a/fs/jfs/jfs_metapage.c b/fs/jfs/jfs_metapage.c index a3eb316b1ac3..b60e015cc757 100644 --- a/fs/jfs/jfs_metapage.c +++ b/fs/jfs/jfs_metapage.c @@ -80,7 +80,7 @@ static inline void lock_metapage(struct metapage *mp) static struct kmem_cache *metapage_cache; static mempool_t *metapage_mempool; -#define MPS_PER_PAGE (PAGE_CACHE_SIZE >> L2PSIZE) +#define MPS_PER_PAGE (PAGE_SIZE >> L2PSIZE) #if MPS_PER_PAGE > 1 @@ -316,7 +316,7 @@ static void last_write_complete(struct page *page) struct metapage *mp; unsigned int offset; - for (offset = 0; offset < PAGE_CACHE_SIZE; offset += PSIZE) { + for (offset = 0; offset < PAGE_SIZE; offset += PSIZE) { mp = page_to_mp(page, offset); if (mp && test_bit(META_io, &mp->flag)) { if (mp->lsn) @@ -366,12 +366,12 @@ static int metapage_writepage(struct page *page, struct writeback_control *wbc) int bad_blocks = 0; page_start = (sector_t)page->index << - (PAGE_CACHE_SHIFT - inode->i_blkbits); + (PAGE_SHIFT - inode->i_blkbits); BUG_ON(!PageLocked(page)); BUG_ON(PageWriteback(page)); set_page_writeback(page); - for (offset = 0; offset < PAGE_CACHE_SIZE; offset += PSIZE) { + for (offset = 0; offset < PAGE_SIZE; offset += PSIZE) { mp = page_to_mp(page, offset); if (!mp || !test_bit(META_dirty, &mp->flag)) @@ -416,7 +416,7 @@ static int metapage_writepage(struct page *page, struct writeback_control *wbc) bio = NULL; } else inc_io(page); - xlen = (PAGE_CACHE_SIZE - offset) >> inode->i_blkbits; + xlen = (PAGE_SIZE - offset) >> inode->i_blkbits; pblock = metapage_get_blocks(inode, lblock, &xlen); if (!pblock) { printk(KERN_ERR "JFS: metapage_get_blocks failed\n"); @@ -485,7 +485,7 @@ static int metapage_readpage(struct file *fp, struct page *page) struct inode *inode = page->mapping->host; struct bio *bio = NULL; int block_offset; - int blocks_per_page = PAGE_CACHE_SIZE >> inode->i_blkbits; + int blocks_per_page = PAGE_SIZE >> inode->i_blkbits; sector_t page_start; /* address of page in fs blocks */ sector_t pblock; int xlen; @@ -494,7 +494,7 @@ static int metapage_readpage(struct file *fp, struct page *page) BUG_ON(!PageLocked(page)); page_start = (sector_t)page->index << - (PAGE_CACHE_SHIFT - inode->i_blkbits); + (PAGE_SHIFT - inode->i_blkbits); block_offset = 0; while (block_offset < blocks_per_page) { @@ -542,7 +542,7 @@ static int metapage_releasepage(struct page *page, gfp_t gfp_mask) int ret = 1; int offset; - for (offset = 0; offset < PAGE_CACHE_SIZE; offset += PSIZE) { + for (offset = 0; offset < PAGE_SIZE; offset += PSIZE) { mp = page_to_mp(page, offset); if (!mp) @@ -568,7 +568,7 @@ static int metapage_releasepage(struct page *page, gfp_t gfp_mask) static void metapage_invalidatepage(struct page *page, unsigned int offset, unsigned int length) { - BUG_ON(offset || length < PAGE_CACHE_SIZE); + 
BUG_ON(offset || length < PAGE_SIZE); BUG_ON(PageWriteback(page)); @@ -599,10 +599,10 @@ struct metapage *__get_metapage(struct inode *inode, unsigned long lblock, inode->i_ino, lblock, absolute); l2bsize = inode->i_blkbits; - l2BlocksPerPage = PAGE_CACHE_SHIFT - l2bsize; + l2BlocksPerPage = PAGE_SHIFT - l2bsize; page_index = lblock >> l2BlocksPerPage; page_offset = (lblock - (page_index << l2BlocksPerPage)) << l2bsize; - if ((page_offset + size) > PAGE_CACHE_SIZE) { + if ((page_offset + size) > PAGE_SIZE) { jfs_err("MetaData crosses page boundary!!"); jfs_err("lblock = %lx, size = %d", lblock, size); dump_stack(); @@ -621,7 +621,7 @@ struct metapage *__get_metapage(struct inode *inode, unsigned long lblock, mapping = inode->i_mapping; } - if (new && (PSIZE == PAGE_CACHE_SIZE)) { + if (new && (PSIZE == PAGE_SIZE)) { page = grab_cache_page(mapping, page_index); if (!page) { jfs_err("grab_cache_page failed!"); @@ -693,7 +693,7 @@ unlock: void grab_metapage(struct metapage * mp) { jfs_info("grab_metapage: mp = 0x%p", mp); - page_cache_get(mp->page); + get_page(mp->page); lock_page(mp->page); mp->count++; lock_metapage(mp); @@ -706,12 +706,12 @@ void force_metapage(struct metapage *mp) jfs_info("force_metapage: mp = 0x%p", mp); set_bit(META_forcewrite, &mp->flag); clear_bit(META_sync, &mp->flag); - page_cache_get(page); + get_page(page); lock_page(page); set_page_dirty(page); write_one_page(page, 1); clear_bit(META_forcewrite, &mp->flag); - page_cache_release(page); + put_page(page); } void hold_metapage(struct metapage *mp) @@ -726,7 +726,7 @@ void put_metapage(struct metapage *mp) unlock_page(mp->page); return; } - page_cache_get(mp->page); + get_page(mp->page); mp->count++; lock_metapage(mp); unlock_page(mp->page); @@ -746,7 +746,7 @@ void release_metapage(struct metapage * mp) assert(mp->count); if (--mp->count || mp->nohomeok) { unlock_page(page); - page_cache_release(page); + put_page(page); return; } @@ -764,13 +764,13 @@ void release_metapage(struct metapage * mp) drop_metapage(page, mp); unlock_page(page); - page_cache_release(page); + put_page(page); } void __invalidate_metapages(struct inode *ip, s64 addr, int len) { sector_t lblock; - int l2BlocksPerPage = PAGE_CACHE_SHIFT - ip->i_blkbits; + int l2BlocksPerPage = PAGE_SHIFT - ip->i_blkbits; int BlocksPerPage = 1 << l2BlocksPerPage; /* All callers are interested in block device's mapping */ struct address_space *mapping = @@ -788,7 +788,7 @@ void __invalidate_metapages(struct inode *ip, s64 addr, int len) page = find_lock_page(mapping, lblock >> l2BlocksPerPage); if (!page) continue; - for (offset = 0; offset < PAGE_CACHE_SIZE; offset += PSIZE) { + for (offset = 0; offset < PAGE_SIZE; offset += PSIZE) { mp = page_to_mp(page, offset); if (!mp) continue; @@ -803,7 +803,7 @@ void __invalidate_metapages(struct inode *ip, s64 addr, int len) remove_from_logsync(mp); } unlock_page(page); - page_cache_release(page); + put_page(page); } } diff --git a/fs/jfs/jfs_metapage.h b/fs/jfs/jfs_metapage.h index 337e9e51ac06..a869fb4a20d6 100644 --- a/fs/jfs/jfs_metapage.h +++ b/fs/jfs/jfs_metapage.h @@ -106,7 +106,7 @@ static inline void metapage_nohomeok(struct metapage *mp) lock_page(page); if (!mp->nohomeok++) { mark_metapage_dirty(mp); - page_cache_get(page); + get_page(page); wait_on_page_writeback(page); } unlock_page(page); @@ -128,7 +128,7 @@ static inline void metapage_wait_for_io(struct metapage *mp) static inline void _metapage_homeok(struct metapage *mp) { if (!--mp->nohomeok) - page_cache_release(mp->page); + put_page(mp->page); } static 
inline void metapage_homeok(struct metapage *mp) diff --git a/fs/jfs/super.c b/fs/jfs/super.c index 4f5d85ba8e23..78d599198bf5 100644 --- a/fs/jfs/super.c +++ b/fs/jfs/super.c @@ -596,7 +596,7 @@ static int jfs_fill_super(struct super_block *sb, void *data, int silent) * Page cache is indexed by long. * I would use MAX_LFS_FILESIZE, but it's only half as big */ - sb->s_maxbytes = min(((u64) PAGE_CACHE_SIZE << 32) - 1, + sb->s_maxbytes = min(((u64) PAGE_SIZE << 32) - 1, (u64)sb->s_maxbytes); #endif sb->s_time_gran = 1; diff --git a/fs/kernfs/mount.c b/fs/kernfs/mount.c index b67dbccdaf88..f73541fbe7af 100644 --- a/fs/kernfs/mount.c +++ b/fs/kernfs/mount.c @@ -138,8 +138,8 @@ static int kernfs_fill_super(struct super_block *sb, unsigned long magic) struct dentry *root; info->sb = sb; - sb->s_blocksize = PAGE_CACHE_SIZE; - sb->s_blocksize_bits = PAGE_CACHE_SHIFT; + sb->s_blocksize = PAGE_SIZE; + sb->s_blocksize_bits = PAGE_SHIFT; sb->s_magic = magic; sb->s_op = &kernfs_sops; sb->s_time_gran = 1; diff --git a/fs/libfs.c b/fs/libfs.c index 0ca80b2af420..f3fa82ce9b70 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -25,7 +25,7 @@ int simple_getattr(struct vfsmount *mnt, struct dentry *dentry, { struct inode *inode = d_inode(dentry); generic_fillattr(inode, stat); - stat->blocks = inode->i_mapping->nrpages << (PAGE_CACHE_SHIFT - 9); + stat->blocks = inode->i_mapping->nrpages << (PAGE_SHIFT - 9); return 0; } EXPORT_SYMBOL(simple_getattr); @@ -33,7 +33,7 @@ EXPORT_SYMBOL(simple_getattr); int simple_statfs(struct dentry *dentry, struct kstatfs *buf) { buf->f_type = dentry->d_sb->s_magic; - buf->f_bsize = PAGE_CACHE_SIZE; + buf->f_bsize = PAGE_SIZE; buf->f_namelen = NAME_MAX; return 0; } @@ -395,7 +395,7 @@ int simple_write_begin(struct file *file, struct address_space *mapping, struct page *page; pgoff_t index; - index = pos >> PAGE_CACHE_SHIFT; + index = pos >> PAGE_SHIFT; page = grab_cache_page_write_begin(mapping, index, flags); if (!page) @@ -403,10 +403,10 @@ int simple_write_begin(struct file *file, struct address_space *mapping, *pagep = page; - if (!PageUptodate(page) && (len != PAGE_CACHE_SIZE)) { - unsigned from = pos & (PAGE_CACHE_SIZE - 1); + if (!PageUptodate(page) && (len != PAGE_SIZE)) { + unsigned from = pos & (PAGE_SIZE - 1); - zero_user_segments(page, 0, from, from + len, PAGE_CACHE_SIZE); + zero_user_segments(page, 0, from, from + len, PAGE_SIZE); } return 0; } @@ -442,7 +442,7 @@ int simple_write_end(struct file *file, struct address_space *mapping, /* zero the stale part of the page if we did a short copy */ if (copied < len) { - unsigned from = pos & (PAGE_CACHE_SIZE - 1); + unsigned from = pos & (PAGE_SIZE - 1); zero_user(page, from + copied, len - copied); } @@ -458,7 +458,7 @@ int simple_write_end(struct file *file, struct address_space *mapping, set_page_dirty(page); unlock_page(page); - page_cache_release(page); + put_page(page); return copied; } @@ -477,8 +477,8 @@ int simple_fill_super(struct super_block *s, unsigned long magic, struct dentry *dentry; int i; - s->s_blocksize = PAGE_CACHE_SIZE; - s->s_blocksize_bits = PAGE_CACHE_SHIFT; + s->s_blocksize = PAGE_SIZE; + s->s_blocksize_bits = PAGE_SHIFT; s->s_magic = magic; s->s_op = &simple_super_operations; s->s_time_gran = 1; @@ -994,12 +994,12 @@ int generic_check_addressable(unsigned blocksize_bits, u64 num_blocks) { u64 last_fs_block = num_blocks - 1; u64 last_fs_page = - last_fs_block >> (PAGE_CACHE_SHIFT - blocksize_bits); + last_fs_block >> (PAGE_SHIFT - blocksize_bits); if (unlikely(num_blocks == 0)) return 0; - if 
((blocksize_bits < 9) || (blocksize_bits > PAGE_CACHE_SHIFT)) + if ((blocksize_bits < 9) || (blocksize_bits > PAGE_SHIFT)) return -EINVAL; if ((last_fs_block > (sector_t)(~0ULL) >> (blocksize_bits - 9)) || diff --git a/fs/logfs/dev_bdev.c b/fs/logfs/dev_bdev.c index a709d80c8ebc..cc26f8f215f5 100644 --- a/fs/logfs/dev_bdev.c +++ b/fs/logfs/dev_bdev.c @@ -64,7 +64,7 @@ static void writeseg_end_io(struct bio *bio) bio_for_each_segment_all(bvec, bio, i) { end_page_writeback(bvec->bv_page); - page_cache_release(bvec->bv_page); + put_page(bvec->bv_page); } bio_put(bio); if (atomic_dec_and_test(&super->s_pending_writes)) diff --git a/fs/logfs/dev_mtd.c b/fs/logfs/dev_mtd.c index 9c501449450d..b76a62b1978f 100644 --- a/fs/logfs/dev_mtd.c +++ b/fs/logfs/dev_mtd.c @@ -46,9 +46,9 @@ static int loffs_mtd_write(struct super_block *sb, loff_t ofs, size_t len, BUG_ON((ofs >= mtd->size) || (len > mtd->size - ofs)); BUG_ON(ofs != (ofs >> super->s_writeshift) << super->s_writeshift); - BUG_ON(len > PAGE_CACHE_SIZE); - page_start = ofs & PAGE_CACHE_MASK; - page_end = PAGE_CACHE_ALIGN(ofs + len) - 1; + BUG_ON(len > PAGE_SIZE); + page_start = ofs & PAGE_MASK; + page_end = PAGE_ALIGN(ofs + len) - 1; ret = mtd_write(mtd, ofs, len, &retlen, buf); if (ret || (retlen != len)) return -EIO; @@ -82,7 +82,7 @@ static int logfs_mtd_erase_mapping(struct super_block *sb, loff_t ofs, if (!page) continue; memset(page_address(page), 0xFF, PAGE_SIZE); - page_cache_release(page); + put_page(page); } return 0; } @@ -195,7 +195,7 @@ static int __logfs_mtd_writeseg(struct super_block *sb, u64 ofs, pgoff_t index, err = loffs_mtd_write(sb, page->index << PAGE_SHIFT, PAGE_SIZE, page_address(page)); unlock_page(page); - page_cache_release(page); + put_page(page); if (err) return err; } diff --git a/fs/logfs/dir.c b/fs/logfs/dir.c index 542468e9bfb4..ddbed2be5366 100644 --- a/fs/logfs/dir.c +++ b/fs/logfs/dir.c @@ -183,7 +183,7 @@ static struct page *logfs_get_dd_page(struct inode *dir, struct dentry *dentry) if (name->len != be16_to_cpu(dd->namelen) || memcmp(name->name, dd->name, name->len)) { kunmap_atomic(dd); - page_cache_release(page); + put_page(page); continue; } @@ -238,7 +238,7 @@ static int logfs_unlink(struct inode *dir, struct dentry *dentry) return PTR_ERR(page); } index = page->index; - page_cache_release(page); + put_page(page); mutex_lock(&super->s_dirop_mutex); logfs_add_transaction(dir, ta); @@ -316,7 +316,7 @@ static int logfs_readdir(struct file *file, struct dir_context *ctx) be16_to_cpu(dd->namelen), be64_to_cpu(dd->ino), dd->type); kunmap(page); - page_cache_release(page); + put_page(page); if (full) break; } @@ -349,7 +349,7 @@ static struct dentry *logfs_lookup(struct inode *dir, struct dentry *dentry, dd = kmap_atomic(page); ino = be64_to_cpu(dd->ino); kunmap_atomic(dd); - page_cache_release(page); + put_page(page); inode = logfs_iget(dir->i_sb, ino); if (IS_ERR(inode)) @@ -392,7 +392,7 @@ static int logfs_write_dir(struct inode *dir, struct dentry *dentry, err = logfs_write_buf(dir, page, WF_LOCK); unlock_page(page); - page_cache_release(page); + put_page(page); if (!err) grow_dir(dir, index); return err; @@ -561,7 +561,7 @@ static int logfs_get_dd(struct inode *dir, struct dentry *dentry, map = kmap_atomic(page); memcpy(dd, map, sizeof(*dd)); kunmap_atomic(map); - page_cache_release(page); + put_page(page); return 0; } diff --git a/fs/logfs/file.c b/fs/logfs/file.c index 61eaeb1b6cac..f01ddfb1a03b 100644 --- a/fs/logfs/file.c +++ b/fs/logfs/file.c @@ -15,21 +15,21 @@ static int logfs_write_begin(struct 
file *file, struct address_space *mapping, { struct inode *inode = mapping->host; struct page *page; - pgoff_t index = pos >> PAGE_CACHE_SHIFT; + pgoff_t index = pos >> PAGE_SHIFT; page = grab_cache_page_write_begin(mapping, index, flags); if (!page) return -ENOMEM; *pagep = page; - if ((len == PAGE_CACHE_SIZE) || PageUptodate(page)) + if ((len == PAGE_SIZE) || PageUptodate(page)) return 0; - if ((pos & PAGE_CACHE_MASK) >= i_size_read(inode)) { - unsigned start = pos & (PAGE_CACHE_SIZE - 1); + if ((pos & PAGE_MASK) >= i_size_read(inode)) { + unsigned start = pos & (PAGE_SIZE - 1); unsigned end = start + len; /* Reading beyond i_size is simple: memset to zero */ - zero_user_segments(page, 0, start, end, PAGE_CACHE_SIZE); + zero_user_segments(page, 0, start, end, PAGE_SIZE); return 0; } return logfs_readpage_nolock(page); @@ -41,11 +41,11 @@ static int logfs_write_end(struct file *file, struct address_space *mapping, { struct inode *inode = mapping->host; pgoff_t index = page->index; - unsigned start = pos & (PAGE_CACHE_SIZE - 1); + unsigned start = pos & (PAGE_SIZE - 1); unsigned end = start + copied; int ret = 0; - BUG_ON(PAGE_CACHE_SIZE != inode->i_sb->s_blocksize); + BUG_ON(PAGE_SIZE != inode->i_sb->s_blocksize); BUG_ON(page->index > I3_BLOCKS); if (copied < len) { @@ -61,8 +61,8 @@ static int logfs_write_end(struct file *file, struct address_space *mapping, if (copied == 0) goto out; /* FIXME: do we need to update inode? */ - if (i_size_read(inode) < (index << PAGE_CACHE_SHIFT) + end) { - i_size_write(inode, (index << PAGE_CACHE_SHIFT) + end); + if (i_size_read(inode) < (index << PAGE_SHIFT) + end) { + i_size_write(inode, (index << PAGE_SHIFT) + end); mark_inode_dirty_sync(inode); } @@ -75,7 +75,7 @@ static int logfs_write_end(struct file *file, struct address_space *mapping, } out: unlock_page(page); - page_cache_release(page); + put_page(page); return ret ? ret : copied; } @@ -118,7 +118,7 @@ static int logfs_writepage(struct page *page, struct writeback_control *wbc) { struct inode *inode = page->mapping->host; loff_t i_size = i_size_read(inode); - pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT; + pgoff_t end_index = i_size >> PAGE_SHIFT; unsigned offset; u64 bix; level_t level; @@ -142,7 +142,7 @@ static int logfs_writepage(struct page *page, struct writeback_control *wbc) return __logfs_writepage(page); /* Is the page fully outside i_size? (truncate in progress) */ - offset = i_size & (PAGE_CACHE_SIZE-1); + offset = i_size & (PAGE_SIZE-1); if (bix > end_index || offset == 0) { unlock_page(page); return 0; /* don't care */ @@ -155,7 +155,7 @@ static int logfs_writepage(struct page *page, struct writeback_control *wbc) * the page size, the remaining memory is zeroed when mapped, and * writes to that region are not written out to the file." 
*/ - zero_user_segment(page, offset, PAGE_CACHE_SIZE); + zero_user_segment(page, offset, PAGE_SIZE); return __logfs_writepage(page); } diff --git a/fs/logfs/readwrite.c b/fs/logfs/readwrite.c index 20973c9e52f8..3fb8c6d67303 100644 --- a/fs/logfs/readwrite.c +++ b/fs/logfs/readwrite.c @@ -281,7 +281,7 @@ static struct page *logfs_get_read_page(struct inode *inode, u64 bix, static void logfs_put_read_page(struct page *page) { unlock_page(page); - page_cache_release(page); + put_page(page); } static void logfs_lock_write_page(struct page *page) @@ -323,7 +323,7 @@ repeat: return NULL; err = add_to_page_cache_lru(page, mapping, index, GFP_NOFS); if (unlikely(err)) { - page_cache_release(page); + put_page(page); if (err == -EEXIST) goto repeat; return NULL; @@ -342,7 +342,7 @@ static void logfs_unlock_write_page(struct page *page) static void logfs_put_write_page(struct page *page) { logfs_unlock_write_page(page); - page_cache_release(page); + put_page(page); } static struct page *logfs_get_page(struct inode *inode, u64 bix, level_t level, @@ -562,7 +562,7 @@ static void indirect_free_block(struct super_block *sb, if (PagePrivate(page)) { ClearPagePrivate(page); - page_cache_release(page); + put_page(page); set_page_private(page, 0); } __free_block(sb, block); @@ -655,7 +655,7 @@ static void alloc_data_block(struct inode *inode, struct page *page) block->page = page; SetPagePrivate(page); - page_cache_get(page); + get_page(page); set_page_private(page, (unsigned long) block); block->ops = &indirect_block_ops; @@ -709,7 +709,7 @@ static u64 block_get_pointer(struct page *page, int index) static int logfs_read_empty(struct page *page) { - zero_user_segment(page, 0, PAGE_CACHE_SIZE); + zero_user_segment(page, 0, PAGE_SIZE); return 0; } @@ -1660,7 +1660,7 @@ static int truncate_data_block(struct inode *inode, struct page *page, if (err) return err; - zero_user_segment(page, size - pageofs, PAGE_CACHE_SIZE); + zero_user_segment(page, size - pageofs, PAGE_SIZE); return logfs_segment_write(inode, page, shadow); } @@ -1919,7 +1919,7 @@ static void move_page_to_inode(struct inode *inode, struct page *page) block->page = NULL; if (PagePrivate(page)) { ClearPagePrivate(page); - page_cache_release(page); + put_page(page); set_page_private(page, 0); } } @@ -1940,7 +1940,7 @@ static void move_inode_to_page(struct page *page, struct inode *inode) if (!PagePrivate(page)) { SetPagePrivate(page); - page_cache_get(page); + get_page(page); set_page_private(page, (unsigned long) block); } @@ -1971,7 +1971,7 @@ int logfs_read_inode(struct inode *inode) logfs_disk_to_inode(di, inode); kunmap_atomic(di); move_page_to_inode(inode, page); - page_cache_release(page); + put_page(page); return 0; } diff --git a/fs/logfs/segment.c b/fs/logfs/segment.c index d270e4b2ab6b..1efd6055f4b0 100644 --- a/fs/logfs/segment.c +++ b/fs/logfs/segment.c @@ -90,9 +90,9 @@ int __logfs_buf_write(struct logfs_area *area, u64 ofs, void *buf, size_t len, if (!PagePrivate(page)) { SetPagePrivate(page); - page_cache_get(page); + get_page(page); } - page_cache_release(page); + put_page(page); buf += copylen; len -= copylen; @@ -117,9 +117,9 @@ static void pad_partial_page(struct logfs_area *area) memset(page_address(page) + offset, 0xff, len); if (!PagePrivate(page)) { SetPagePrivate(page); - page_cache_get(page); + get_page(page); } - page_cache_release(page); + put_page(page); } } @@ -129,20 +129,20 @@ static void pad_full_pages(struct logfs_area *area) struct logfs_super *super = logfs_super(sb); u64 ofs = dev_ofs(sb, area->a_segno, 
area->a_used_bytes); u32 len = super->s_segsize - area->a_used_bytes; - pgoff_t index = PAGE_CACHE_ALIGN(ofs) >> PAGE_CACHE_SHIFT; - pgoff_t no_indizes = len >> PAGE_CACHE_SHIFT; + pgoff_t index = PAGE_ALIGN(ofs) >> PAGE_SHIFT; + pgoff_t no_indizes = len >> PAGE_SHIFT; struct page *page; while (no_indizes) { page = get_mapping_page(sb, index, 0); BUG_ON(!page); /* FIXME: reserve a pool */ SetPageUptodate(page); - memset(page_address(page), 0xff, PAGE_CACHE_SIZE); + memset(page_address(page), 0xff, PAGE_SIZE); if (!PagePrivate(page)) { SetPagePrivate(page); - page_cache_get(page); + get_page(page); } - page_cache_release(page); + put_page(page); index++; no_indizes--; } @@ -411,7 +411,7 @@ int wbuf_read(struct super_block *sb, u64 ofs, size_t len, void *buf) if (IS_ERR(page)) return PTR_ERR(page); memcpy(buf, page_address(page) + offset, copylen); - page_cache_release(page); + put_page(page); buf += copylen; len -= copylen; @@ -499,7 +499,7 @@ static void move_btree_to_page(struct inode *inode, struct page *page, if (!PagePrivate(page)) { SetPagePrivate(page); - page_cache_get(page); + get_page(page); set_page_private(page, (unsigned long) block); } block->ops = &indirect_block_ops; @@ -554,7 +554,7 @@ void move_page_to_btree(struct page *page) if (PagePrivate(page)) { ClearPagePrivate(page); - page_cache_release(page); + put_page(page); set_page_private(page, 0); } block->ops = &btree_block_ops; @@ -723,9 +723,9 @@ void freeseg(struct super_block *sb, u32 segno) continue; if (PagePrivate(page)) { ClearPagePrivate(page); - page_cache_release(page); + put_page(page); } - page_cache_release(page); + put_page(page); } } diff --git a/fs/logfs/super.c b/fs/logfs/super.c index 54360293bcb5..5751082dba52 100644 --- a/fs/logfs/super.c +++ b/fs/logfs/super.c @@ -48,7 +48,7 @@ void emergency_read_end(struct page *page) if (page == emergency_page) mutex_unlock(&emergency_mutex); else - page_cache_release(page); + put_page(page); } static void dump_segfile(struct super_block *sb) @@ -206,7 +206,7 @@ static int write_one_sb(struct super_block *sb, logfs_set_segment_erased(sb, segno, ec, 0); logfs_write_ds(sb, ds, segno, ec); err = super->s_devops->write_sb(sb, page); - page_cache_release(page); + put_page(page); return err; } @@ -366,24 +366,24 @@ static struct page *find_super_block(struct super_block *sb) return NULL; last = super->s_devops->find_last_sb(sb, &super->s_sb_ofs[1]); if (!last || IS_ERR(last)) { - page_cache_release(first); + put_page(first); return NULL; } if (!logfs_check_ds(page_address(first))) { - page_cache_release(last); + put_page(last); return first; } /* First one didn't work, try the second superblock */ if (!logfs_check_ds(page_address(last))) { - page_cache_release(first); + put_page(first); return last; } /* Neither worked, sorry folks */ - page_cache_release(first); - page_cache_release(last); + put_page(first); + put_page(last); return NULL; } @@ -425,7 +425,7 @@ static int __logfs_read_sb(struct super_block *sb) super->s_data_levels = ds->ds_data_levels; super->s_total_levels = super->s_ifile_levels + super->s_iblock_levels + super->s_data_levels; - page_cache_release(page); + put_page(page); return 0; } diff --git a/fs/minix/dir.c b/fs/minix/dir.c index d19ac258105a..33957c07cd11 100644 --- a/fs/minix/dir.c +++ b/fs/minix/dir.c @@ -28,7 +28,7 @@ const struct file_operations minix_dir_operations = { static inline void dir_put_page(struct page *page) { kunmap(page); - page_cache_release(page); + put_page(page); } /* @@ -38,10 +38,10 @@ static inline void 
dir_put_page(struct page *page) static unsigned minix_last_byte(struct inode *inode, unsigned long page_nr) { - unsigned last_byte = PAGE_CACHE_SIZE; + unsigned last_byte = PAGE_SIZE; - if (page_nr == (inode->i_size >> PAGE_CACHE_SHIFT)) - last_byte = inode->i_size & (PAGE_CACHE_SIZE - 1); + if (page_nr == (inode->i_size >> PAGE_SHIFT)) + last_byte = inode->i_size & (PAGE_SIZE - 1); return last_byte; } @@ -92,8 +92,8 @@ static int minix_readdir(struct file *file, struct dir_context *ctx) if (pos >= inode->i_size) return 0; - offset = pos & ~PAGE_CACHE_MASK; - n = pos >> PAGE_CACHE_SHIFT; + offset = pos & ~PAGE_MASK; + n = pos >> PAGE_SHIFT; for ( ; n < npages; n++, offset = 0) { char *p, *kaddr, *limit; @@ -229,7 +229,7 @@ int minix_add_link(struct dentry *dentry, struct inode *inode) lock_page(page); kaddr = (char*)page_address(page); dir_end = kaddr + minix_last_byte(dir, n); - limit = kaddr + PAGE_CACHE_SIZE - sbi->s_dirsize; + limit = kaddr + PAGE_SIZE - sbi->s_dirsize; for (p = kaddr; p <= limit; p = minix_next_entry(p, sbi)) { de = (minix_dirent *)p; de3 = (minix3_dirent *)p; @@ -327,7 +327,7 @@ int minix_make_empty(struct inode *inode, struct inode *dir) } kaddr = kmap_atomic(page); - memset(kaddr, 0, PAGE_CACHE_SIZE); + memset(kaddr, 0, PAGE_SIZE); if (sbi->s_version == MINIX_V3) { minix3_dirent *de3 = (minix3_dirent *)kaddr; @@ -350,7 +350,7 @@ int minix_make_empty(struct inode *inode, struct inode *dir) err = dir_commit_chunk(page, 0, 2 * sbi->s_dirsize); fail: - page_cache_release(page); + put_page(page); return err; } diff --git a/fs/minix/namei.c b/fs/minix/namei.c index a795a11e50c7..2887d1d95ce2 100644 --- a/fs/minix/namei.c +++ b/fs/minix/namei.c @@ -243,11 +243,11 @@ static int minix_rename(struct inode * old_dir, struct dentry *old_dentry, out_dir: if (dir_de) { kunmap(dir_page); - page_cache_release(dir_page); + put_page(dir_page); } out_old: kunmap(old_page); - page_cache_release(old_page); + put_page(old_page); out: return err; } diff --git a/fs/mpage.c b/fs/mpage.c index 6bd9fd90964e..e7083bfbabb3 100644 --- a/fs/mpage.c +++ b/fs/mpage.c @@ -107,7 +107,7 @@ map_buffer_to_page(struct page *page, struct buffer_head *bh, int page_block) * don't make any buffers if there is only one buffer on * the page and the page just needs to be set up to date */ - if (inode->i_blkbits == PAGE_CACHE_SHIFT && + if (inode->i_blkbits == PAGE_SHIFT && buffer_uptodate(bh)) { SetPageUptodate(page); return; @@ -145,7 +145,7 @@ do_mpage_readpage(struct bio *bio, struct page *page, unsigned nr_pages, { struct inode *inode = page->mapping->host; const unsigned blkbits = inode->i_blkbits; - const unsigned blocks_per_page = PAGE_CACHE_SIZE >> blkbits; + const unsigned blocks_per_page = PAGE_SIZE >> blkbits; const unsigned blocksize = 1 << blkbits; sector_t block_in_file; sector_t last_block; @@ -162,7 +162,7 @@ do_mpage_readpage(struct bio *bio, struct page *page, unsigned nr_pages, if (page_has_buffers(page)) goto confused; - block_in_file = (sector_t)page->index << (PAGE_CACHE_SHIFT - blkbits); + block_in_file = (sector_t)page->index << (PAGE_SHIFT - blkbits); last_block = block_in_file + nr_pages * blocks_per_page; last_block_in_file = (i_size_read(inode) + blocksize - 1) >> blkbits; if (last_block > last_block_in_file) @@ -249,7 +249,7 @@ do_mpage_readpage(struct bio *bio, struct page *page, unsigned nr_pages, } if (first_hole != blocks_per_page) { - zero_user_segment(page, first_hole << blkbits, PAGE_CACHE_SIZE); + zero_user_segment(page, first_hole << blkbits, PAGE_SIZE); if (first_hole == 
0) { SetPageUptodate(page); unlock_page(page); @@ -380,7 +380,7 @@ mpage_readpages(struct address_space *mapping, struct list_head *pages, &first_logical_block, get_block, gfp); } - page_cache_release(page); + put_page(page); } BUG_ON(!list_empty(pages)); if (bio) @@ -472,7 +472,7 @@ static int __mpage_writepage(struct page *page, struct writeback_control *wbc, struct inode *inode = page->mapping->host; const unsigned blkbits = inode->i_blkbits; unsigned long end_index; - const unsigned blocks_per_page = PAGE_CACHE_SIZE >> blkbits; + const unsigned blocks_per_page = PAGE_SIZE >> blkbits; sector_t last_block; sector_t block_in_file; sector_t blocks[MAX_BUF_PER_PAGE]; @@ -542,7 +542,7 @@ static int __mpage_writepage(struct page *page, struct writeback_control *wbc, * The page has no buffers: map it to disk */ BUG_ON(!PageUptodate(page)); - block_in_file = (sector_t)page->index << (PAGE_CACHE_SHIFT - blkbits); + block_in_file = (sector_t)page->index << (PAGE_SHIFT - blkbits); last_block = (i_size - 1) >> blkbits; map_bh.b_page = page; for (page_block = 0; page_block < blocks_per_page; ) { @@ -574,7 +574,7 @@ static int __mpage_writepage(struct page *page, struct writeback_control *wbc, first_unmapped = page_block; page_is_mapped: - end_index = i_size >> PAGE_CACHE_SHIFT; + end_index = i_size >> PAGE_SHIFT; if (page->index >= end_index) { /* * The page straddles i_size. It must be zeroed out on each @@ -584,11 +584,11 @@ page_is_mapped: * is zeroed when mapped, and writes to that region are not * written out to the file." */ - unsigned offset = i_size & (PAGE_CACHE_SIZE - 1); + unsigned offset = i_size & (PAGE_SIZE - 1); if (page->index > end_index || !offset) goto confused; - zero_user_segment(page, offset, PAGE_CACHE_SIZE); + zero_user_segment(page, offset, PAGE_SIZE); } /* diff --git a/fs/ncpfs/dir.c b/fs/ncpfs/dir.c index b7f8eaeea5d8..bfdad003ee56 100644 --- a/fs/ncpfs/dir.c +++ b/fs/ncpfs/dir.c @@ -510,7 +510,7 @@ static int ncp_readdir(struct file *file, struct dir_context *ctx) kunmap(ctl.page); SetPageUptodate(ctl.page); unlock_page(ctl.page); - page_cache_release(ctl.page); + put_page(ctl.page); ctl.page = NULL; } ctl.idx = 0; @@ -520,7 +520,7 @@ invalid_cache: if (ctl.page) { kunmap(ctl.page); unlock_page(ctl.page); - page_cache_release(ctl.page); + put_page(ctl.page); ctl.page = NULL; } ctl.cache = cache; @@ -554,14 +554,14 @@ finished: kunmap(ctl.page); SetPageUptodate(ctl.page); unlock_page(ctl.page); - page_cache_release(ctl.page); + put_page(ctl.page); } if (page) { cache->head = ctl.head; kunmap(page); SetPageUptodate(page); unlock_page(page); - page_cache_release(page); + put_page(page); } out: return result; @@ -649,7 +649,7 @@ ncp_fill_cache(struct file *file, struct dir_context *ctx, kunmap(ctl.page); SetPageUptodate(ctl.page); unlock_page(ctl.page); - page_cache_release(ctl.page); + put_page(ctl.page); } ctl.cache = NULL; ctl.idx -= NCP_DIRCACHE_SIZE; diff --git a/fs/ncpfs/ncplib_kernel.h b/fs/ncpfs/ncplib_kernel.h index 5233fbc1747a..17cfb743b5bf 100644 --- a/fs/ncpfs/ncplib_kernel.h +++ b/fs/ncpfs/ncplib_kernel.h @@ -191,7 +191,7 @@ struct ncp_cache_head { int eof; }; -#define NCP_DIRCACHE_SIZE ((int)(PAGE_CACHE_SIZE/sizeof(struct dentry *))) +#define NCP_DIRCACHE_SIZE ((int)(PAGE_SIZE/sizeof(struct dentry *))) union ncp_dir_cache { struct ncp_cache_head head; struct dentry *dentry[NCP_DIRCACHE_SIZE]; diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c index 02e4d87d2ed3..17a42e4eb872 100644 --- a/fs/nfs/blocklayout/blocklayout.c +++ 
b/fs/nfs/blocklayout/blocklayout.c @@ -231,7 +231,7 @@ bl_read_pagelist(struct nfs_pgio_header *header) size_t bytes_left = header->args.count; unsigned int pg_offset = header->args.pgbase, pg_len; struct page **pages = header->args.pages; - int pg_index = header->args.pgbase >> PAGE_CACHE_SHIFT; + int pg_index = header->args.pgbase >> PAGE_SHIFT; const bool is_dio = (header->dreq != NULL); struct blk_plug plug; int i; @@ -263,13 +263,13 @@ bl_read_pagelist(struct nfs_pgio_header *header) } if (is_dio) { - if (pg_offset + bytes_left > PAGE_CACHE_SIZE) - pg_len = PAGE_CACHE_SIZE - pg_offset; + if (pg_offset + bytes_left > PAGE_SIZE) + pg_len = PAGE_SIZE - pg_offset; else pg_len = bytes_left; } else { BUG_ON(pg_offset != 0); - pg_len = PAGE_CACHE_SIZE; + pg_len = PAGE_SIZE; } if (is_hole(&be)) { @@ -339,9 +339,9 @@ static void bl_write_cleanup(struct work_struct *work) if (likely(!hdr->pnfs_error)) { struct pnfs_block_layout *bl = BLK_LSEG2EXT(hdr->lseg); - u64 start = hdr->args.offset & (loff_t)PAGE_CACHE_MASK; + u64 start = hdr->args.offset & (loff_t)PAGE_MASK; u64 end = (hdr->args.offset + hdr->args.count + - PAGE_CACHE_SIZE - 1) & (loff_t)PAGE_CACHE_MASK; + PAGE_SIZE - 1) & (loff_t)PAGE_MASK; ext_tree_mark_written(bl, start >> SECTOR_SHIFT, (end - start) >> SECTOR_SHIFT); @@ -373,7 +373,7 @@ bl_write_pagelist(struct nfs_pgio_header *header, int sync) loff_t offset = header->args.offset; size_t count = header->args.count; struct page **pages = header->args.pages; - int pg_index = header->args.pgbase >> PAGE_CACHE_SHIFT; + int pg_index = header->args.pgbase >> PAGE_SHIFT; unsigned int pg_len; struct blk_plug plug; int i; @@ -392,7 +392,7 @@ bl_write_pagelist(struct nfs_pgio_header *header, int sync) blk_start_plug(&plug); /* we always write out the whole page */ - offset = offset & (loff_t)PAGE_CACHE_MASK; + offset = offset & (loff_t)PAGE_MASK; isect = offset >> SECTOR_SHIFT; for (i = pg_index; i < header->page_array.npages; i++) { @@ -408,7 +408,7 @@ bl_write_pagelist(struct nfs_pgio_header *header, int sync) extent_length = be.be_length - (isect - be.be_f_offset); } - pg_len = PAGE_CACHE_SIZE; + pg_len = PAGE_SIZE; bio = do_add_page_to_bio(bio, header->page_array.npages - i, WRITE, isect, pages[i], &map, &be, bl_end_io_write, par, @@ -820,7 +820,7 @@ static u64 pnfs_num_cont_bytes(struct inode *inode, pgoff_t idx) pgoff_t end; /* Optimize common case that writes from 0 to end of file */ - end = DIV_ROUND_UP(i_size_read(inode), PAGE_CACHE_SIZE); + end = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE); if (end != inode->i_mapping->nrpages) { rcu_read_lock(); end = page_cache_next_hole(mapping, idx + 1, ULONG_MAX); @@ -828,9 +828,9 @@ static u64 pnfs_num_cont_bytes(struct inode *inode, pgoff_t idx) } if (!end) - return i_size_read(inode) - (idx << PAGE_CACHE_SHIFT); + return i_size_read(inode) - (idx << PAGE_SHIFT); else - return (end - idx) << PAGE_CACHE_SHIFT; + return (end - idx) << PAGE_SHIFT; } static void diff --git a/fs/nfs/blocklayout/blocklayout.h b/fs/nfs/blocklayout/blocklayout.h index bc21205309e0..18e6fd0b9506 100644 --- a/fs/nfs/blocklayout/blocklayout.h +++ b/fs/nfs/blocklayout/blocklayout.h @@ -40,8 +40,8 @@ #include "../pnfs.h" #include "../netns.h" -#define PAGE_CACHE_SECTORS (PAGE_CACHE_SIZE >> SECTOR_SHIFT) -#define PAGE_CACHE_SECTOR_SHIFT (PAGE_CACHE_SHIFT - SECTOR_SHIFT) +#define PAGE_CACHE_SECTORS (PAGE_SIZE >> SECTOR_SHIFT) +#define PAGE_CACHE_SECTOR_SHIFT (PAGE_SHIFT - SECTOR_SHIFT) #define SECTOR_SIZE (1 << SECTOR_SHIFT) struct pnfs_block_dev; diff --git 
a/fs/nfs/client.c b/fs/nfs/client.c index d6d5d2a48e83..0c96528db94a 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -736,7 +736,7 @@ static void nfs_server_set_fsinfo(struct nfs_server *server, server->rsize = max_rpc_payload; if (server->rsize > NFS_MAX_FILE_IO_SIZE) server->rsize = NFS_MAX_FILE_IO_SIZE; - server->rpages = (server->rsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; + server->rpages = (server->rsize + PAGE_SIZE - 1) >> PAGE_SHIFT; server->backing_dev_info.name = "nfs"; server->backing_dev_info.ra_pages = server->rpages * NFS_MAX_READAHEAD; @@ -745,13 +745,13 @@ static void nfs_server_set_fsinfo(struct nfs_server *server, server->wsize = max_rpc_payload; if (server->wsize > NFS_MAX_FILE_IO_SIZE) server->wsize = NFS_MAX_FILE_IO_SIZE; - server->wpages = (server->wsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; + server->wpages = (server->wsize + PAGE_SIZE - 1) >> PAGE_SHIFT; server->wtmult = nfs_block_bits(fsinfo->wtmult, NULL); server->dtsize = nfs_block_size(fsinfo->dtpref, NULL); - if (server->dtsize > PAGE_CACHE_SIZE * NFS_MAX_READDIR_PAGES) - server->dtsize = PAGE_CACHE_SIZE * NFS_MAX_READDIR_PAGES; + if (server->dtsize > PAGE_SIZE * NFS_MAX_READDIR_PAGES) + server->dtsize = PAGE_SIZE * NFS_MAX_READDIR_PAGES; if (server->dtsize > server->rsize) server->dtsize = server->rsize; diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 4bfa7d8bcade..adef506c5786 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -707,7 +707,7 @@ void cache_page_release(nfs_readdir_descriptor_t *desc) { if (!desc->page->mapping) nfs_readdir_clear_array(desc->page); - page_cache_release(desc->page); + put_page(desc->page); desc->page = NULL; } @@ -1923,7 +1923,7 @@ int nfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname) * add_to_page_cache_lru() grabs an extra page refcount. * Drop it here to avoid leaking this page later. 
*/ - page_cache_release(page); + put_page(page); } else __free_page(page); diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 7a0cfd3266e5..c93826e4a8c6 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -269,7 +269,7 @@ static void nfs_direct_release_pages(struct page **pages, unsigned int npages) { unsigned int i; for (i = 0; i < npages; i++) - page_cache_release(pages[i]); + put_page(pages[i]); } void nfs_init_cinfo_from_dreq(struct nfs_commit_info *cinfo, @@ -1003,7 +1003,7 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter) iov_iter_count(iter)); pos = iocb->ki_pos; - end = (pos + iov_iter_count(iter) - 1) >> PAGE_CACHE_SHIFT; + end = (pos + iov_iter_count(iter) - 1) >> PAGE_SHIFT; inode_lock(inode); @@ -1013,7 +1013,7 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter) if (mapping->nrpages) { result = invalidate_inode_pages2_range(mapping, - pos >> PAGE_CACHE_SHIFT, end); + pos >> PAGE_SHIFT, end); if (result) goto out_unlock; } @@ -1042,7 +1042,7 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter) if (mapping->nrpages) { invalidate_inode_pages2_range(mapping, - pos >> PAGE_CACHE_SHIFT, end); + pos >> PAGE_SHIFT, end); } inode_unlock(inode); diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 89bf093d342a..be01095b97ae 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -320,7 +320,7 @@ static int nfs_want_read_modify_write(struct file *file, struct page *page, loff_t pos, unsigned len) { unsigned int pglen = nfs_page_length(page); - unsigned int offset = pos & (PAGE_CACHE_SIZE - 1); + unsigned int offset = pos & (PAGE_SIZE - 1); unsigned int end = offset + len; if (pnfs_ld_read_whole_page(file->f_mapping->host)) { @@ -351,7 +351,7 @@ static int nfs_write_begin(struct file *file, struct address_space *mapping, struct page **pagep, void **fsdata) { int ret; - pgoff_t index = pos >> PAGE_CACHE_SHIFT; + pgoff_t index = pos >> PAGE_SHIFT; struct page *page; int once_thru = 0; @@ -380,12 +380,12 @@ start: ret = nfs_flush_incompatible(file, page); if (ret) { unlock_page(page); - page_cache_release(page); + put_page(page); } else if (!once_thru && nfs_want_read_modify_write(file, page, pos, len)) { once_thru = 1; ret = nfs_readpage(file, page); - page_cache_release(page); + put_page(page); if (!ret) goto start; } @@ -396,7 +396,7 @@ static int nfs_write_end(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned copied, struct page *page, void *fsdata) { - unsigned offset = pos & (PAGE_CACHE_SIZE - 1); + unsigned offset = pos & (PAGE_SIZE - 1); struct nfs_open_context *ctx = nfs_file_open_context(file); int status; @@ -413,20 +413,20 @@ static int nfs_write_end(struct file *file, struct address_space *mapping, if (pglen == 0) { zero_user_segments(page, 0, offset, - end, PAGE_CACHE_SIZE); + end, PAGE_SIZE); SetPageUptodate(page); } else if (end >= pglen) { - zero_user_segment(page, end, PAGE_CACHE_SIZE); + zero_user_segment(page, end, PAGE_SIZE); if (offset == 0) SetPageUptodate(page); } else - zero_user_segment(page, pglen, PAGE_CACHE_SIZE); + zero_user_segment(page, pglen, PAGE_SIZE); } status = nfs_updatepage(file, page, offset, copied); unlock_page(page); - page_cache_release(page); + put_page(page); if (status < 0) return status; @@ -454,7 +454,7 @@ static void nfs_invalidate_page(struct page *page, unsigned int offset, dfprintk(PAGECACHE, "NFS: invalidate_page(%p, %u, %u)\n", page, offset, length); - if (offset != 0 || length < PAGE_CACHE_SIZE) + if (offset != 0 || length < PAGE_SIZE) 
return; /* Cancel any unstarted writes on this page */ nfs_wb_page_cancel(page_file_mapping(page)->host, page); diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 565f8135ae1f..f1d1d2c472e9 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -638,11 +638,11 @@ unsigned int nfs_page_length(struct page *page) if (i_size > 0) { pgoff_t page_index = page_file_index(page); - pgoff_t end_index = (i_size - 1) >> PAGE_CACHE_SHIFT; + pgoff_t end_index = (i_size - 1) >> PAGE_SHIFT; if (page_index < end_index) - return PAGE_CACHE_SIZE; + return PAGE_SIZE; if (page_index == end_index) - return ((i_size - 1) & ~PAGE_CACHE_MASK) + 1; + return ((i_size - 1) & ~PAGE_MASK) + 1; } return 0; } diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 4e4441216804..88474a4fc669 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -5001,7 +5001,7 @@ static int decode_space_limit(struct xdr_stream *xdr, blocksize = be32_to_cpup(p); maxsize = (uint64_t)nblocks * (uint64_t)blocksize; } - maxsize >>= PAGE_CACHE_SHIFT; + maxsize >>= PAGE_SHIFT; *pagemod_limit = min_t(u64, maxsize, ULONG_MAX); return 0; out_overflow: diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c index 9aebffb40505..049c1b1f2932 100644 --- a/fs/nfs/objlayout/objio_osd.c +++ b/fs/nfs/objlayout/objio_osd.c @@ -486,7 +486,7 @@ static void __r4w_put_page(void *priv, struct page *page) dprintk("%s: index=0x%lx\n", __func__, (page == ZERO_PAGE(0)) ? -1UL : page->index); if (ZERO_PAGE(0) != page) - page_cache_release(page); + put_page(page); return; } diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 8ce4f61cbaa5..1f6db4231057 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -342,7 +342,7 @@ nfs_create_request(struct nfs_open_context *ctx, struct page *page, * update_nfs_request below if the region is not locked. 
*/ req->wb_page = page; req->wb_index = page_file_index(page); - page_cache_get(page); + get_page(page); req->wb_offset = offset; req->wb_pgbase = offset; req->wb_bytes = count; @@ -392,7 +392,7 @@ static void nfs_clear_request(struct nfs_page *req) struct nfs_lock_context *l_ctx = req->wb_lock_context; if (page != NULL) { - page_cache_release(page); + put_page(page); req->wb_page = NULL; } if (l_ctx != NULL) { @@ -904,7 +904,7 @@ static bool nfs_can_coalesce_requests(struct nfs_page *prev, return false; } else { if (req->wb_pgbase != 0 || - prev->wb_pgbase + prev->wb_bytes != PAGE_CACHE_SIZE) + prev->wb_pgbase + prev->wb_bytes != PAGE_SIZE) return false; } } diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 2fa483e6dbe2..89a5ef4df08a 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -841,7 +841,7 @@ send_layoutget(struct pnfs_layout_hdr *lo, i_size = i_size_read(ino); - lgp->args.minlength = PAGE_CACHE_SIZE; + lgp->args.minlength = PAGE_SIZE; if (lgp->args.minlength > range->length) lgp->args.minlength = range->length; if (range->iomode == IOMODE_READ) { @@ -1618,13 +1618,13 @@ lookup_again: spin_unlock(&clp->cl_lock); } - pg_offset = arg.offset & ~PAGE_CACHE_MASK; + pg_offset = arg.offset & ~PAGE_MASK; if (pg_offset) { arg.offset -= pg_offset; arg.length += pg_offset; } if (arg.length != NFS4_MAX_UINT64) - arg.length = PAGE_CACHE_ALIGN(arg.length); + arg.length = PAGE_ALIGN(arg.length); lseg = send_layoutget(lo, ctx, &arg, gfp_flags); atomic_dec(&lo->plh_outstanding); diff --git a/fs/nfs/read.c b/fs/nfs/read.c index eb31e23e7def..6776d7a7839e 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -46,7 +46,7 @@ static void nfs_readhdr_free(struct nfs_pgio_header *rhdr) static int nfs_return_empty_page(struct page *page) { - zero_user(page, 0, PAGE_CACHE_SIZE); + zero_user(page, 0, PAGE_SIZE); SetPageUptodate(page); unlock_page(page); return 0; @@ -118,8 +118,8 @@ int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode, unlock_page(page); return PTR_ERR(new); } - if (len < PAGE_CACHE_SIZE) - zero_user_segment(page, len, PAGE_CACHE_SIZE); + if (len < PAGE_SIZE) + zero_user_segment(page, len, PAGE_SIZE); nfs_pageio_init_read(&pgio, inode, false, &nfs_async_read_completion_ops); @@ -295,7 +295,7 @@ int nfs_readpage(struct file *file, struct page *page) int error; dprintk("NFS: nfs_readpage (%p %ld@%lu)\n", - page, PAGE_CACHE_SIZE, page_file_index(page)); + page, PAGE_SIZE, page_file_index(page)); nfs_inc_stats(inode, NFSIOS_VFSREADPAGE); nfs_add_stats(inode, NFSIOS_READPAGES, 1); @@ -361,8 +361,8 @@ readpage_async_filler(void *data, struct page *page) if (IS_ERR(new)) goto out_error; - if (len < PAGE_CACHE_SIZE) - zero_user_segment(page, len, PAGE_CACHE_SIZE); + if (len < PAGE_SIZE) + zero_user_segment(page, len, PAGE_SIZE); if (!nfs_pageio_add_request(desc->pgio, new)) { nfs_list_remove_request(new); nfs_readpage_release(new); @@ -424,8 +424,8 @@ int nfs_readpages(struct file *filp, struct address_space *mapping, pgm = &pgio.pg_mirrors[0]; NFS_I(inode)->read_io += pgm->pg_bytes_written; - npages = (pgm->pg_bytes_written + PAGE_CACHE_SIZE - 1) >> - PAGE_CACHE_SHIFT; + npages = (pgm->pg_bytes_written + PAGE_SIZE - 1) >> + PAGE_SHIFT; nfs_add_stats(inode, NFSIOS_READPAGES, npages); read_complete: put_nfs_open_context(desc.ctx); diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 5754835a2886..5f4fd53e5764 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -150,7 +150,7 @@ static void nfs_grow_file(struct page *page, unsigned int offset, unsigned int c spin_lock(&inode->i_lock); 
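/*
 * Illustrative aside, not from the patch: these hunks are a mechanical
 * rename, safe because include/linux/pagemap.h defined the old names as
 * plain aliases (#define PAGE_CACHE_SHIFT PAGE_SHIFT, #define
 * PAGE_CACHE_SIZE PAGE_SIZE, #define PAGE_CACHE_MASK PAGE_MASK). A
 * hypothetical helper, assuming normal kernel context for loff_t,
 * pgoff_t and the PAGE_* macros, sketching the index/offset arithmetic
 * the hunks rely on:
 */
static inline void example_split_pos(loff_t pos)
{
	pgoff_t index = pos >> PAGE_SHIFT;	/* page that holds pos */
	unsigned int off = pos & ~PAGE_MASK;	/* byte offset within it */

	/* PAGE_MASK is ~(PAGE_SIZE - 1), so ~PAGE_MASK == PAGE_SIZE - 1 */
	(void)index;
	(void)off;
}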
i_size = i_size_read(inode); - end_index = (i_size - 1) >> PAGE_CACHE_SHIFT; + end_index = (i_size - 1) >> PAGE_SHIFT; if (i_size > 0 && page_file_index(page) < end_index) goto out; end = page_file_offset(page) + ((loff_t)offset+count); @@ -1942,7 +1942,7 @@ int nfs_wb_page_cancel(struct inode *inode, struct page *page) int nfs_wb_single_page(struct inode *inode, struct page *page, bool launder) { loff_t range_start = page_file_offset(page); - loff_t range_end = range_start + (loff_t)(PAGE_CACHE_SIZE - 1); + loff_t range_end = range_start + (loff_t)(PAGE_SIZE - 1); struct writeback_control wbc = { .sync_mode = WB_SYNC_ALL, .nr_to_write = 0, diff --git a/fs/nilfs2/bmap.c b/fs/nilfs2/bmap.c index 27f75bcbeb30..a9fb3636c142 100644 --- a/fs/nilfs2/bmap.c +++ b/fs/nilfs2/bmap.c @@ -458,7 +458,7 @@ __u64 nilfs_bmap_data_get_key(const struct nilfs_bmap *bmap, struct buffer_head *pbh; __u64 key; - key = page_index(bh->b_page) << (PAGE_CACHE_SHIFT - + key = page_index(bh->b_page) << (PAGE_SHIFT - bmap->b_inode->i_blkbits); for (pbh = page_buffers(bh->b_page); pbh != bh; pbh = pbh->b_this_page) key++; diff --git a/fs/nilfs2/btnode.c b/fs/nilfs2/btnode.c index a35ae35e6932..e0c9daf9aa22 100644 --- a/fs/nilfs2/btnode.c +++ b/fs/nilfs2/btnode.c @@ -62,7 +62,7 @@ nilfs_btnode_create_block(struct address_space *btnc, __u64 blocknr) set_buffer_uptodate(bh); unlock_page(bh->b_page); - page_cache_release(bh->b_page); + put_page(bh->b_page); return bh; } @@ -128,7 +128,7 @@ found: out_locked: unlock_page(page); - page_cache_release(page); + put_page(page); return err; } @@ -146,7 +146,7 @@ void nilfs_btnode_delete(struct buffer_head *bh) pgoff_t index = page_index(page); int still_dirty; - page_cache_get(page); + get_page(page); lock_page(page); wait_on_page_writeback(page); @@ -154,7 +154,7 @@ void nilfs_btnode_delete(struct buffer_head *bh) still_dirty = PageDirty(page); mapping = page->mapping; unlock_page(page); - page_cache_release(page); + put_page(page); if (!still_dirty && mapping) invalidate_inode_pages2_range(mapping, index, index); @@ -181,7 +181,7 @@ int nilfs_btnode_prepare_change_key(struct address_space *btnc, obh = ctxt->bh; ctxt->newbh = NULL; - if (inode->i_blkbits == PAGE_CACHE_SHIFT) { + if (inode->i_blkbits == PAGE_SHIFT) { lock_page(obh->b_page); /* * We cannot call radix_tree_preload for the kernels older diff --git a/fs/nilfs2/dir.c b/fs/nilfs2/dir.c index 6b8b92b19cec..e08f064e4bd7 100644 --- a/fs/nilfs2/dir.c +++ b/fs/nilfs2/dir.c @@ -58,7 +58,7 @@ static inline unsigned nilfs_chunk_size(struct inode *inode) static inline void nilfs_put_page(struct page *page) { kunmap(page); - page_cache_release(page); + put_page(page); } /* @@ -69,9 +69,9 @@ static unsigned nilfs_last_byte(struct inode *inode, unsigned long page_nr) { unsigned last_byte = inode->i_size; - last_byte -= page_nr << PAGE_CACHE_SHIFT; - if (last_byte > PAGE_CACHE_SIZE) - last_byte = PAGE_CACHE_SIZE; + last_byte -= page_nr << PAGE_SHIFT; + if (last_byte > PAGE_SIZE) + last_byte = PAGE_SIZE; return last_byte; } @@ -109,12 +109,12 @@ static void nilfs_check_page(struct page *page) unsigned chunk_size = nilfs_chunk_size(dir); char *kaddr = page_address(page); unsigned offs, rec_len; - unsigned limit = PAGE_CACHE_SIZE; + unsigned limit = PAGE_SIZE; struct nilfs_dir_entry *p; char *error; - if ((dir->i_size >> PAGE_CACHE_SHIFT) == page->index) { - limit = dir->i_size & ~PAGE_CACHE_MASK; + if ((dir->i_size >> PAGE_SHIFT) == page->index) { + limit = dir->i_size & ~PAGE_MASK; if (limit & (chunk_size - 1)) goto Ebadsize; if 
(!limit) @@ -161,7 +161,7 @@ Espan: bad_entry: nilfs_error(sb, "nilfs_check_page", "bad entry in directory #%lu: %s - " "offset=%lu, inode=%lu, rec_len=%d, name_len=%d", - dir->i_ino, error, (page->index<<PAGE_CACHE_SHIFT)+offs, + dir->i_ino, error, (page->index<<PAGE_SHIFT)+offs, (unsigned long) le64_to_cpu(p->inode), rec_len, p->name_len); goto fail; @@ -170,7 +170,7 @@ Eend: nilfs_error(sb, "nilfs_check_page", "entry in directory #%lu spans the page boundary" "offset=%lu, inode=%lu", - dir->i_ino, (page->index<<PAGE_CACHE_SHIFT)+offs, + dir->i_ino, (page->index<<PAGE_SHIFT)+offs, (unsigned long) le64_to_cpu(p->inode)); fail: SetPageChecked(page); @@ -256,8 +256,8 @@ static int nilfs_readdir(struct file *file, struct dir_context *ctx) loff_t pos = ctx->pos; struct inode *inode = file_inode(file); struct super_block *sb = inode->i_sb; - unsigned int offset = pos & ~PAGE_CACHE_MASK; - unsigned long n = pos >> PAGE_CACHE_SHIFT; + unsigned int offset = pos & ~PAGE_MASK; + unsigned long n = pos >> PAGE_SHIFT; unsigned long npages = dir_pages(inode); /* unsigned chunk_mask = ~(nilfs_chunk_size(inode)-1); */ @@ -272,7 +272,7 @@ static int nilfs_readdir(struct file *file, struct dir_context *ctx) if (IS_ERR(page)) { nilfs_error(sb, __func__, "bad page in #%lu", inode->i_ino); - ctx->pos += PAGE_CACHE_SIZE - offset; + ctx->pos += PAGE_SIZE - offset; return -EIO; } kaddr = page_address(page); @@ -361,7 +361,7 @@ nilfs_find_entry(struct inode *dir, const struct qstr *qstr, if (++n >= npages) n = 0; /* next page is past the blocks we've got */ - if (unlikely(n > (dir->i_blocks >> (PAGE_CACHE_SHIFT - 9)))) { + if (unlikely(n > (dir->i_blocks >> (PAGE_SHIFT - 9)))) { nilfs_error(dir->i_sb, __func__, "dir %lu size %lld exceeds block count %llu", dir->i_ino, dir->i_size, @@ -401,7 +401,7 @@ ino_t nilfs_inode_by_name(struct inode *dir, const struct qstr *qstr) if (de) { res = le64_to_cpu(de->inode); kunmap(page); - page_cache_release(page); + put_page(page); } return res; } @@ -460,7 +460,7 @@ int nilfs_add_link(struct dentry *dentry, struct inode *inode) kaddr = page_address(page); dir_end = kaddr + nilfs_last_byte(dir, n); de = (struct nilfs_dir_entry *)kaddr; - kaddr += PAGE_CACHE_SIZE - reclen; + kaddr += PAGE_SIZE - reclen; while ((char *)de <= kaddr) { if ((char *)de == dir_end) { /* We hit i_size */ @@ -603,7 +603,7 @@ int nilfs_make_empty(struct inode *inode, struct inode *parent) kunmap_atomic(kaddr); nilfs_commit_chunk(page, mapping, 0, chunk_size); fail: - page_cache_release(page); + put_page(page); return err; } diff --git a/fs/nilfs2/gcinode.c b/fs/nilfs2/gcinode.c index 748ca238915a..0224b7826ace 100644 --- a/fs/nilfs2/gcinode.c +++ b/fs/nilfs2/gcinode.c @@ -115,7 +115,7 @@ int nilfs_gccache_submit_read_data(struct inode *inode, sector_t blkoff, failed: unlock_page(bh->b_page); - page_cache_release(bh->b_page); + put_page(bh->b_page); return err; } diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c index 21a1e2e0d92f..534631358b13 100644 --- a/fs/nilfs2/inode.c +++ b/fs/nilfs2/inode.c @@ -249,7 +249,7 @@ static int nilfs_set_page_dirty(struct page *page) if (nr_dirty) nilfs_set_file_dirty(inode, nr_dirty); } else if (ret) { - unsigned nr_dirty = 1 << (PAGE_CACHE_SHIFT - inode->i_blkbits); + unsigned nr_dirty = 1 << (PAGE_SHIFT - inode->i_blkbits); nilfs_set_file_dirty(inode, nr_dirty); } @@ -291,7 +291,7 @@ static int nilfs_write_end(struct file *file, struct address_space *mapping, struct page *page, void *fsdata) { struct inode *inode = mapping->host; - unsigned start = pos & (PAGE_CACHE_SIZE - 1); + unsigned start = pos & (PAGE_SIZE - 1); unsigned nr_dirty; int err; diff --git a/fs/nilfs2/mdt.c b/fs/nilfs2/mdt.c index 1125f40233ff..f6982b9153d5
100644 --- a/fs/nilfs2/mdt.c +++ b/fs/nilfs2/mdt.c @@ -110,7 +110,7 @@ static int nilfs_mdt_create_block(struct inode *inode, unsigned long block, failed_bh: unlock_page(bh->b_page); - page_cache_release(bh->b_page); + put_page(bh->b_page); brelse(bh); failed_unlock: @@ -170,7 +170,7 @@ nilfs_mdt_submit_block(struct inode *inode, unsigned long blkoff, failed_bh: unlock_page(bh->b_page); - page_cache_release(bh->b_page); + put_page(bh->b_page); brelse(bh); failed: return ret; @@ -363,7 +363,7 @@ int nilfs_mdt_delete_block(struct inode *inode, unsigned long block) int nilfs_mdt_forget_block(struct inode *inode, unsigned long block) { pgoff_t index = (pgoff_t)block >> - (PAGE_CACHE_SHIFT - inode->i_blkbits); + (PAGE_SHIFT - inode->i_blkbits); struct page *page; unsigned long first_block; int ret = 0; @@ -376,7 +376,7 @@ int nilfs_mdt_forget_block(struct inode *inode, unsigned long block) wait_on_page_writeback(page); first_block = (unsigned long)index << - (PAGE_CACHE_SHIFT - inode->i_blkbits); + (PAGE_SHIFT - inode->i_blkbits); if (page_has_buffers(page)) { struct buffer_head *bh; @@ -385,7 +385,7 @@ int nilfs_mdt_forget_block(struct inode *inode, unsigned long block) } still_dirty = PageDirty(page); unlock_page(page); - page_cache_release(page); + put_page(page); if (still_dirty || invalidate_inode_pages2_range(inode->i_mapping, index, index) != 0) @@ -578,7 +578,7 @@ int nilfs_mdt_freeze_buffer(struct inode *inode, struct buffer_head *bh) } unlock_page(page); - page_cache_release(page); + put_page(page); return 0; } @@ -597,7 +597,7 @@ nilfs_mdt_get_frozen_buffer(struct inode *inode, struct buffer_head *bh) bh_frozen = nilfs_page_get_nth_block(page, n); } unlock_page(page); - page_cache_release(page); + put_page(page); } return bh_frozen; } diff --git a/fs/nilfs2/namei.c b/fs/nilfs2/namei.c index 7ccdb961eea9..151bc19d47c0 100644 --- a/fs/nilfs2/namei.c +++ b/fs/nilfs2/namei.c @@ -431,11 +431,11 @@ static int nilfs_rename(struct inode *old_dir, struct dentry *old_dentry, out_dir: if (dir_de) { kunmap(dir_page); - page_cache_release(dir_page); + put_page(dir_page); } out_old: kunmap(old_page); - page_cache_release(old_page); + put_page(old_page); out: nilfs_transaction_abort(old_dir->i_sb); return err; diff --git a/fs/nilfs2/page.c b/fs/nilfs2/page.c index c20df77eff99..489391561cda 100644 --- a/fs/nilfs2/page.c +++ b/fs/nilfs2/page.c @@ -50,7 +50,7 @@ __nilfs_get_page_block(struct page *page, unsigned long block, pgoff_t index, if (!page_has_buffers(page)) create_empty_buffers(page, 1 << blkbits, b_state); - first_block = (unsigned long)index << (PAGE_CACHE_SHIFT - blkbits); + first_block = (unsigned long)index << (PAGE_SHIFT - blkbits); bh = nilfs_page_get_nth_block(page, block - first_block); touch_buffer(bh); @@ -64,7 +64,7 @@ struct buffer_head *nilfs_grab_buffer(struct inode *inode, unsigned long b_state) { int blkbits = inode->i_blkbits; - pgoff_t index = blkoff >> (PAGE_CACHE_SHIFT - blkbits); + pgoff_t index = blkoff >> (PAGE_SHIFT - blkbits); struct page *page; struct buffer_head *bh; @@ -75,7 +75,7 @@ struct buffer_head *nilfs_grab_buffer(struct inode *inode, bh = __nilfs_get_page_block(page, blkoff, index, blkbits, b_state); if (unlikely(!bh)) { unlock_page(page); - page_cache_release(page); + put_page(page); return NULL; } return bh; @@ -288,7 +288,7 @@ repeat: __set_page_dirty_nobuffers(dpage); unlock_page(dpage); - page_cache_release(dpage); + put_page(dpage); unlock_page(page); } pagevec_release(&pvec); @@ -333,7 +333,7 @@ repeat: WARN_ON(PageDirty(dpage)); 
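/*
 * Illustrative aside, not from the patch: page_cache_get() and
 * page_cache_release() were thin #defines around get_page() and
 * put_page() in include/linux/pagemap.h, so the rename keeps every
 * get/put pairing in these hunks intact. A hypothetical caller,
 * assuming normal kernel context for struct page, showing the pattern
 * being preserved:
 */
static void example_pin_page(struct page *page)
{
	get_page(page);		/* take a reference (was page_cache_get) */
	/* ... safely use the page contents here ... */
	put_page(page);		/* drop the reference (was page_cache_release) */
}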
nilfs_copy_page(dpage, page, 0); unlock_page(dpage); - page_cache_release(dpage); + put_page(dpage); } else { struct page *page2; @@ -350,7 +350,7 @@ repeat: if (unlikely(err < 0)) { WARN_ON(err == -EEXIST); page->mapping = NULL; - page_cache_release(page); /* for cache */ + put_page(page); /* for cache */ } else { page->mapping = dmap; dmap->nrpages++; @@ -523,8 +523,8 @@ unsigned long nilfs_find_uncommitted_extent(struct inode *inode, if (inode->i_mapping->nrpages == 0) return 0; - index = start_blk >> (PAGE_CACHE_SHIFT - inode->i_blkbits); - nblocks_in_page = 1U << (PAGE_CACHE_SHIFT - inode->i_blkbits); + index = start_blk >> (PAGE_SHIFT - inode->i_blkbits); + nblocks_in_page = 1U << (PAGE_SHIFT - inode->i_blkbits); pagevec_init(&pvec, 0); @@ -537,7 +537,7 @@ repeat: if (length > 0 && pvec.pages[0]->index > index) goto out; - b = pvec.pages[0]->index << (PAGE_CACHE_SHIFT - inode->i_blkbits); + b = pvec.pages[0]->index << (PAGE_SHIFT - inode->i_blkbits); i = 0; do { page = pvec.pages[i]; diff --git a/fs/nilfs2/recovery.c b/fs/nilfs2/recovery.c index 9b4f205d1173..5afa77fadc11 100644 --- a/fs/nilfs2/recovery.c +++ b/fs/nilfs2/recovery.c @@ -544,14 +544,14 @@ static int nilfs_recover_dsync_blocks(struct the_nilfs *nilfs, blocksize, page, NULL); unlock_page(page); - page_cache_release(page); + put_page(page); (*nr_salvaged_blocks)++; goto next; failed_page: unlock_page(page); - page_cache_release(page); + put_page(page); failed_inode: printk(KERN_WARNING diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c index 3b65adaae7e4..4317f72568e6 100644 --- a/fs/nilfs2/segment.c +++ b/fs/nilfs2/segment.c @@ -2070,7 +2070,7 @@ static int nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode) goto failed_to_write; if (nilfs_sc_cstage_get(sci) == NILFS_ST_DONE || - nilfs->ns_blocksize_bits != PAGE_CACHE_SHIFT) { + nilfs->ns_blocksize_bits != PAGE_SHIFT) { /* * At this point, we avoid double buffering * for blocksize < pagesize because page dirty diff --git a/fs/ntfs/aops.c b/fs/ntfs/aops.c index 7521e11db728..a474e7ef92ea 100644 --- a/fs/ntfs/aops.c +++ b/fs/ntfs/aops.c @@ -74,7 +74,7 @@ static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate) set_buffer_uptodate(bh); - file_ofs = ((s64)page->index << PAGE_CACHE_SHIFT) + + file_ofs = ((s64)page->index << PAGE_SHIFT) + bh_offset(bh); read_lock_irqsave(&ni->size_lock, flags); init_size = ni->initialized_size; @@ -142,7 +142,7 @@ static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate) u32 rec_size; rec_size = ni->itype.index.block_size; - recs = PAGE_CACHE_SIZE / rec_size; + recs = PAGE_SIZE / rec_size; /* Should have been verified before we got here... */ BUG_ON(!recs); local_irq_save(flags); @@ -229,7 +229,7 @@ static int ntfs_read_block(struct page *page) * fully truncated, truncate will throw it away as soon as we unlock * it so no need to worry what we do with it. */ - iblock = (s64)page->index << (PAGE_CACHE_SHIFT - blocksize_bits); + iblock = (s64)page->index << (PAGE_SHIFT - blocksize_bits); read_lock_irqsave(&ni->size_lock, flags); lblock = (ni->allocated_size + blocksize - 1) >> blocksize_bits; init_size = ni->initialized_size; @@ -412,9 +412,9 @@ retry_readpage: vi = page->mapping->host; i_size = i_size_read(vi); /* Is the page fully outside i_size? 
(truncate in progress) */ - if (unlikely(page->index >= (i_size + PAGE_CACHE_SIZE - 1) >> - PAGE_CACHE_SHIFT)) { - zero_user(page, 0, PAGE_CACHE_SIZE); + if (unlikely(page->index >= (i_size + PAGE_SIZE - 1) >> + PAGE_SHIFT)) { + zero_user(page, 0, PAGE_SIZE); ntfs_debug("Read outside i_size - truncated?"); goto done; } @@ -463,7 +463,7 @@ retry_readpage: * ok to ignore the compressed flag here. */ if (unlikely(page->index > 0)) { - zero_user(page, 0, PAGE_CACHE_SIZE); + zero_user(page, 0, PAGE_SIZE); goto done; } if (!NInoAttr(ni)) @@ -509,7 +509,7 @@ retry_readpage: le16_to_cpu(ctx->attr->data.resident.value_offset), attr_len); /* Zero the remainder of the page. */ - memset(addr + attr_len, 0, PAGE_CACHE_SIZE - attr_len); + memset(addr + attr_len, 0, PAGE_SIZE - attr_len); flush_dcache_page(page); kunmap_atomic(addr); put_unm_err_out: @@ -599,7 +599,7 @@ static int ntfs_write_block(struct page *page, struct writeback_control *wbc) /* NOTE: Different naming scheme to ntfs_read_block()! */ /* The first block in the page. */ - block = (s64)page->index << (PAGE_CACHE_SHIFT - blocksize_bits); + block = (s64)page->index << (PAGE_SHIFT - blocksize_bits); read_lock_irqsave(&ni->size_lock, flags); i_size = i_size_read(vi); @@ -925,7 +925,7 @@ static int ntfs_write_mst_block(struct page *page, ntfs_volume *vol = ni->vol; u8 *kaddr; unsigned int rec_size = ni->itype.index.block_size; - ntfs_inode *locked_nis[PAGE_CACHE_SIZE / rec_size]; + ntfs_inode *locked_nis[PAGE_SIZE / rec_size]; struct buffer_head *bh, *head, *tbh, *rec_start_bh; struct buffer_head *bhs[MAX_BUF_PER_PAGE]; runlist_element *rl; @@ -949,7 +949,7 @@ static int ntfs_write_mst_block(struct page *page, (NInoAttr(ni) && ni->type == AT_INDEX_ALLOCATION))); bh_size = vol->sb->s_blocksize; bh_size_bits = vol->sb->s_blocksize_bits; - max_bhs = PAGE_CACHE_SIZE / bh_size; + max_bhs = PAGE_SIZE / bh_size; BUG_ON(!max_bhs); BUG_ON(max_bhs > MAX_BUF_PER_PAGE); @@ -961,13 +961,13 @@ static int ntfs_write_mst_block(struct page *page, BUG_ON(!bh); rec_size_bits = ni->itype.index.block_size_bits; - BUG_ON(!(PAGE_CACHE_SIZE >> rec_size_bits)); + BUG_ON(!(PAGE_SIZE >> rec_size_bits)); bhs_per_rec = rec_size >> bh_size_bits; BUG_ON(!bhs_per_rec); /* The first block in the page. */ rec_block = block = (sector_t)page->index << - (PAGE_CACHE_SHIFT - bh_size_bits); + (PAGE_SHIFT - bh_size_bits); /* The first out of bounds block for the data size. */ dblock = (i_size_read(vi) + bh_size - 1) >> bh_size_bits; @@ -1133,7 +1133,7 @@ lock_retry_remap: unsigned long mft_no; /* Get the mft record number. */ - mft_no = (((s64)page->index << PAGE_CACHE_SHIFT) + ofs) + mft_no = (((s64)page->index << PAGE_SHIFT) + ofs) >> rec_size_bits; /* Check whether to write this mft record. */ tni = NULL; @@ -1249,7 +1249,7 @@ do_mirror: continue; ofs = bh_offset(tbh); /* Get the mft record number. */ - mft_no = (((s64)page->index << PAGE_CACHE_SHIFT) + ofs) + mft_no = (((s64)page->index << PAGE_SHIFT) + ofs) >> rec_size_bits; if (mft_no < vol->mftmirr_size) ntfs_sync_mft_mirror(vol, mft_no, @@ -1300,7 +1300,7 @@ done: * Set page error if there is only one ntfs record in the page. * Otherwise we would loose per-record granularity. */ - if (ni->itype.index.block_size == PAGE_CACHE_SIZE) + if (ni->itype.index.block_size == PAGE_SIZE) SetPageError(page); NVolSetErrors(vol); } @@ -1308,7 +1308,7 @@ done: ntfs_debug("Page still contains one or more dirty ntfs " "records. 
Redirtying the page starting at " "record 0x%lx.", page->index << - (PAGE_CACHE_SHIFT - rec_size_bits)); + (PAGE_SHIFT - rec_size_bits)); redirty_page_for_writepage(wbc, page); unlock_page(page); } else { @@ -1365,13 +1365,13 @@ retry_writepage: BUG_ON(!PageLocked(page)); i_size = i_size_read(vi); /* Is the page fully outside i_size? (truncate in progress) */ - if (unlikely(page->index >= (i_size + PAGE_CACHE_SIZE - 1) >> - PAGE_CACHE_SHIFT)) { + if (unlikely(page->index >= (i_size + PAGE_SIZE - 1) >> + PAGE_SHIFT)) { /* * The page may have dirty, unmapped buffers. Make them * freeable here, so the page does not leak. */ - block_invalidatepage(page, 0, PAGE_CACHE_SIZE); + block_invalidatepage(page, 0, PAGE_SIZE); unlock_page(page); ntfs_debug("Write outside i_size - truncated?"); return 0; @@ -1414,10 +1414,10 @@ retry_writepage: /* NInoNonResident() == NInoIndexAllocPresent() */ if (NInoNonResident(ni)) { /* We have to zero every time due to mmap-at-end-of-file. */ - if (page->index >= (i_size >> PAGE_CACHE_SHIFT)) { + if (page->index >= (i_size >> PAGE_SHIFT)) { /* The page straddles i_size. */ - unsigned int ofs = i_size & ~PAGE_CACHE_MASK; - zero_user_segment(page, ofs, PAGE_CACHE_SIZE); + unsigned int ofs = i_size & ~PAGE_MASK; + zero_user_segment(page, ofs, PAGE_SIZE); } /* Handle mst protected attributes. */ if (NInoMstProtected(ni)) @@ -1500,7 +1500,7 @@ retry_writepage: le16_to_cpu(ctx->attr->data.resident.value_offset), addr, attr_len); /* Zero out of bounds area in the page cache page. */ - memset(addr + attr_len, 0, PAGE_CACHE_SIZE - attr_len); + memset(addr + attr_len, 0, PAGE_SIZE - attr_len); kunmap_atomic(addr); flush_dcache_page(page); flush_dcache_mft_record_page(ctx->ntfs_ino); diff --git a/fs/ntfs/aops.h b/fs/ntfs/aops.h index caecc58f529c..37cd7e45dcbc 100644 --- a/fs/ntfs/aops.h +++ b/fs/ntfs/aops.h @@ -40,7 +40,7 @@ static inline void ntfs_unmap_page(struct page *page) { kunmap(page); - page_cache_release(page); + put_page(page); } /** diff --git a/fs/ntfs/attrib.c b/fs/ntfs/attrib.c index 250ed5b20c8f..44a39a099b54 100644 --- a/fs/ntfs/attrib.c +++ b/fs/ntfs/attrib.c @@ -152,7 +152,7 @@ int ntfs_map_runlist_nolock(ntfs_inode *ni, VCN vcn, ntfs_attr_search_ctx *ctx) if (old_ctx.base_ntfs_ino && old_ctx.ntfs_ino != old_ctx.base_ntfs_ino) { put_this_page = old_ctx.ntfs_ino->page; - page_cache_get(put_this_page); + get_page(put_this_page); } /* * Reinitialize the search context so we can lookup the @@ -275,7 +275,7 @@ retry_map: * the pieces anyway. 
*/ if (put_this_page) - page_cache_release(put_this_page); + put_page(put_this_page); } return err; } @@ -1660,7 +1660,7 @@ int ntfs_attr_make_non_resident(ntfs_inode *ni, const u32 data_size) memcpy(kaddr, (u8*)a + le16_to_cpu(a->data.resident.value_offset), attr_size); - memset(kaddr + attr_size, 0, PAGE_CACHE_SIZE - attr_size); + memset(kaddr + attr_size, 0, PAGE_SIZE - attr_size); kunmap_atomic(kaddr); flush_dcache_page(page); SetPageUptodate(page); @@ -1748,7 +1748,7 @@ int ntfs_attr_make_non_resident(ntfs_inode *ni, const u32 data_size) if (page) { set_page_dirty(page); unlock_page(page); - page_cache_release(page); + put_page(page); } ntfs_debug("Done."); return 0; @@ -1835,7 +1835,7 @@ rl_err_out: ntfs_free(rl); page_err_out: unlock_page(page); - page_cache_release(page); + put_page(page); } if (err == -EINVAL) err = -EIO; @@ -2513,17 +2513,17 @@ int ntfs_attr_set(ntfs_inode *ni, const s64 ofs, const s64 cnt, const u8 val) BUG_ON(NInoEncrypted(ni)); mapping = VFS_I(ni)->i_mapping; /* Work out the starting index and page offset. */ - idx = ofs >> PAGE_CACHE_SHIFT; - start_ofs = ofs & ~PAGE_CACHE_MASK; + idx = ofs >> PAGE_SHIFT; + start_ofs = ofs & ~PAGE_MASK; /* Work out the ending index and page offset. */ end = ofs + cnt; - end_ofs = end & ~PAGE_CACHE_MASK; + end_ofs = end & ~PAGE_MASK; /* If the end is outside the inode size return -ESPIPE. */ if (unlikely(end > i_size_read(VFS_I(ni)))) { ntfs_error(vol->sb, "Request exceeds end of attribute."); return -ESPIPE; } - end >>= PAGE_CACHE_SHIFT; + end >>= PAGE_SHIFT; /* If there is a first partial page, need to do it the slow way. */ if (start_ofs) { page = read_mapping_page(mapping, idx, NULL); @@ -2536,7 +2536,7 @@ int ntfs_attr_set(ntfs_inode *ni, const s64 ofs, const s64 cnt, const u8 val) * If the last page is the same as the first page, need to * limit the write to the end offset. */ - size = PAGE_CACHE_SIZE; + size = PAGE_SIZE; if (idx == end) size = end_ofs; kaddr = kmap_atomic(page); @@ -2544,7 +2544,7 @@ int ntfs_attr_set(ntfs_inode *ni, const s64 ofs, const s64 cnt, const u8 val) flush_dcache_page(page); kunmap_atomic(kaddr); set_page_dirty(page); - page_cache_release(page); + put_page(page); balance_dirty_pages_ratelimited(mapping); cond_resched(); if (idx == end) @@ -2561,7 +2561,7 @@ int ntfs_attr_set(ntfs_inode *ni, const s64 ofs, const s64 cnt, const u8 val) return -ENOMEM; } kaddr = kmap_atomic(page); - memset(kaddr, val, PAGE_CACHE_SIZE); + memset(kaddr, val, PAGE_SIZE); flush_dcache_page(page); kunmap_atomic(kaddr); /* @@ -2585,7 +2585,7 @@ int ntfs_attr_set(ntfs_inode *ni, const s64 ofs, const s64 cnt, const u8 val) set_page_dirty(page); /* Finally unlock and release the page. */ unlock_page(page); - page_cache_release(page); + put_page(page); balance_dirty_pages_ratelimited(mapping); cond_resched(); } @@ -2602,7 +2602,7 @@ int ntfs_attr_set(ntfs_inode *ni, const s64 ofs, const s64 cnt, const u8 val) flush_dcache_page(page); kunmap_atomic(kaddr); set_page_dirty(page); - page_cache_release(page); + put_page(page); balance_dirty_pages_ratelimited(mapping); cond_resched(); } diff --git a/fs/ntfs/bitmap.c b/fs/ntfs/bitmap.c index 0809cf876098..ec130c588d2b 100644 --- a/fs/ntfs/bitmap.c +++ b/fs/ntfs/bitmap.c @@ -67,8 +67,8 @@ int __ntfs_bitmap_set_bits_in_run(struct inode *vi, const s64 start_bit, * Calculate the indices for the pages containing the first and last * bits, i.e. @start_bit and @start_bit + @cnt - 1, respectively. 
*/ - index = start_bit >> (3 + PAGE_CACHE_SHIFT); - end_index = (start_bit + cnt - 1) >> (3 + PAGE_CACHE_SHIFT); + index = start_bit >> (3 + PAGE_SHIFT); + end_index = (start_bit + cnt - 1) >> (3 + PAGE_SHIFT); /* Get the page containing the first bit (@start_bit). */ mapping = vi->i_mapping; @@ -82,7 +82,7 @@ int __ntfs_bitmap_set_bits_in_run(struct inode *vi, const s64 start_bit, kaddr = page_address(page); /* Set @pos to the position of the byte containing @start_bit. */ - pos = (start_bit >> 3) & ~PAGE_CACHE_MASK; + pos = (start_bit >> 3) & ~PAGE_MASK; /* Calculate the position of @start_bit in the first byte. */ bit = start_bit & 7; @@ -108,7 +108,7 @@ int __ntfs_bitmap_set_bits_in_run(struct inode *vi, const s64 start_bit, * Depending on @value, modify all remaining whole bytes in the page up * to @cnt. */ - len = min_t(s64, cnt >> 3, PAGE_CACHE_SIZE - pos); + len = min_t(s64, cnt >> 3, PAGE_SIZE - pos); memset(kaddr + pos, value ? 0xff : 0, len); cnt -= len << 3; @@ -132,7 +132,7 @@ int __ntfs_bitmap_set_bits_in_run(struct inode *vi, const s64 start_bit, * Depending on @value, modify all remaining whole bytes in the * page up to @cnt. */ - len = min_t(s64, cnt >> 3, PAGE_CACHE_SIZE); + len = min_t(s64, cnt >> 3, PAGE_SIZE); memset(kaddr, value ? 0xff : 0, len); cnt -= len << 3; } diff --git a/fs/ntfs/compress.c b/fs/ntfs/compress.c index f82498c35e78..b6074a56661b 100644 --- a/fs/ntfs/compress.c +++ b/fs/ntfs/compress.c @@ -104,7 +104,7 @@ static void zero_partial_compressed_page(struct page *page, unsigned int kp_ofs; ntfs_debug("Zeroing page region outside initialized size."); - if (((s64)page->index << PAGE_CACHE_SHIFT) >= initialized_size) { + if (((s64)page->index << PAGE_SHIFT) >= initialized_size) { /* * FIXME: Using clear_page() will become wrong when we get * PAGE_CACHE_SIZE != PAGE_SIZE but for now there is no problem. @@ -112,8 +112,8 @@ static void zero_partial_compressed_page(struct page *page, clear_page(kp); return; } - kp_ofs = initialized_size & ~PAGE_CACHE_MASK; - memset(kp + kp_ofs, 0, PAGE_CACHE_SIZE - kp_ofs); + kp_ofs = initialized_size & ~PAGE_MASK; + memset(kp + kp_ofs, 0, PAGE_SIZE - kp_ofs); return; } @@ -123,7 +123,7 @@ static void zero_partial_compressed_page(struct page *page, static inline void handle_bounds_compressed_page(struct page *page, const loff_t i_size, const s64 initialized_size) { - if ((page->index >= (initialized_size >> PAGE_CACHE_SHIFT)) && + if ((page->index >= (initialized_size >> PAGE_SHIFT)) && (initialized_size < i_size)) zero_partial_compressed_page(page, initialized_size); return; @@ -241,7 +241,7 @@ return_error: if (di == xpage) *xpage_done = 1; else - page_cache_release(dp); + put_page(dp); dest_pages[di] = NULL; } } @@ -274,7 +274,7 @@ return_error: cb = cb_sb_end; /* Advance destination position to next sub-block. */ - *dest_ofs = (*dest_ofs + NTFS_SB_SIZE) & ~PAGE_CACHE_MASK; + *dest_ofs = (*dest_ofs + NTFS_SB_SIZE) & ~PAGE_MASK; if (!*dest_ofs && (++*dest_index > dest_max_index)) goto return_overflow; goto do_next_sb; @@ -301,7 +301,7 @@ return_error: /* Advance destination position to next sub-block. */ *dest_ofs += NTFS_SB_SIZE; - if (!(*dest_ofs &= ~PAGE_CACHE_MASK)) { + if (!(*dest_ofs &= ~PAGE_MASK)) { finalize_page: /* * First stage: add current page index to array of @@ -335,7 +335,7 @@ do_next_tag: *dest_ofs += nr_bytes; } /* We have finished the current sub-block. 
*/ - if (!(*dest_ofs &= ~PAGE_CACHE_MASK)) + if (!(*dest_ofs &= ~PAGE_MASK)) goto finalize_page; goto do_next_sb; } @@ -498,13 +498,13 @@ int ntfs_read_compressed_block(struct page *page) VCN vcn; LCN lcn; /* The first wanted vcn (minimum alignment is PAGE_CACHE_SIZE). */ - VCN start_vcn = (((s64)index << PAGE_CACHE_SHIFT) & ~cb_size_mask) >> + VCN start_vcn = (((s64)index << PAGE_SHIFT) & ~cb_size_mask) >> vol->cluster_size_bits; /* * The first vcn after the last wanted vcn (minimum alignment is again * PAGE_CACHE_SIZE. */ - VCN end_vcn = ((((s64)(index + 1UL) << PAGE_CACHE_SHIFT) + cb_size - 1) + VCN end_vcn = ((((s64)(index + 1UL) << PAGE_SHIFT) + cb_size - 1) & ~cb_size_mask) >> vol->cluster_size_bits; /* Number of compression blocks (cbs) in the wanted vcn range. */ unsigned int nr_cbs = (end_vcn - start_vcn) << vol->cluster_size_bits @@ -515,7 +515,7 @@ int ntfs_read_compressed_block(struct page *page) * guarantees of start_vcn and end_vcn, no need to round up here. */ unsigned int nr_pages = (end_vcn - start_vcn) << - vol->cluster_size_bits >> PAGE_CACHE_SHIFT; + vol->cluster_size_bits >> PAGE_SHIFT; unsigned int xpage, max_page, cur_page, cur_ofs, i; unsigned int cb_clusters, cb_max_ofs; int block, max_block, cb_max_page, bhs_size, nr_bhs, err = 0; @@ -549,7 +549,7 @@ int ntfs_read_compressed_block(struct page *page) * We have already been given one page, this is the one we must do. * Once again, the alignment guarantees keep it simple. */ - offset = start_vcn << vol->cluster_size_bits >> PAGE_CACHE_SHIFT; + offset = start_vcn << vol->cluster_size_bits >> PAGE_SHIFT; xpage = index - offset; pages[xpage] = page; /* @@ -560,13 +560,13 @@ int ntfs_read_compressed_block(struct page *page) i_size = i_size_read(VFS_I(ni)); initialized_size = ni->initialized_size; read_unlock_irqrestore(&ni->size_lock, flags); - max_page = ((i_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT) - + max_page = ((i_size + PAGE_SIZE - 1) >> PAGE_SHIFT) - offset; /* Is the page fully outside i_size? (truncate in progress) */ if (xpage >= max_page) { kfree(bhs); kfree(pages); - zero_user(page, 0, PAGE_CACHE_SIZE); + zero_user(page, 0, PAGE_SIZE); ntfs_debug("Compressed read outside i_size - truncated?"); SetPageUptodate(page); unlock_page(page); @@ -591,7 +591,7 @@ int ntfs_read_compressed_block(struct page *page) continue; } unlock_page(page); - page_cache_release(page); + put_page(page); pages[i] = NULL; } } @@ -735,9 +735,9 @@ lock_retry_remap: ntfs_debug("Successfully read the compression block."); /* The last page and maximum offset within it for the current cb. */ - cb_max_page = (cur_page << PAGE_CACHE_SHIFT) + cur_ofs + cb_size; - cb_max_ofs = cb_max_page & ~PAGE_CACHE_MASK; - cb_max_page >>= PAGE_CACHE_SHIFT; + cb_max_page = (cur_page << PAGE_SHIFT) + cur_ofs + cb_size; + cb_max_ofs = cb_max_page & ~PAGE_MASK; + cb_max_page >>= PAGE_SHIFT; /* Catch end of file inside a compression block. 
*/ if (cb_max_page > max_page) @@ -762,7 +762,7 @@ lock_retry_remap: clear_page(page_address(page)); else memset(page_address(page) + cur_ofs, 0, - PAGE_CACHE_SIZE - + PAGE_SIZE - cur_ofs); flush_dcache_page(page); kunmap(page); @@ -771,10 +771,10 @@ lock_retry_remap: if (cur_page == xpage) xpage_done = 1; else - page_cache_release(page); + put_page(page); pages[cur_page] = NULL; } - cb_pos += PAGE_CACHE_SIZE - cur_ofs; + cb_pos += PAGE_SIZE - cur_ofs; cur_ofs = 0; if (cb_pos >= cb_end) break; @@ -816,8 +816,8 @@ lock_retry_remap: page = pages[cur_page]; if (page) memcpy(page_address(page) + cur_ofs, cb_pos, - PAGE_CACHE_SIZE - cur_ofs); - cb_pos += PAGE_CACHE_SIZE - cur_ofs; + PAGE_SIZE - cur_ofs); + cb_pos += PAGE_SIZE - cur_ofs; cur_ofs = 0; if (cb_pos >= cb_end) break; @@ -850,10 +850,10 @@ lock_retry_remap: if (cur2_page == xpage) xpage_done = 1; else - page_cache_release(page); + put_page(page); pages[cur2_page] = NULL; } - cb_pos2 += PAGE_CACHE_SIZE - cur_ofs2; + cb_pos2 += PAGE_SIZE - cur_ofs2; cur_ofs2 = 0; if (cb_pos2 >= cb_end) break; @@ -884,7 +884,7 @@ lock_retry_remap: kunmap(page); unlock_page(page); if (prev_cur_page != xpage) - page_cache_release(page); + put_page(page); pages[prev_cur_page] = NULL; } } @@ -914,7 +914,7 @@ lock_retry_remap: kunmap(page); unlock_page(page); if (cur_page != xpage) - page_cache_release(page); + put_page(page); pages[cur_page] = NULL; } } @@ -961,7 +961,7 @@ err_out: kunmap(page); unlock_page(page); if (i != xpage) - page_cache_release(page); + put_page(page); } } kfree(pages); diff --git a/fs/ntfs/dir.c b/fs/ntfs/dir.c index b2eff5816adc..3cdce162592d 100644 --- a/fs/ntfs/dir.c +++ b/fs/ntfs/dir.c @@ -319,7 +319,7 @@ descend_into_child_node: * disk if necessary. */ page = ntfs_map_page(ia_mapping, vcn << - dir_ni->itype.index.vcn_size_bits >> PAGE_CACHE_SHIFT); + dir_ni->itype.index.vcn_size_bits >> PAGE_SHIFT); if (IS_ERR(page)) { ntfs_error(sb, "Failed to map directory index page, error %ld.", -PTR_ERR(page)); @@ -331,9 +331,9 @@ descend_into_child_node: fast_descend_into_child_node: /* Get to the index allocation block. */ ia = (INDEX_ALLOCATION*)(kaddr + ((vcn << - dir_ni->itype.index.vcn_size_bits) & ~PAGE_CACHE_MASK)); + dir_ni->itype.index.vcn_size_bits) & ~PAGE_MASK)); /* Bounds checks. */ - if ((u8*)ia < kaddr || (u8*)ia > kaddr + PAGE_CACHE_SIZE) { + if ((u8*)ia < kaddr || (u8*)ia > kaddr + PAGE_SIZE) { ntfs_error(sb, "Out of bounds check failed. Corrupt directory " "inode 0x%lx or driver bug.", dir_ni->mft_no); goto unm_err_out; @@ -366,7 +366,7 @@ fast_descend_into_child_node: goto unm_err_out; } index_end = (u8*)ia + dir_ni->itype.index.block_size; - if (index_end > kaddr + PAGE_CACHE_SIZE) { + if (index_end > kaddr + PAGE_SIZE) { ntfs_error(sb, "Index buffer (VCN 0x%llx) of directory inode " "0x%lx crosses page boundary. Impossible! " "Cannot access! This is probably a bug in the " @@ -559,9 +559,9 @@ found_it2: /* If vcn is in the same page cache page as old_vcn we * recycle the mapped page. */ if (old_vcn << vol->cluster_size_bits >> - PAGE_CACHE_SHIFT == vcn << + PAGE_SHIFT == vcn << vol->cluster_size_bits >> - PAGE_CACHE_SHIFT) + PAGE_SHIFT) goto fast_descend_into_child_node; unlock_page(page); ntfs_unmap_page(page); @@ -1246,15 +1246,15 @@ skip_index_root: goto iput_err_out; } /* Get the starting bit position in the current bitmap page. 
*/ - cur_bmp_pos = bmp_pos & ((PAGE_CACHE_SIZE * 8) - 1); - bmp_pos &= ~(u64)((PAGE_CACHE_SIZE * 8) - 1); + cur_bmp_pos = bmp_pos & ((PAGE_SIZE * 8) - 1); + bmp_pos &= ~(u64)((PAGE_SIZE * 8) - 1); get_next_bmp_page: ntfs_debug("Reading bitmap with page index 0x%llx, bit ofs 0x%llx", - (unsigned long long)bmp_pos >> (3 + PAGE_CACHE_SHIFT), + (unsigned long long)bmp_pos >> (3 + PAGE_SHIFT), (unsigned long long)bmp_pos & - (unsigned long long)((PAGE_CACHE_SIZE * 8) - 1)); + (unsigned long long)((PAGE_SIZE * 8) - 1)); bmp_page = ntfs_map_page(bmp_mapping, - bmp_pos >> (3 + PAGE_CACHE_SHIFT)); + bmp_pos >> (3 + PAGE_SHIFT)); if (IS_ERR(bmp_page)) { ntfs_error(sb, "Reading index bitmap failed."); err = PTR_ERR(bmp_page); @@ -1270,9 +1270,9 @@ find_next_index_buffer: * If we have reached the end of the bitmap page, get the next * page, and put away the old one. */ - if (unlikely((cur_bmp_pos >> 3) >= PAGE_CACHE_SIZE)) { + if (unlikely((cur_bmp_pos >> 3) >= PAGE_SIZE)) { ntfs_unmap_page(bmp_page); - bmp_pos += PAGE_CACHE_SIZE * 8; + bmp_pos += PAGE_SIZE * 8; cur_bmp_pos = 0; goto get_next_bmp_page; } @@ -1285,8 +1285,8 @@ find_next_index_buffer: ntfs_debug("Handling index buffer 0x%llx.", (unsigned long long)bmp_pos + cur_bmp_pos); /* If the current index buffer is in the same page we reuse the page. */ - if ((prev_ia_pos & (s64)PAGE_CACHE_MASK) != - (ia_pos & (s64)PAGE_CACHE_MASK)) { + if ((prev_ia_pos & (s64)PAGE_MASK) != + (ia_pos & (s64)PAGE_MASK)) { prev_ia_pos = ia_pos; if (likely(ia_page != NULL)) { unlock_page(ia_page); @@ -1296,7 +1296,7 @@ find_next_index_buffer: * Map the page cache page containing the current ia_pos, * reading it from disk if necessary. */ - ia_page = ntfs_map_page(ia_mapping, ia_pos >> PAGE_CACHE_SHIFT); + ia_page = ntfs_map_page(ia_mapping, ia_pos >> PAGE_SHIFT); if (IS_ERR(ia_page)) { ntfs_error(sb, "Reading index allocation data failed."); err = PTR_ERR(ia_page); @@ -1307,10 +1307,10 @@ find_next_index_buffer: kaddr = (u8*)page_address(ia_page); } /* Get the current index buffer. */ - ia = (INDEX_ALLOCATION*)(kaddr + (ia_pos & ~PAGE_CACHE_MASK & - ~(s64)(ndir->itype.index.block_size - 1))); + ia = (INDEX_ALLOCATION*)(kaddr + (ia_pos & ~PAGE_MASK & + ~(s64)(ndir->itype.index.block_size - 1))); /* Bounds checks. */ - if (unlikely((u8*)ia < kaddr || (u8*)ia > kaddr + PAGE_CACHE_SIZE)) { + if (unlikely((u8*)ia < kaddr || (u8*)ia > kaddr + PAGE_SIZE)) { ntfs_error(sb, "Out of bounds check failed. Corrupt directory " "inode 0x%lx or driver bug.", vdir->i_ino); goto err_out; @@ -1348,7 +1348,7 @@ find_next_index_buffer: goto err_out; } index_end = (u8*)ia + ndir->itype.index.block_size; - if (unlikely(index_end > kaddr + PAGE_CACHE_SIZE)) { + if (unlikely(index_end > kaddr + PAGE_SIZE)) { ntfs_error(sb, "Index buffer (VCN 0x%llx) of directory inode " "0x%lx crosses page boundary. Impossible! " "Cannot access! This is probably a bug in the " diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c index bed4d427dfae..2dae60857544 100644 --- a/fs/ntfs/file.c +++ b/fs/ntfs/file.c @@ -220,8 +220,8 @@ do_non_resident_extend: m = NULL; } mapping = vi->i_mapping; - index = old_init_size >> PAGE_CACHE_SHIFT; - end_index = (new_init_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; + index = old_init_size >> PAGE_SHIFT; + end_index = (new_init_size + PAGE_SIZE - 1) >> PAGE_SHIFT; do { /* * Read the page. 
If the page is not present, this will zero @@ -233,7 +233,7 @@ do_non_resident_extend: goto init_err_out; } if (unlikely(PageError(page))) { - page_cache_release(page); + put_page(page); err = -EIO; goto init_err_out; } @@ -242,13 +242,13 @@ do_non_resident_extend: * enough to make ntfs_writepage() work. */ write_lock_irqsave(&ni->size_lock, flags); - ni->initialized_size = (s64)(index + 1) << PAGE_CACHE_SHIFT; + ni->initialized_size = (s64)(index + 1) << PAGE_SHIFT; if (ni->initialized_size > new_init_size) ni->initialized_size = new_init_size; write_unlock_irqrestore(&ni->size_lock, flags); /* Set the page dirty so it gets written out. */ set_page_dirty(page); - page_cache_release(page); + put_page(page); /* * Play nice with the vm and the rest of the system. This is * very much needed as we can potentially be modifying the @@ -543,7 +543,7 @@ out: err_out: while (nr > 0) { unlock_page(pages[--nr]); - page_cache_release(pages[nr]); + put_page(pages[nr]); } goto out; } @@ -653,7 +653,7 @@ static int ntfs_prepare_pages_for_non_resident_write(struct page **pages, u = 0; do_next_page: page = pages[u]; - bh_pos = (s64)page->index << PAGE_CACHE_SHIFT; + bh_pos = (s64)page->index << PAGE_SHIFT; bh = head = page_buffers(page); do { VCN cdelta; @@ -810,11 +810,11 @@ map_buffer_cached: kaddr = kmap_atomic(page); if (bh_pos < pos) { - pofs = bh_pos & ~PAGE_CACHE_MASK; + pofs = bh_pos & ~PAGE_MASK; memset(kaddr + pofs, 0, pos - bh_pos); } if (bh_end > end) { - pofs = end & ~PAGE_CACHE_MASK; + pofs = end & ~PAGE_MASK; memset(kaddr + pofs, 0, bh_end - end); } kunmap_atomic(kaddr); @@ -942,7 +942,7 @@ rl_not_mapped_enoent: * unmapped. This can only happen when the cluster size is * less than the page cache size. */ - if (unlikely(vol->cluster_size < PAGE_CACHE_SIZE)) { + if (unlikely(vol->cluster_size < PAGE_SIZE)) { bh_cend = (bh_end + vol->cluster_size - 1) >> vol->cluster_size_bits; if ((bh_cend <= cpos || bh_cpos >= cend)) { @@ -1208,7 +1208,7 @@ rl_not_mapped_enoent: wait_on_buffer(bh); if (likely(buffer_uptodate(bh))) { page = bh->b_page; - bh_pos = ((s64)page->index << PAGE_CACHE_SHIFT) + + bh_pos = ((s64)page->index << PAGE_SHIFT) + bh_offset(bh); /* * If the buffer overflows the initialized size, need @@ -1350,7 +1350,7 @@ rl_not_mapped_enoent: bh = head = page_buffers(page); do { if (u == nr_pages && - ((s64)page->index << PAGE_CACHE_SHIFT) + + ((s64)page->index << PAGE_SHIFT) + bh_offset(bh) >= end) break; if (!buffer_new(bh)) @@ -1422,7 +1422,7 @@ static inline int ntfs_commit_pages_after_non_resident_write( bool partial; page = pages[u]; - bh_pos = (s64)page->index << PAGE_CACHE_SHIFT; + bh_pos = (s64)page->index << PAGE_SHIFT; bh = head = page_buffers(page); partial = false; do { @@ -1639,7 +1639,7 @@ static int ntfs_commit_pages_after_write(struct page **pages, if (end < attr_len) memcpy(kaddr + end, kattr + end, attr_len - end); /* Zero the region outside the end of the attribute value. */ - memset(kaddr + attr_len, 0, PAGE_CACHE_SIZE - attr_len); + memset(kaddr + attr_len, 0, PAGE_SIZE - attr_len); flush_dcache_page(page); SetPageUptodate(page); } @@ -1706,7 +1706,7 @@ static size_t ntfs_copy_from_user_iter(struct page **pages, unsigned nr_pages, unsigned len, copied; do { - len = PAGE_CACHE_SIZE - ofs; + len = PAGE_SIZE - ofs; if (len > bytes) len = bytes; copied = iov_iter_copy_from_user_atomic(*pages, &data, ofs, @@ -1724,14 +1724,14 @@ out: return total; err: /* Zero the rest of the target like __copy_from_user(). 
*/ - len = PAGE_CACHE_SIZE - copied; + len = PAGE_SIZE - copied; do { if (len > bytes) len = bytes; zero_user(*pages, copied, len); bytes -= len; copied = 0; - len = PAGE_CACHE_SIZE; + len = PAGE_SIZE; } while (++pages < last_page); goto out; } @@ -1787,8 +1787,8 @@ static ssize_t ntfs_perform_write(struct file *file, struct iov_iter *i, * attributes. */ nr_pages = 1; - if (vol->cluster_size > PAGE_CACHE_SIZE && NInoNonResident(ni)) - nr_pages = vol->cluster_size >> PAGE_CACHE_SHIFT; + if (vol->cluster_size > PAGE_SIZE && NInoNonResident(ni)) + nr_pages = vol->cluster_size >> PAGE_SHIFT; last_vcn = -1; do { VCN vcn; @@ -1796,9 +1796,9 @@ static ssize_t ntfs_perform_write(struct file *file, struct iov_iter *i, unsigned ofs, do_pages, u; size_t copied; - start_idx = idx = pos >> PAGE_CACHE_SHIFT; - ofs = pos & ~PAGE_CACHE_MASK; - bytes = PAGE_CACHE_SIZE - ofs; + start_idx = idx = pos >> PAGE_SHIFT; + ofs = pos & ~PAGE_MASK; + bytes = PAGE_SIZE - ofs; do_pages = 1; if (nr_pages > 1) { vcn = pos >> vol->cluster_size_bits; @@ -1832,7 +1832,7 @@ static ssize_t ntfs_perform_write(struct file *file, struct iov_iter *i, if (lcn == LCN_HOLE) { start_idx = (pos & ~(s64) vol->cluster_size_mask) - >> PAGE_CACHE_SHIFT; + >> PAGE_SHIFT; bytes = vol->cluster_size - (pos & vol->cluster_size_mask); do_pages = nr_pages; @@ -1871,12 +1871,12 @@ again: if (unlikely(status)) { do { unlock_page(pages[--do_pages]); - page_cache_release(pages[do_pages]); + put_page(pages[do_pages]); } while (do_pages); break; } } - u = (pos >> PAGE_CACHE_SHIFT) - pages[0]->index; + u = (pos >> PAGE_SHIFT) - pages[0]->index; copied = ntfs_copy_from_user_iter(pages + u, do_pages - u, ofs, i, bytes); ntfs_flush_dcache_pages(pages + u, do_pages - u); @@ -1889,7 +1889,7 @@ again: } do { unlock_page(pages[--do_pages]); - page_cache_release(pages[do_pages]); + put_page(pages[do_pages]); } while (do_pages); if (unlikely(status < 0)) break; @@ -1921,7 +1921,7 @@ again: } } while (iov_iter_count(i)); if (cached_page) - page_cache_release(cached_page); + put_page(cached_page); ntfs_debug("Done. Returning %s (written 0x%lx, status %li).", written ? "written" : "status", (unsigned long)written, (long)status); diff --git a/fs/ntfs/index.c b/fs/ntfs/index.c index 096c135691ae..02a83a46ead2 100644 --- a/fs/ntfs/index.c +++ b/fs/ntfs/index.c @@ -276,7 +276,7 @@ descend_into_child_node: * disk if necessary. */ page = ntfs_map_page(ia_mapping, vcn << - idx_ni->itype.index.vcn_size_bits >> PAGE_CACHE_SHIFT); + idx_ni->itype.index.vcn_size_bits >> PAGE_SHIFT); if (IS_ERR(page)) { ntfs_error(sb, "Failed to map index page, error %ld.", -PTR_ERR(page)); @@ -288,9 +288,9 @@ descend_into_child_node: fast_descend_into_child_node: /* Get to the index allocation block. */ ia = (INDEX_ALLOCATION*)(kaddr + ((vcn << - idx_ni->itype.index.vcn_size_bits) & ~PAGE_CACHE_MASK)); + idx_ni->itype.index.vcn_size_bits) & ~PAGE_MASK)); /* Bounds checks. */ - if ((u8*)ia < kaddr || (u8*)ia > kaddr + PAGE_CACHE_SIZE) { + if ((u8*)ia < kaddr || (u8*)ia > kaddr + PAGE_SIZE) { ntfs_error(sb, "Out of bounds check failed. Corrupt inode " "0x%lx or driver bug.", idx_ni->mft_no); goto unm_err_out; @@ -323,7 +323,7 @@ fast_descend_into_child_node: goto unm_err_out; } index_end = (u8*)ia + idx_ni->itype.index.block_size; - if (index_end > kaddr + PAGE_CACHE_SIZE) { + if (index_end > kaddr + PAGE_SIZE) { ntfs_error(sb, "Index buffer (VCN 0x%llx) of inode 0x%lx " "crosses page boundary. Impossible! Cannot " "access! 
This is probably a bug in the " @@ -427,9 +427,9 @@ ia_done: * the mapped page. */ if (old_vcn << vol->cluster_size_bits >> - PAGE_CACHE_SHIFT == vcn << + PAGE_SHIFT == vcn << vol->cluster_size_bits >> - PAGE_CACHE_SHIFT) + PAGE_SHIFT) goto fast_descend_into_child_node; unlock_page(page); ntfs_unmap_page(page); diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c index d284f07eda77..3eda6d4bcc65 100644 --- a/fs/ntfs/inode.c +++ b/fs/ntfs/inode.c @@ -868,12 +868,12 @@ skip_attr_list_load: ni->itype.index.block_size); goto unm_err_out; } - if (ni->itype.index.block_size > PAGE_CACHE_SIZE) { + if (ni->itype.index.block_size > PAGE_SIZE) { ntfs_error(vi->i_sb, "Index block size (%u) > " "PAGE_CACHE_SIZE (%ld) is not " "supported. Sorry.", ni->itype.index.block_size, - PAGE_CACHE_SIZE); + PAGE_SIZE); err = -EOPNOTSUPP; goto unm_err_out; } @@ -1585,10 +1585,10 @@ static int ntfs_read_locked_index_inode(struct inode *base_vi, struct inode *vi) "two.", ni->itype.index.block_size); goto unm_err_out; } - if (ni->itype.index.block_size > PAGE_CACHE_SIZE) { + if (ni->itype.index.block_size > PAGE_SIZE) { ntfs_error(vi->i_sb, "Index block size (%u) > PAGE_CACHE_SIZE " "(%ld) is not supported. Sorry.", - ni->itype.index.block_size, PAGE_CACHE_SIZE); + ni->itype.index.block_size, PAGE_SIZE); err = -EOPNOTSUPP; goto unm_err_out; } diff --git a/fs/ntfs/lcnalloc.c b/fs/ntfs/lcnalloc.c index 1711b710b641..27a24a42f712 100644 --- a/fs/ntfs/lcnalloc.c +++ b/fs/ntfs/lcnalloc.c @@ -283,15 +283,15 @@ runlist_element *ntfs_cluster_alloc(ntfs_volume *vol, const VCN start_vcn, ntfs_unmap_page(page); } page = ntfs_map_page(mapping, last_read_pos >> - PAGE_CACHE_SHIFT); + PAGE_SHIFT); if (IS_ERR(page)) { err = PTR_ERR(page); ntfs_error(vol->sb, "Failed to map page."); goto out; } - buf_size = last_read_pos & ~PAGE_CACHE_MASK; + buf_size = last_read_pos & ~PAGE_MASK; buf = page_address(page) + buf_size; - buf_size = PAGE_CACHE_SIZE - buf_size; + buf_size = PAGE_SIZE - buf_size; if (unlikely(last_read_pos + buf_size > i_size)) buf_size = i_size - last_read_pos; buf_size <<= 3; diff --git a/fs/ntfs/logfile.c b/fs/ntfs/logfile.c index c71de292c5ad..9d71213ca81e 100644 --- a/fs/ntfs/logfile.c +++ b/fs/ntfs/logfile.c @@ -381,7 +381,7 @@ static int ntfs_check_and_load_restart_page(struct inode *vi, * completely inside @rp, just copy it from there. Otherwise map all * the required pages and copy the data from them. */ - size = PAGE_CACHE_SIZE - (pos & ~PAGE_CACHE_MASK); + size = PAGE_SIZE - (pos & ~PAGE_MASK); if (size >= le32_to_cpu(rp->system_page_size)) { memcpy(trp, rp, le32_to_cpu(rp->system_page_size)); } else { @@ -394,8 +394,8 @@ static int ntfs_check_and_load_restart_page(struct inode *vi, /* Copy the remaining data one page at a time. */ have_read = size; to_read = le32_to_cpu(rp->system_page_size) - size; - idx = (pos + size) >> PAGE_CACHE_SHIFT; - BUG_ON((pos + size) & ~PAGE_CACHE_MASK); + idx = (pos + size) >> PAGE_SHIFT; + BUG_ON((pos + size) & ~PAGE_MASK); do { page = ntfs_map_page(vi->i_mapping, idx); if (IS_ERR(page)) { @@ -406,7 +406,7 @@ static int ntfs_check_and_load_restart_page(struct inode *vi, err = -EIO; goto err_out; } - size = min_t(int, to_read, PAGE_CACHE_SIZE); + size = min_t(int, to_read, PAGE_SIZE); memcpy((u8*)trp + have_read, page_address(page), size); ntfs_unmap_page(page); have_read += size; @@ -509,11 +509,11 @@ bool ntfs_check_logfile(struct inode *log_vi, RESTART_PAGE_HEADER **rp) * log page size if the page cache size is between the default log page * size and twice that. 
*/ - if (PAGE_CACHE_SIZE >= DefaultLogPageSize && PAGE_CACHE_SIZE <= + if (PAGE_SIZE >= DefaultLogPageSize && PAGE_SIZE <= DefaultLogPageSize * 2) log_page_size = DefaultLogPageSize; else - log_page_size = PAGE_CACHE_SIZE; + log_page_size = PAGE_SIZE; log_page_mask = log_page_size - 1; /* * Use ntfs_ffs() instead of ffs() to enable the compiler to @@ -539,7 +539,7 @@ bool ntfs_check_logfile(struct inode *log_vi, RESTART_PAGE_HEADER **rp) * to be empty. */ for (pos = 0; pos < size; pos <<= 1) { - pgoff_t idx = pos >> PAGE_CACHE_SHIFT; + pgoff_t idx = pos >> PAGE_SHIFT; if (!page || page->index != idx) { if (page) ntfs_unmap_page(page); @@ -550,7 +550,7 @@ bool ntfs_check_logfile(struct inode *log_vi, RESTART_PAGE_HEADER **rp) goto err_out; } } - kaddr = (u8*)page_address(page) + (pos & ~PAGE_CACHE_MASK); + kaddr = (u8*)page_address(page) + (pos & ~PAGE_MASK); /* * A non-empty block means the logfile is not empty while an * empty block after a non-empty block has been encountered diff --git a/fs/ntfs/mft.c b/fs/ntfs/mft.c index 3014a36a255b..37b2501caaa4 100644 --- a/fs/ntfs/mft.c +++ b/fs/ntfs/mft.c @@ -61,16 +61,16 @@ static inline MFT_RECORD *map_mft_record_page(ntfs_inode *ni) * here if the volume was that big... */ index = (u64)ni->mft_no << vol->mft_record_size_bits >> - PAGE_CACHE_SHIFT; - ofs = (ni->mft_no << vol->mft_record_size_bits) & ~PAGE_CACHE_MASK; + PAGE_SHIFT; + ofs = (ni->mft_no << vol->mft_record_size_bits) & ~PAGE_MASK; i_size = i_size_read(mft_vi); /* The maximum valid index into the page cache for $MFT's data. */ - end_index = i_size >> PAGE_CACHE_SHIFT; + end_index = i_size >> PAGE_SHIFT; /* If the wanted index is out of bounds the mft record doesn't exist. */ if (unlikely(index >= end_index)) { - if (index > end_index || (i_size & ~PAGE_CACHE_MASK) < ofs + + if (index > end_index || (i_size & ~PAGE_MASK) < ofs + vol->mft_record_size) { page = ERR_PTR(-ENOENT); ntfs_error(vol->sb, "Attempt to read mft record 0x%lx, " @@ -487,7 +487,7 @@ int ntfs_sync_mft_mirror(ntfs_volume *vol, const unsigned long mft_no, } /* Get the page containing the mirror copy of the mft record @m. */ page = ntfs_map_page(vol->mftmirr_ino->i_mapping, mft_no >> - (PAGE_CACHE_SHIFT - vol->mft_record_size_bits)); + (PAGE_SHIFT - vol->mft_record_size_bits)); if (IS_ERR(page)) { ntfs_error(vol->sb, "Failed to map mft mirror page."); err = PTR_ERR(page); @@ -497,7 +497,7 @@ int ntfs_sync_mft_mirror(ntfs_volume *vol, const unsigned long mft_no, BUG_ON(!PageUptodate(page)); ClearPageUptodate(page); /* Offset of the mft mirror record inside the page. */ - page_ofs = (mft_no << vol->mft_record_size_bits) & ~PAGE_CACHE_MASK; + page_ofs = (mft_no << vol->mft_record_size_bits) & ~PAGE_MASK; /* The address in the page of the mirror copy of the mft record @m. */ kmirr = page_address(page) + page_ofs; /* Copy the mst protected mft record to the mirror. */ @@ -1178,8 +1178,8 @@ static int ntfs_mft_bitmap_find_and_alloc_free_rec_nolock(ntfs_volume *vol, for (; pass <= 2;) { /* Cap size to pass_end. 
*/ ofs = data_pos >> 3; - page_ofs = ofs & ~PAGE_CACHE_MASK; - size = PAGE_CACHE_SIZE - page_ofs; + page_ofs = ofs & ~PAGE_MASK; + size = PAGE_SIZE - page_ofs; ll = ((pass_end + 7) >> 3) - ofs; if (size > ll) size = ll; @@ -1190,7 +1190,7 @@ static int ntfs_mft_bitmap_find_and_alloc_free_rec_nolock(ntfs_volume *vol, */ if (size) { page = ntfs_map_page(mftbmp_mapping, - ofs >> PAGE_CACHE_SHIFT); + ofs >> PAGE_SHIFT); if (IS_ERR(page)) { ntfs_error(vol->sb, "Failed to read mft " "bitmap, aborting."); @@ -1328,13 +1328,13 @@ static int ntfs_mft_bitmap_extend_allocation_nolock(ntfs_volume *vol) */ ll = lcn >> 3; page = ntfs_map_page(vol->lcnbmp_ino->i_mapping, - ll >> PAGE_CACHE_SHIFT); + ll >> PAGE_SHIFT); if (IS_ERR(page)) { up_write(&mftbmp_ni->runlist.lock); ntfs_error(vol->sb, "Failed to read from lcn bitmap."); return PTR_ERR(page); } - b = (u8*)page_address(page) + (ll & ~PAGE_CACHE_MASK); + b = (u8*)page_address(page) + (ll & ~PAGE_MASK); tb = 1 << (lcn & 7ull); down_write(&vol->lcnbmp_lock); if (*b != 0xff && !(*b & tb)) { @@ -2103,14 +2103,14 @@ static int ntfs_mft_record_format(const ntfs_volume *vol, const s64 mft_no) * The index into the page cache and the offset within the page cache * page of the wanted mft record. */ - index = mft_no << vol->mft_record_size_bits >> PAGE_CACHE_SHIFT; - ofs = (mft_no << vol->mft_record_size_bits) & ~PAGE_CACHE_MASK; + index = mft_no << vol->mft_record_size_bits >> PAGE_SHIFT; + ofs = (mft_no << vol->mft_record_size_bits) & ~PAGE_MASK; /* The maximum valid index into the page cache for $MFT's data. */ i_size = i_size_read(mft_vi); - end_index = i_size >> PAGE_CACHE_SHIFT; + end_index = i_size >> PAGE_SHIFT; if (unlikely(index >= end_index)) { if (unlikely(index > end_index || ofs + vol->mft_record_size >= - (i_size & ~PAGE_CACHE_MASK))) { + (i_size & ~PAGE_MASK))) { ntfs_error(vol->sb, "Tried to format non-existing mft " "record 0x%llx.", (long long)mft_no); return -ENOENT; @@ -2515,8 +2515,8 @@ mft_rec_already_initialized: * We now have allocated and initialized the mft record. Calculate the * index of and the offset within the page cache page the record is in. */ - index = bit << vol->mft_record_size_bits >> PAGE_CACHE_SHIFT; - ofs = (bit << vol->mft_record_size_bits) & ~PAGE_CACHE_MASK; + index = bit << vol->mft_record_size_bits >> PAGE_SHIFT; + ofs = (bit << vol->mft_record_size_bits) & ~PAGE_MASK; /* Read, map, and pin the page containing the mft record. */ page = ntfs_map_page(vol->mft_ino->i_mapping, index); if (IS_ERR(page)) { diff --git a/fs/ntfs/ntfs.h b/fs/ntfs/ntfs.h index c581e26a350d..12de47b96ca9 100644 --- a/fs/ntfs/ntfs.h +++ b/fs/ntfs/ntfs.h @@ -43,7 +43,7 @@ typedef enum { NTFS_MAX_NAME_LEN = 255, NTFS_MAX_ATTR_NAME_LEN = 255, NTFS_MAX_CLUSTER_SIZE = 64 * 1024, /* 64kiB */ - NTFS_MAX_PAGES_PER_CLUSTER = NTFS_MAX_CLUSTER_SIZE / PAGE_CACHE_SIZE, + NTFS_MAX_PAGES_PER_CLUSTER = NTFS_MAX_CLUSTER_SIZE / PAGE_SIZE, } NTFS_CONSTANTS; /* Global variables. */ diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c index 1b38abdaa3ed..ab2b0930054e 100644 --- a/fs/ntfs/super.c +++ b/fs/ntfs/super.c @@ -826,11 +826,11 @@ static bool parse_ntfs_boot_sector(ntfs_volume *vol, const NTFS_BOOT_SECTOR *b) * We cannot support mft record sizes above the PAGE_CACHE_SIZE since * we store $MFT/$DATA, the table of mft records in the page cache. */ - if (vol->mft_record_size > PAGE_CACHE_SIZE) { + if (vol->mft_record_size > PAGE_SIZE) { ntfs_error(vol->sb, "Mft record size (%i) exceeds the " "PAGE_CACHE_SIZE on your system (%lu). " "This is not supported. 
Sorry.", - vol->mft_record_size, PAGE_CACHE_SIZE); + vol->mft_record_size, PAGE_SIZE); return false; } /* We cannot support mft record sizes below the sector size. */ @@ -1096,7 +1096,7 @@ static bool check_mft_mirror(ntfs_volume *vol) ntfs_debug("Entering."); /* Compare contents of $MFT and $MFTMirr. */ - mrecs_per_page = PAGE_CACHE_SIZE / vol->mft_record_size; + mrecs_per_page = PAGE_SIZE / vol->mft_record_size; BUG_ON(!mrecs_per_page); BUG_ON(!vol->mftmirr_size); mft_page = mirr_page = NULL; @@ -1615,20 +1615,20 @@ static bool load_and_init_attrdef(ntfs_volume *vol) if (!vol->attrdef) goto iput_failed; index = 0; - max_index = i_size >> PAGE_CACHE_SHIFT; - size = PAGE_CACHE_SIZE; + max_index = i_size >> PAGE_SHIFT; + size = PAGE_SIZE; while (index < max_index) { /* Read the attrdef table and copy it into the linear buffer. */ read_partial_attrdef_page: page = ntfs_map_page(ino->i_mapping, index); if (IS_ERR(page)) goto free_iput_failed; - memcpy((u8*)vol->attrdef + (index++ << PAGE_CACHE_SHIFT), + memcpy((u8*)vol->attrdef + (index++ << PAGE_SHIFT), page_address(page), size); ntfs_unmap_page(page); }; - if (size == PAGE_CACHE_SIZE) { - size = i_size & ~PAGE_CACHE_MASK; + if (size == PAGE_SIZE) { + size = i_size & ~PAGE_MASK; if (size) goto read_partial_attrdef_page; } @@ -1684,20 +1684,20 @@ static bool load_and_init_upcase(ntfs_volume *vol) if (!vol->upcase) goto iput_upcase_failed; index = 0; - max_index = i_size >> PAGE_CACHE_SHIFT; - size = PAGE_CACHE_SIZE; + max_index = i_size >> PAGE_SHIFT; + size = PAGE_SIZE; while (index < max_index) { /* Read the upcase table and copy it into the linear buffer. */ read_partial_upcase_page: page = ntfs_map_page(ino->i_mapping, index); if (IS_ERR(page)) goto iput_upcase_failed; - memcpy((char*)vol->upcase + (index++ << PAGE_CACHE_SHIFT), + memcpy((char*)vol->upcase + (index++ << PAGE_SHIFT), page_address(page), size); ntfs_unmap_page(page); }; - if (size == PAGE_CACHE_SIZE) { - size = i_size & ~PAGE_CACHE_MASK; + if (size == PAGE_SIZE) { + size = i_size & ~PAGE_MASK; if (size) goto read_partial_upcase_page; } @@ -2474,11 +2474,11 @@ static s64 get_nr_free_clusters(ntfs_volume *vol) * multiples of PAGE_CACHE_SIZE, rounding up so that if we have one * full and one partial page max_index = 2. */ - max_index = (((vol->nr_clusters + 7) >> 3) + PAGE_CACHE_SIZE - 1) >> - PAGE_CACHE_SHIFT; + max_index = (((vol->nr_clusters + 7) >> 3) + PAGE_SIZE - 1) >> + PAGE_SHIFT; /* Use multiples of 4 bytes, thus max_size is PAGE_CACHE_SIZE / 4. */ ntfs_debug("Reading $Bitmap, max_index = 0x%lx, max_size = 0x%lx.", - max_index, PAGE_CACHE_SIZE / 4); + max_index, PAGE_SIZE / 4); for (index = 0; index < max_index; index++) { unsigned long *kaddr; @@ -2491,7 +2491,7 @@ static s64 get_nr_free_clusters(ntfs_volume *vol) if (IS_ERR(page)) { ntfs_debug("read_mapping_page() error. Skipping " "page (index 0x%lx).", index); - nr_free -= PAGE_CACHE_SIZE * 8; + nr_free -= PAGE_SIZE * 8; continue; } kaddr = kmap_atomic(page); @@ -2503,9 +2503,9 @@ static s64 get_nr_free_clusters(ntfs_volume *vol) * ntfs_readpage(). */ nr_free -= bitmap_weight(kaddr, - PAGE_CACHE_SIZE * BITS_PER_BYTE); + PAGE_SIZE * BITS_PER_BYTE); kunmap_atomic(kaddr); - page_cache_release(page); + put_page(page); } ntfs_debug("Finished reading $Bitmap, last index = 0x%lx.", index - 1); /* @@ -2549,7 +2549,7 @@ static unsigned long __get_nr_free_mft_records(ntfs_volume *vol, ntfs_debug("Entering."); /* Use multiples of 4 bytes, thus max_size is PAGE_CACHE_SIZE / 4. 
*/ ntfs_debug("Reading $MFT/$BITMAP, max_index = 0x%lx, max_size = " - "0x%lx.", max_index, PAGE_CACHE_SIZE / 4); + "0x%lx.", max_index, PAGE_SIZE / 4); for (index = 0; index < max_index; index++) { unsigned long *kaddr; @@ -2562,7 +2562,7 @@ static unsigned long __get_nr_free_mft_records(ntfs_volume *vol, if (IS_ERR(page)) { ntfs_debug("read_mapping_page() error. Skipping " "page (index 0x%lx).", index); - nr_free -= PAGE_CACHE_SIZE * 8; + nr_free -= PAGE_SIZE * 8; continue; } kaddr = kmap_atomic(page); @@ -2574,9 +2574,9 @@ static unsigned long __get_nr_free_mft_records(ntfs_volume *vol, * ntfs_readpage(). */ nr_free -= bitmap_weight(kaddr, - PAGE_CACHE_SIZE * BITS_PER_BYTE); + PAGE_SIZE * BITS_PER_BYTE); kunmap_atomic(kaddr); - page_cache_release(page); + put_page(page); } ntfs_debug("Finished reading $MFT/$BITMAP, last index = 0x%lx.", index - 1); @@ -2618,17 +2618,17 @@ static int ntfs_statfs(struct dentry *dentry, struct kstatfs *sfs) /* Type of filesystem. */ sfs->f_type = NTFS_SB_MAGIC; /* Optimal transfer block size. */ - sfs->f_bsize = PAGE_CACHE_SIZE; + sfs->f_bsize = PAGE_SIZE; /* * Total data blocks in filesystem in units of f_bsize and since * inodes are also stored in data blocs ($MFT is a file) this is just * the total clusters. */ sfs->f_blocks = vol->nr_clusters << vol->cluster_size_bits >> - PAGE_CACHE_SHIFT; + PAGE_SHIFT; /* Free data blocks in filesystem in units of f_bsize. */ size = get_nr_free_clusters(vol) << vol->cluster_size_bits >> - PAGE_CACHE_SHIFT; + PAGE_SHIFT; if (size < 0LL) size = 0LL; /* Free blocks avail to non-superuser, same as above on NTFS. */ @@ -2643,7 +2643,7 @@ static int ntfs_statfs(struct dentry *dentry, struct kstatfs *sfs) * have one full and one partial page max_index = 2. */ max_index = ((((mft_ni->initialized_size >> vol->mft_record_size_bits) - + 7) >> 3) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; + + 7) >> 3) + PAGE_SIZE - 1) >> PAGE_SHIFT; read_unlock_irqrestore(&mft_ni->size_lock, flags); /* Number of inodes in filesystem (at this point in time). */ sfs->f_files = size; @@ -2766,14 +2766,14 @@ static int ntfs_fill_super(struct super_block *sb, void *opt, const int silent) goto err_out_now; /* We support sector sizes up to the PAGE_CACHE_SIZE. */ - if (bdev_logical_block_size(sb->s_bdev) > PAGE_CACHE_SIZE) { + if (bdev_logical_block_size(sb->s_bdev) > PAGE_SIZE) { if (!silent) ntfs_error(sb, "Device has unsupported sector size " "(%i). 
The maximum supported sector " "size on this architecture is %lu " "bytes.", bdev_logical_block_size(sb->s_bdev), - PAGE_CACHE_SIZE); + PAGE_SIZE); goto err_out_now; } /* diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index 70907d638b60..e361d1a0ca09 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c @@ -6671,7 +6671,7 @@ static void ocfs2_zero_cluster_pages(struct inode *inode, loff_t start, { int i; struct page *page; - unsigned int from, to = PAGE_CACHE_SIZE; + unsigned int from, to = PAGE_SIZE; struct super_block *sb = inode->i_sb; BUG_ON(!ocfs2_sparse_alloc(OCFS2_SB(sb))); @@ -6679,21 +6679,21 @@ static void ocfs2_zero_cluster_pages(struct inode *inode, loff_t start, if (numpages == 0) goto out; - to = PAGE_CACHE_SIZE; + to = PAGE_SIZE; for(i = 0; i < numpages; i++) { page = pages[i]; - from = start & (PAGE_CACHE_SIZE - 1); - if ((end >> PAGE_CACHE_SHIFT) == page->index) - to = end & (PAGE_CACHE_SIZE - 1); + from = start & (PAGE_SIZE - 1); + if ((end >> PAGE_SHIFT) == page->index) + to = end & (PAGE_SIZE - 1); - BUG_ON(from > PAGE_CACHE_SIZE); - BUG_ON(to > PAGE_CACHE_SIZE); + BUG_ON(from > PAGE_SIZE); + BUG_ON(to > PAGE_SIZE); ocfs2_map_and_dirty_page(inode, handle, from, to, page, 1, &phys); - start = (page->index + 1) << PAGE_CACHE_SHIFT; + start = (page->index + 1) << PAGE_SHIFT; } out: if (pages) @@ -6712,7 +6712,7 @@ int ocfs2_grab_pages(struct inode *inode, loff_t start, loff_t end, numpages = 0; last_page_bytes = PAGE_ALIGN(end); - index = start >> PAGE_CACHE_SHIFT; + index = start >> PAGE_SHIFT; do { pages[numpages] = find_or_create_page(mapping, index, GFP_NOFS); if (!pages[numpages]) { @@ -6723,7 +6723,7 @@ int ocfs2_grab_pages(struct inode *inode, loff_t start, loff_t end, numpages++; index++; - } while (index < (last_page_bytes >> PAGE_CACHE_SHIFT)); + } while (index < (last_page_bytes >> PAGE_SHIFT)); out: if (ret != 0) { @@ -6950,8 +6950,8 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode, * to do that now. 
*/ if (!ocfs2_sparse_alloc(osb) && - PAGE_CACHE_SIZE < osb->s_clustersize) - end = PAGE_CACHE_SIZE; + PAGE_SIZE < osb->s_clustersize) + end = PAGE_SIZE; ret = ocfs2_grab_eof_pages(inode, 0, end, pages, &num_pages); if (ret) { @@ -6971,8 +6971,8 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode, goto out_unlock; } - page_end = PAGE_CACHE_SIZE; - if (PAGE_CACHE_SIZE > osb->s_clustersize) + page_end = PAGE_SIZE; + if (PAGE_SIZE > osb->s_clustersize) page_end = osb->s_clustersize; for (i = 0; i < num_pages; i++) diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index 1581240a7ca0..ce5dc4f92935 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c @@ -234,7 +234,7 @@ int ocfs2_read_inline_data(struct inode *inode, struct page *page, size = i_size_read(inode); - if (size > PAGE_CACHE_SIZE || + if (size > PAGE_SIZE || size > ocfs2_max_inline_data_with_xattr(inode->i_sb, di)) { ocfs2_error(inode->i_sb, "Inode %llu has with inline data has bad size: %Lu\n", @@ -247,7 +247,7 @@ int ocfs2_read_inline_data(struct inode *inode, struct page *page, if (size) memcpy(kaddr, di->id2.i_data.id_data, size); /* Clear the remaining part of the page */ - memset(kaddr + size, 0, PAGE_CACHE_SIZE - size); + memset(kaddr + size, 0, PAGE_SIZE - size); flush_dcache_page(page); kunmap_atomic(kaddr); @@ -282,7 +282,7 @@ static int ocfs2_readpage(struct file *file, struct page *page) { struct inode *inode = page->mapping->host; struct ocfs2_inode_info *oi = OCFS2_I(inode); - loff_t start = (loff_t)page->index << PAGE_CACHE_SHIFT; + loff_t start = (loff_t)page->index << PAGE_SHIFT; int ret, unlock = 1; trace_ocfs2_readpage((unsigned long long)oi->ip_blkno, @@ -385,7 +385,7 @@ static int ocfs2_readpages(struct file *filp, struct address_space *mapping, * drop out in that case as it's not worth handling here. 
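ocfs2_read_inline_data() above copies the inline bytes into the page and then zeroes the remainder so no stale page contents survive past i_size. The same copy-then-zero pattern as a self-contained sketch with made-up sizes:

#include <stdio.h>
#include <string.h>

#define PAGE_SIZE 4096UL

int main(void)
{
	static char page[PAGE_SIZE];
	const char inline_data[] = "hello inline world"; /* invented payload */
	size_t size = sizeof(inline_data);               /* i_size stand-in */

	/* copy what the inode carries, zero the rest of the page */
	memcpy(page, inline_data, size);
	memset(page + size, 0, PAGE_SIZE - size);

	printf("copied %zu bytes, zeroed %lu\n", size,
	       (unsigned long)(PAGE_SIZE - size));
	return 0;
}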
*/ last = list_entry(pages->prev, struct page, lru); - start = (loff_t)last->index << PAGE_CACHE_SHIFT; + start = (loff_t)last->index << PAGE_SHIFT; if (start >= i_size_read(inode)) goto out_unlock; @@ -511,12 +511,12 @@ static void ocfs2_figure_cluster_boundaries(struct ocfs2_super *osb, unsigned int *start, unsigned int *end) { - unsigned int cluster_start = 0, cluster_end = PAGE_CACHE_SIZE; + unsigned int cluster_start = 0, cluster_end = PAGE_SIZE; - if (unlikely(PAGE_CACHE_SHIFT > osb->s_clustersize_bits)) { + if (unlikely(PAGE_SHIFT > osb->s_clustersize_bits)) { unsigned int cpp; - cpp = 1 << (PAGE_CACHE_SHIFT - osb->s_clustersize_bits); + cpp = 1 << (PAGE_SHIFT - osb->s_clustersize_bits); cluster_start = cpos % cpp; cluster_start = cluster_start << osb->s_clustersize_bits; @@ -687,10 +687,10 @@ next_bh: #if (PAGE_CACHE_SIZE >= OCFS2_MAX_CLUSTERSIZE) #define OCFS2_MAX_CTXT_PAGES 1 #else -#define OCFS2_MAX_CTXT_PAGES (OCFS2_MAX_CLUSTERSIZE / PAGE_CACHE_SIZE) +#define OCFS2_MAX_CTXT_PAGES (OCFS2_MAX_CLUSTERSIZE / PAGE_SIZE) #endif -#define OCFS2_MAX_CLUSTERS_PER_PAGE (PAGE_CACHE_SIZE / OCFS2_MIN_CLUSTERSIZE) +#define OCFS2_MAX_CLUSTERS_PER_PAGE (PAGE_SIZE / OCFS2_MIN_CLUSTERSIZE) struct ocfs2_unwritten_extent { struct list_head ue_node; @@ -785,7 +785,7 @@ void ocfs2_unlock_and_free_pages(struct page **pages, int num_pages) if (pages[i]) { unlock_page(pages[i]); mark_page_accessed(pages[i]); - page_cache_release(pages[i]); + put_page(pages[i]); } } } @@ -808,7 +808,7 @@ static void ocfs2_unlock_pages(struct ocfs2_write_ctxt *wc) } } mark_page_accessed(wc->w_target_page); - page_cache_release(wc->w_target_page); + put_page(wc->w_target_page); } ocfs2_unlock_and_free_pages(wc->w_pages, wc->w_num_pages); } @@ -857,7 +857,7 @@ static int ocfs2_alloc_write_ctxt(struct ocfs2_write_ctxt **wcp, wc->w_di_bh = di_bh; wc->w_type = type; - if (unlikely(PAGE_CACHE_SHIFT > osb->s_clustersize_bits)) + if (unlikely(PAGE_SHIFT > osb->s_clustersize_bits)) wc->w_large_pages = 1; else wc->w_large_pages = 0; @@ -920,7 +920,7 @@ static void ocfs2_write_failure(struct inode *inode, loff_t user_pos, unsigned user_len) { int i; - unsigned from = user_pos & (PAGE_CACHE_SIZE - 1), + unsigned from = user_pos & (PAGE_SIZE - 1), to = user_pos + user_len; struct page *tmppage; @@ -960,7 +960,7 @@ static int ocfs2_prepare_page_for_write(struct inode *inode, u64 *p_blkno, (page_offset(page) <= user_pos)); if (page == wc->w_target_page) { - map_from = user_pos & (PAGE_CACHE_SIZE - 1); + map_from = user_pos & (PAGE_SIZE - 1); map_to = map_from + user_len; if (new) @@ -1034,7 +1034,7 @@ static int ocfs2_grab_pages_for_write(struct address_space *mapping, struct inode *inode = mapping->host; loff_t last_byte; - target_index = user_pos >> PAGE_CACHE_SHIFT; + target_index = user_pos >> PAGE_SHIFT; /* * Figure out how many pages we'll be manipulating here. 
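ocfs2_figure_cluster_boundaries() above handles the PAGE_SHIFT > s_clustersize_bits case, where several clusters share one page and a cluster's byte range inside the page follows from its position modulo clusters-per-page. A sketch under assumed sizes (4 KiB pages, hypothetical 1 KiB clusters):

#include <stdio.h>

#define PAGE_SHIFT 12

int main(void)
{
	unsigned int cbits = 10;                       /* 1 KiB clusters */
	unsigned int cpp = 1U << (PAGE_SHIFT - cbits); /* 4 clusters per page */
	unsigned int cpos = 7;                         /* invented cluster pos */

	unsigned int start = (cpos % cpp) << cbits;
	unsigned int end = start + (1U << cbits);

	/* cluster 7 is the 4th cluster of its page: bytes 3072..4095 */
	printf("cluster %u covers page bytes %u..%u\n", cpos, start, end - 1);
	return 0;
}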
For @@ -1053,14 +1053,14 @@ static int ocfs2_grab_pages_for_write(struct address_space *mapping, */ last_byte = max(user_pos + user_len, i_size_read(inode)); BUG_ON(last_byte < 1); - end_index = ((last_byte - 1) >> PAGE_CACHE_SHIFT) + 1; + end_index = ((last_byte - 1) >> PAGE_SHIFT) + 1; if ((start + wc->w_num_pages) > end_index) wc->w_num_pages = end_index - start; } else { wc->w_num_pages = 1; start = target_index; } - end_index = (user_pos + user_len - 1) >> PAGE_CACHE_SHIFT; + end_index = (user_pos + user_len - 1) >> PAGE_SHIFT; for(i = 0; i < wc->w_num_pages; i++) { index = start + i; @@ -1082,7 +1082,7 @@ static int ocfs2_grab_pages_for_write(struct address_space *mapping, goto out; } - page_cache_get(mmap_page); + get_page(mmap_page); wc->w_pages[i] = mmap_page; wc->w_target_locked = true; } else if (index >= target_index && index <= end_index && @@ -1272,7 +1272,7 @@ static void ocfs2_set_target_boundaries(struct ocfs2_super *osb, { struct ocfs2_write_cluster_desc *desc; - wc->w_target_from = pos & (PAGE_CACHE_SIZE - 1); + wc->w_target_from = pos & (PAGE_SIZE - 1); wc->w_target_to = wc->w_target_from + len; if (alloc == 0) @@ -1309,7 +1309,7 @@ static void ocfs2_set_target_boundaries(struct ocfs2_super *osb, &wc->w_target_to); } else { wc->w_target_from = 0; - wc->w_target_to = PAGE_CACHE_SIZE; + wc->w_target_to = PAGE_SIZE; } } @@ -1981,7 +1981,7 @@ int ocfs2_write_end_nolock(struct address_space *mapping, struct page *page, void *fsdata) { int i, ret; - unsigned from, to, start = pos & (PAGE_CACHE_SIZE - 1); + unsigned from, to, start = pos & (PAGE_SIZE - 1); struct inode *inode = mapping->host; struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); struct ocfs2_write_ctxt *wc = fsdata; @@ -2027,8 +2027,8 @@ int ocfs2_write_end_nolock(struct address_space *mapping, from = wc->w_target_from; to = wc->w_target_to; - BUG_ON(from > PAGE_CACHE_SIZE || - to > PAGE_CACHE_SIZE || + BUG_ON(from > PAGE_SIZE || + to > PAGE_SIZE || to < from); } else { /* @@ -2037,7 +2037,7 @@ int ocfs2_write_end_nolock(struct address_space *mapping, * to flush their entire range. 
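ocfs2_grab_pages_for_write() above sizes its page set from the byte range of the write: the start page is pos >> PAGE_SHIFT and the exclusive end page is ((last_byte - 1) >> PAGE_SHIFT) + 1. A worked sketch of that arithmetic with invented values:

#include <stdio.h>

#define PAGE_SHIFT 12

int main(void)
{
	unsigned long long pos = 5000, len = 9000; /* invented write range */
	unsigned long long last_byte = pos + len;

	unsigned long start = pos >> PAGE_SHIFT;
	unsigned long end_excl = ((last_byte - 1) >> PAGE_SHIFT) + 1;

	/* bytes 5000..13999 touch pages 1..3, so three pages are needed */
	printf("pages %lu..%lu (%lu total)\n",
	       start, end_excl - 1, end_excl - start);
	return 0;
}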
*/ from = 0; - to = PAGE_CACHE_SIZE; + to = PAGE_SIZE; } if (page_has_buffers(tmppage)) { diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c index bd15929b5f92..1934abb6b680 100644 --- a/fs/ocfs2/cluster/heartbeat.c +++ b/fs/ocfs2/cluster/heartbeat.c @@ -417,13 +417,13 @@ static struct bio *o2hb_setup_one_bio(struct o2hb_region *reg, bio->bi_private = wc; bio->bi_end_io = o2hb_bio_end_io; - vec_start = (cs << bits) % PAGE_CACHE_SIZE; + vec_start = (cs << bits) % PAGE_SIZE; while(cs < max_slots) { current_page = cs / spp; page = reg->hr_slot_data[current_page]; - vec_len = min(PAGE_CACHE_SIZE - vec_start, - (max_slots-cs) * (PAGE_CACHE_SIZE/spp) ); + vec_len = min(PAGE_SIZE - vec_start, + (max_slots-cs) * (PAGE_SIZE/spp) ); mlog(ML_HB_BIO, "page %d, vec_len = %u, vec_start = %u\n", current_page, vec_len, vec_start); @@ -431,7 +431,7 @@ static struct bio *o2hb_setup_one_bio(struct o2hb_region *reg, len = bio_add_page(bio, page, vec_len, vec_start); if (len != vec_len) break; - cs += vec_len / (PAGE_CACHE_SIZE/spp); + cs += vec_len / (PAGE_SIZE/spp); vec_start = 0; } @@ -1576,7 +1576,7 @@ static ssize_t o2hb_region_dev_show(struct config_item *item, char *page) static void o2hb_init_region_params(struct o2hb_region *reg) { - reg->hr_slots_per_page = PAGE_CACHE_SIZE >> reg->hr_block_bits; + reg->hr_slots_per_page = PAGE_SIZE >> reg->hr_block_bits; reg->hr_timeout_ms = O2HB_REGION_TIMEOUT_MS; mlog(ML_HEARTBEAT, "hr_start_block = %llu, hr_blocks = %u\n", diff --git a/fs/ocfs2/dlmfs/dlmfs.c b/fs/ocfs2/dlmfs/dlmfs.c index 03768bb3aab1..47b3b2d4e775 100644 --- a/fs/ocfs2/dlmfs/dlmfs.c +++ b/fs/ocfs2/dlmfs/dlmfs.c @@ -571,8 +571,8 @@ static int dlmfs_fill_super(struct super_block * sb, int silent) { sb->s_maxbytes = MAX_LFS_FILESIZE; - sb->s_blocksize = PAGE_CACHE_SIZE; - sb->s_blocksize_bits = PAGE_CACHE_SHIFT; + sb->s_blocksize = PAGE_SIZE; + sb->s_blocksize_bits = PAGE_SHIFT; sb->s_magic = DLMFS_MAGIC; sb->s_op = &dlmfs_ops; sb->s_root = d_make_root(dlmfs_get_root_inode(sb)); diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index c18ab45f8d21..5308841756be 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -770,14 +770,14 @@ static int ocfs2_write_zero_page(struct inode *inode, u64 abs_from, { struct address_space *mapping = inode->i_mapping; struct page *page; - unsigned long index = abs_from >> PAGE_CACHE_SHIFT; + unsigned long index = abs_from >> PAGE_SHIFT; handle_t *handle; int ret = 0; unsigned zero_from, zero_to, block_start, block_end; struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; BUG_ON(abs_from >= abs_to); - BUG_ON(abs_to > (((u64)index + 1) << PAGE_CACHE_SHIFT)); + BUG_ON(abs_to > (((u64)index + 1) << PAGE_SHIFT)); BUG_ON(abs_from & (inode->i_blkbits - 1)); handle = ocfs2_zero_start_ordered_transaction(inode, di_bh); @@ -794,10 +794,10 @@ static int ocfs2_write_zero_page(struct inode *inode, u64 abs_from, } /* Get the offsets within the page that we want to zero */ - zero_from = abs_from & (PAGE_CACHE_SIZE - 1); - zero_to = abs_to & (PAGE_CACHE_SIZE - 1); + zero_from = abs_from & (PAGE_SIZE - 1); + zero_to = abs_to & (PAGE_SIZE - 1); if (!zero_to) - zero_to = PAGE_CACHE_SIZE; + zero_to = PAGE_SIZE; trace_ocfs2_write_zero_page( (unsigned long long)OCFS2_I(inode)->ip_blkno, @@ -851,7 +851,7 @@ static int ocfs2_write_zero_page(struct inode *inode, u64 abs_from, out_unlock: unlock_page(page); - page_cache_release(page); + put_page(page); out_commit_trans: if (handle) ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle); @@ -959,7 +959,7 @@ static int 
ocfs2_zero_extend_range(struct inode *inode, u64 range_start, BUG_ON(range_start >= range_end); while (zero_pos < range_end) { - next_pos = (zero_pos & PAGE_CACHE_MASK) + PAGE_CACHE_SIZE; + next_pos = (zero_pos & PAGE_MASK) + PAGE_SIZE; if (next_pos > range_end) next_pos = range_end; rc = ocfs2_write_zero_page(inode, zero_pos, next_pos, di_bh); diff --git a/fs/ocfs2/mmap.c b/fs/ocfs2/mmap.c index 9ea081f4e6e4..71545ad4628c 100644 --- a/fs/ocfs2/mmap.c +++ b/fs/ocfs2/mmap.c @@ -65,13 +65,13 @@ static int __ocfs2_page_mkwrite(struct file *file, struct buffer_head *di_bh, struct inode *inode = file_inode(file); struct address_space *mapping = inode->i_mapping; loff_t pos = page_offset(page); - unsigned int len = PAGE_CACHE_SIZE; + unsigned int len = PAGE_SIZE; pgoff_t last_index; struct page *locked_page = NULL; void *fsdata; loff_t size = i_size_read(inode); - last_index = (size - 1) >> PAGE_CACHE_SHIFT; + last_index = (size - 1) >> PAGE_SHIFT; /* * There are cases that lead to the page no longer bebongs to the @@ -102,7 +102,7 @@ static int __ocfs2_page_mkwrite(struct file *file, struct buffer_head *di_bh, * because the "write" would invalidate their data. */ if (page->index == last_index) - len = ((size - 1) & ~PAGE_CACHE_MASK) + 1; + len = ((size - 1) & ~PAGE_MASK) + 1; ret = ocfs2_write_begin_nolock(mapping, pos, len, OCFS2_WRITE_MMAP, &locked_page, &fsdata, di_bh, page); diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h index 6cf6538a0651..e63af7ddfe68 100644 --- a/fs/ocfs2/ocfs2.h +++ b/fs/ocfs2/ocfs2.h @@ -822,10 +822,10 @@ static inline unsigned int ocfs2_page_index_to_clusters(struct super_block *sb, u32 clusters = pg_index; unsigned int cbits = OCFS2_SB(sb)->s_clustersize_bits; - if (unlikely(PAGE_CACHE_SHIFT > cbits)) - clusters = pg_index << (PAGE_CACHE_SHIFT - cbits); - else if (PAGE_CACHE_SHIFT < cbits) - clusters = pg_index >> (cbits - PAGE_CACHE_SHIFT); + if (unlikely(PAGE_SHIFT > cbits)) + clusters = pg_index << (PAGE_SHIFT - cbits); + else if (PAGE_SHIFT < cbits) + clusters = pg_index >> (cbits - PAGE_SHIFT); return clusters; } @@ -839,10 +839,10 @@ static inline pgoff_t ocfs2_align_clusters_to_page_index(struct super_block *sb, unsigned int cbits = OCFS2_SB(sb)->s_clustersize_bits; pgoff_t index = clusters; - if (PAGE_CACHE_SHIFT > cbits) { - index = (pgoff_t)clusters >> (PAGE_CACHE_SHIFT - cbits); - } else if (PAGE_CACHE_SHIFT < cbits) { - index = (pgoff_t)clusters << (cbits - PAGE_CACHE_SHIFT); + if (PAGE_SHIFT > cbits) { + index = (pgoff_t)clusters >> (PAGE_SHIFT - cbits); + } else if (PAGE_SHIFT < cbits) { + index = (pgoff_t)clusters << (cbits - PAGE_SHIFT); } return index; @@ -853,8 +853,8 @@ static inline unsigned int ocfs2_pages_per_cluster(struct super_block *sb) unsigned int cbits = OCFS2_SB(sb)->s_clustersize_bits; unsigned int pages_per_cluster = 1; - if (PAGE_CACHE_SHIFT < cbits) - pages_per_cluster = 1 << (cbits - PAGE_CACHE_SHIFT); + if (PAGE_SHIFT < cbits) + pages_per_cluster = 1 << (cbits - PAGE_SHIFT); return pages_per_cluster; } diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c index 3eff031aaf26..881242c3a8d2 100644 --- a/fs/ocfs2/refcounttree.c +++ b/fs/ocfs2/refcounttree.c @@ -2937,16 +2937,16 @@ int ocfs2_duplicate_clusters_by_page(handle_t *handle, end = i_size_read(inode); while (offset < end) { - page_index = offset >> PAGE_CACHE_SHIFT; - map_end = ((loff_t)page_index + 1) << PAGE_CACHE_SHIFT; + page_index = offset >> PAGE_SHIFT; + map_end = ((loff_t)page_index + 1) << PAGE_SHIFT; if (map_end > end) map_end = end; /* from, to is the 
offset within the page. */ - from = offset & (PAGE_CACHE_SIZE - 1); - to = PAGE_CACHE_SIZE; - if (map_end & (PAGE_CACHE_SIZE - 1)) - to = map_end & (PAGE_CACHE_SIZE - 1); + from = offset & (PAGE_SIZE - 1); + to = PAGE_SIZE; + if (map_end & (PAGE_SIZE - 1)) + to = map_end & (PAGE_SIZE - 1); page = find_or_create_page(mapping, page_index, GFP_NOFS); if (!page) { @@ -2959,7 +2959,7 @@ int ocfs2_duplicate_clusters_by_page(handle_t *handle, * In case PAGE_CACHE_SIZE <= CLUSTER_SIZE, This page * can't be dirtied before we CoW it out. */ - if (PAGE_CACHE_SIZE <= OCFS2_SB(sb)->s_clustersize) + if (PAGE_SIZE <= OCFS2_SB(sb)->s_clustersize) BUG_ON(PageDirty(page)); if (!PageUptodate(page)) { @@ -2987,7 +2987,7 @@ int ocfs2_duplicate_clusters_by_page(handle_t *handle, mark_page_accessed(page); unlock: unlock_page(page); - page_cache_release(page); + put_page(page); page = NULL; offset = map_end; if (ret) @@ -3165,8 +3165,8 @@ int ocfs2_cow_sync_writeback(struct super_block *sb, } while (offset < end) { - page_index = offset >> PAGE_CACHE_SHIFT; - map_end = ((loff_t)page_index + 1) << PAGE_CACHE_SHIFT; + page_index = offset >> PAGE_SHIFT; + map_end = ((loff_t)page_index + 1) << PAGE_SHIFT; if (map_end > end) map_end = end; @@ -3182,7 +3182,7 @@ int ocfs2_cow_sync_writeback(struct super_block *sb, mark_page_accessed(page); unlock_page(page); - page_cache_release(page); + put_page(page); page = NULL; offset = map_end; if (ret) diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 7db631e1c8b0..d7cae3327de5 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -605,8 +605,8 @@ static unsigned long long ocfs2_max_file_offset(unsigned int bbits, /* * We might be limited by page cache size. */ - if (bytes > PAGE_CACHE_SIZE) { - bytes = PAGE_CACHE_SIZE; + if (bytes > PAGE_SIZE) { + bytes = PAGE_SIZE; trim = 1; /* * Shift by 31 here so that we don't get larger than diff --git a/fs/orangefs/inode.c b/fs/orangefs/inode.c index 2382e267b49e..0166faabf8f2 100644 --- a/fs/orangefs/inode.c +++ b/fs/orangefs/inode.c @@ -18,8 +18,8 @@ static int read_one_page(struct page *page) int max_block; ssize_t bytes_read = 0; struct inode *inode = page->mapping->host; - const __u32 blocksize = PAGE_CACHE_SIZE; /* inode->i_blksize */ - const __u32 blockbits = PAGE_CACHE_SHIFT; /* inode->i_blkbits */ + const __u32 blocksize = PAGE_SIZE; /* inode->i_blksize */ + const __u32 blockbits = PAGE_SHIFT; /* inode->i_blkbits */ struct iov_iter to; struct bio_vec bv = {.bv_page = page, .bv_len = PAGE_SIZE}; @@ -86,7 +86,7 @@ static int orangefs_readpages(struct file *file, "failure adding page to cache, read_one_page returned: %d\n", ret); } else { - page_cache_release(page); + put_page(page); } } BUG_ON(!list_empty(pages)); @@ -328,7 +328,7 @@ static int orangefs_init_iops(struct inode *inode) case S_IFREG: inode->i_op = &orangefs_file_inode_operations; inode->i_fop = &orangefs_file_operations; - inode->i_blkbits = PAGE_CACHE_SHIFT; + inode->i_blkbits = PAGE_SHIFT; break; case S_IFLNK: inode->i_op = &orangefs_symlink_inode_operations; @@ -456,7 +456,7 @@ struct inode *orangefs_new_inode(struct super_block *sb, struct inode *dir, inode->i_uid = current_fsuid(); inode->i_gid = current_fsgid(); inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; - inode->i_size = PAGE_CACHE_SIZE; + inode->i_size = PAGE_SIZE; inode->i_rdev = dev; error = insert_inode_locked4(inode, hash, orangefs_test_inode, ref); diff --git a/fs/orangefs/orangefs-bufmap.c b/fs/orangefs/orangefs-bufmap.c index 1f8acc9f9a88..75375e90a63f 100644 --- 
a/fs/orangefs/orangefs-bufmap.c +++ b/fs/orangefs/orangefs-bufmap.c @@ -170,7 +170,7 @@ orangefs_bufmap_unmap(struct orangefs_bufmap *bufmap) int i; for (i = 0; i < bufmap->page_count; i++) - page_cache_release(bufmap->page_array[i]); + put_page(bufmap->page_array[i]); } static void @@ -299,7 +299,7 @@ orangefs_bufmap_map(struct orangefs_bufmap *bufmap, for (i = 0; i < ret; i++) { SetPageError(bufmap->page_array[i]); - page_cache_release(bufmap->page_array[i]); + put_page(bufmap->page_array[i]); } return -ENOMEM; } diff --git a/fs/orangefs/orangefs-utils.c b/fs/orangefs/orangefs-utils.c index 40f5163b56aa..8277aba65e87 100644 --- a/fs/orangefs/orangefs-utils.c +++ b/fs/orangefs/orangefs-utils.c @@ -303,7 +303,7 @@ int orangefs_inode_getattr(struct inode *inode, int new, int size) } break; case S_IFDIR: - inode->i_size = PAGE_CACHE_SIZE; + inode->i_size = PAGE_SIZE; orangefs_inode->blksize = (1 << inode->i_blkbits); spin_lock(&inode->i_lock); inode_set_bytes(inode, inode->i_size); diff --git a/fs/pipe.c b/fs/pipe.c index ab8dad3ccb6a..0d3f5165cb0b 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -134,7 +134,7 @@ static void anon_pipe_buf_release(struct pipe_inode_info *pipe, if (page_count(page) == 1 && !pipe->tmp_page) pipe->tmp_page = page; else - page_cache_release(page); + put_page(page); } /** @@ -180,7 +180,7 @@ EXPORT_SYMBOL(generic_pipe_buf_steal); */ void generic_pipe_buf_get(struct pipe_inode_info *pipe, struct pipe_buffer *buf) { - page_cache_get(buf->page); + get_page(buf->page); } EXPORT_SYMBOL(generic_pipe_buf_get); @@ -211,7 +211,7 @@ EXPORT_SYMBOL(generic_pipe_buf_confirm); void generic_pipe_buf_release(struct pipe_inode_info *pipe, struct pipe_buffer *buf) { - page_cache_release(buf->page); + put_page(buf->page); } EXPORT_SYMBOL(generic_pipe_buf_release); diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 9df431642042..229cb546bee0 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -553,7 +553,7 @@ static void smaps_pte_entry(pte_t *pte, unsigned long addr, if (radix_tree_exceptional_entry(page)) mss->swap += PAGE_SIZE; else - page_cache_release(page); + put_page(page); return; } diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c index 55bb57e6a30d..8afe10cf7df8 100644 --- a/fs/proc/vmcore.c +++ b/fs/proc/vmcore.c @@ -279,12 +279,12 @@ static int mmap_vmcore_fault(struct vm_area_struct *vma, struct vm_fault *vmf) if (!page) return VM_FAULT_OOM; if (!PageUptodate(page)) { - offset = (loff_t) index << PAGE_CACHE_SHIFT; + offset = (loff_t) index << PAGE_SHIFT; buf = __va((page_to_pfn(page) << PAGE_SHIFT)); rc = __read_vmcore(buf, PAGE_SIZE, &offset, 0); if (rc < 0) { unlock_page(page); - page_cache_release(page); + put_page(page); return (rc == -ENOMEM) ? 
VM_FAULT_OOM : VM_FAULT_SIGBUS; } SetPageUptodate(page); diff --git a/fs/pstore/inode.c b/fs/pstore/inode.c index dc645b66cd79..45d6110744cb 100644 --- a/fs/pstore/inode.c +++ b/fs/pstore/inode.c @@ -420,8 +420,8 @@ static int pstore_fill_super(struct super_block *sb, void *data, int silent) pstore_sb = sb; sb->s_maxbytes = MAX_LFS_FILESIZE; - sb->s_blocksize = PAGE_CACHE_SIZE; - sb->s_blocksize_bits = PAGE_CACHE_SHIFT; + sb->s_blocksize = PAGE_SIZE; + sb->s_blocksize_bits = PAGE_SHIFT; sb->s_magic = PSTOREFS_MAGIC; sb->s_op = &pstore_ops; sb->s_time_gran = 1; diff --git a/fs/qnx6/dir.c b/fs/qnx6/dir.c index e1f37278cf97..144ceda4948e 100644 --- a/fs/qnx6/dir.c +++ b/fs/qnx6/dir.c @@ -35,9 +35,9 @@ static struct page *qnx6_get_page(struct inode *dir, unsigned long n) static unsigned last_entry(struct inode *inode, unsigned long page_nr) { unsigned long last_byte = inode->i_size; - last_byte -= page_nr << PAGE_CACHE_SHIFT; - if (last_byte > PAGE_CACHE_SIZE) - last_byte = PAGE_CACHE_SIZE; + last_byte -= page_nr << PAGE_SHIFT; + if (last_byte > PAGE_SIZE) + last_byte = PAGE_SIZE; return last_byte / QNX6_DIR_ENTRY_SIZE; } @@ -47,9 +47,9 @@ static struct qnx6_long_filename *qnx6_longname(struct super_block *sb, { struct qnx6_sb_info *sbi = QNX6_SB(sb); u32 s = fs32_to_cpu(sbi, de->de_long_inode); /* in block units */ - u32 n = s >> (PAGE_CACHE_SHIFT - sb->s_blocksize_bits); /* in pages */ + u32 n = s >> (PAGE_SHIFT - sb->s_blocksize_bits); /* in pages */ /* within page */ - u32 offs = (s << sb->s_blocksize_bits) & ~PAGE_CACHE_MASK; + u32 offs = (s << sb->s_blocksize_bits) & ~PAGE_MASK; struct address_space *mapping = sbi->longfile->i_mapping; struct page *page = read_mapping_page(mapping, n, NULL); if (IS_ERR(page)) @@ -115,8 +115,8 @@ static int qnx6_readdir(struct file *file, struct dir_context *ctx) struct qnx6_sb_info *sbi = QNX6_SB(s); loff_t pos = ctx->pos & ~(QNX6_DIR_ENTRY_SIZE - 1); unsigned long npages = dir_pages(inode); - unsigned long n = pos >> PAGE_CACHE_SHIFT; - unsigned start = (pos & ~PAGE_CACHE_MASK) / QNX6_DIR_ENTRY_SIZE; + unsigned long n = pos >> PAGE_SHIFT; + unsigned start = (pos & ~PAGE_MASK) / QNX6_DIR_ENTRY_SIZE; bool done = false; ctx->pos = pos; @@ -131,7 +131,7 @@ static int qnx6_readdir(struct file *file, struct dir_context *ctx) if (IS_ERR(page)) { pr_err("%s(): read failed\n", __func__); - ctx->pos = (n + 1) << PAGE_CACHE_SHIFT; + ctx->pos = (n + 1) << PAGE_SHIFT; return PTR_ERR(page); } de = ((struct qnx6_dir_entry *)page_address(page)) + start; diff --git a/fs/qnx6/inode.c b/fs/qnx6/inode.c index 47bb1de07155..1192422a1c56 100644 --- a/fs/qnx6/inode.c +++ b/fs/qnx6/inode.c @@ -542,8 +542,8 @@ struct inode *qnx6_iget(struct super_block *sb, unsigned ino) iget_failed(inode); return ERR_PTR(-EIO); } - n = (ino - 1) >> (PAGE_CACHE_SHIFT - QNX6_INODE_SIZE_BITS); - offs = (ino - 1) & (~PAGE_CACHE_MASK >> QNX6_INODE_SIZE_BITS); + n = (ino - 1) >> (PAGE_SHIFT - QNX6_INODE_SIZE_BITS); + offs = (ino - 1) & (~PAGE_MASK >> QNX6_INODE_SIZE_BITS); mapping = sbi->inodes->i_mapping; page = read_mapping_page(mapping, n, NULL); if (IS_ERR(page)) { diff --git a/fs/qnx6/qnx6.h b/fs/qnx6/qnx6.h index d3fb2b698800..f23b5c4a66ad 100644 --- a/fs/qnx6/qnx6.h +++ b/fs/qnx6/qnx6.h @@ -128,7 +128,7 @@ extern struct qnx6_super_block *qnx6_mmi_fill_super(struct super_block *s, static inline void qnx6_put_page(struct page *page) { kunmap(page); - page_cache_release(page); + put_page(page); } extern unsigned qnx6_find_entry(int len, struct inode *dir, const char *name, diff --git 
a/fs/ramfs/inode.c b/fs/ramfs/inode.c index 38981b037524..1ab6e6c2e60e 100644 --- a/fs/ramfs/inode.c +++ b/fs/ramfs/inode.c @@ -223,8 +223,8 @@ int ramfs_fill_super(struct super_block *sb, void *data, int silent) return err; sb->s_maxbytes = MAX_LFS_FILESIZE; - sb->s_blocksize = PAGE_CACHE_SIZE; - sb->s_blocksize_bits = PAGE_CACHE_SHIFT; + sb->s_blocksize = PAGE_SIZE; + sb->s_blocksize_bits = PAGE_SHIFT; sb->s_magic = RAMFS_MAGIC; sb->s_op = &ramfs_ops; sb->s_time_gran = 1; diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c index 9424a4ba93a9..389773711de4 100644 --- a/fs/reiserfs/file.c +++ b/fs/reiserfs/file.c @@ -180,11 +180,11 @@ int reiserfs_commit_page(struct inode *inode, struct page *page, int partial = 0; unsigned blocksize; struct buffer_head *bh, *head; - unsigned long i_size_index = inode->i_size >> PAGE_CACHE_SHIFT; + unsigned long i_size_index = inode->i_size >> PAGE_SHIFT; int new; int logit = reiserfs_file_data_log(inode); struct super_block *s = inode->i_sb; - int bh_per_page = PAGE_CACHE_SIZE / s->s_blocksize; + int bh_per_page = PAGE_SIZE / s->s_blocksize; struct reiserfs_transaction_handle th; int ret = 0; diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c index ae9e5b308cf9..d5c2e9c865de 100644 --- a/fs/reiserfs/inode.c +++ b/fs/reiserfs/inode.c @@ -386,7 +386,7 @@ static int _get_block_create_0(struct inode *inode, sector_t block, goto finished; } /* read file tail into part of page */ - offset = (cpu_key_k_offset(&key) - 1) & (PAGE_CACHE_SIZE - 1); + offset = (cpu_key_k_offset(&key) - 1) & (PAGE_SIZE - 1); copy_item_head(&tmp_ih, ih); /* @@ -587,10 +587,10 @@ static int convert_tail_for_hole(struct inode *inode, return -EIO; /* always try to read until the end of the block */ - tail_start = tail_offset & (PAGE_CACHE_SIZE - 1); + tail_start = tail_offset & (PAGE_SIZE - 1); tail_end = (tail_start | (bh_result->b_size - 1)) + 1; - index = tail_offset >> PAGE_CACHE_SHIFT; + index = tail_offset >> PAGE_SHIFT; /* * hole_page can be zero in case of direct_io, we are sure * that we cannot get here if we write with O_DIRECT into tail page @@ -629,7 +629,7 @@ static int convert_tail_for_hole(struct inode *inode, unlock: if (tail_page != hole_page) { unlock_page(tail_page); - page_cache_release(tail_page); + put_page(tail_page); } out: return retval; @@ -2189,11 +2189,11 @@ static int grab_tail_page(struct inode *inode, * we want the page with the last byte in the file, * not the page that will hold the next byte for appending */ - unsigned long index = (inode->i_size - 1) >> PAGE_CACHE_SHIFT; + unsigned long index = (inode->i_size - 1) >> PAGE_SHIFT; unsigned long pos = 0; unsigned long start = 0; unsigned long blocksize = inode->i_sb->s_blocksize; - unsigned long offset = (inode->i_size) & (PAGE_CACHE_SIZE - 1); + unsigned long offset = (inode->i_size) & (PAGE_SIZE - 1); struct buffer_head *bh; struct buffer_head *head; struct page *page; @@ -2251,7 +2251,7 @@ out: unlock: unlock_page(page); - page_cache_release(page); + put_page(page); return error; } @@ -2265,7 +2265,7 @@ int reiserfs_truncate_file(struct inode *inode, int update_timestamps) { struct reiserfs_transaction_handle th; /* we want the offset for the first byte after the end of the file */ - unsigned long offset = inode->i_size & (PAGE_CACHE_SIZE - 1); + unsigned long offset = inode->i_size & (PAGE_SIZE - 1); unsigned blocksize = inode->i_sb->s_blocksize; unsigned length; struct page *page = NULL; @@ -2345,7 +2345,7 @@ int reiserfs_truncate_file(struct inode *inode, int update_timestamps) } } 
unlock_page(page); - page_cache_release(page); + put_page(page); } reiserfs_write_unlock(inode->i_sb); @@ -2354,7 +2354,7 @@ int reiserfs_truncate_file(struct inode *inode, int update_timestamps) out: if (page) { unlock_page(page); - page_cache_release(page); + put_page(page); } reiserfs_write_unlock(inode->i_sb); @@ -2426,7 +2426,7 @@ research: } else if (is_direct_le_ih(ih)) { char *p; p = page_address(bh_result->b_page); - p += (byte_offset - 1) & (PAGE_CACHE_SIZE - 1); + p += (byte_offset - 1) & (PAGE_SIZE - 1); copy_size = ih_item_len(ih) - pos_in_item; fs_gen = get_generation(inode->i_sb); @@ -2525,7 +2525,7 @@ static int reiserfs_write_full_page(struct page *page, struct writeback_control *wbc) { struct inode *inode = page->mapping->host; - unsigned long end_index = inode->i_size >> PAGE_CACHE_SHIFT; + unsigned long end_index = inode->i_size >> PAGE_SHIFT; int error = 0; unsigned long block; sector_t last_block; @@ -2535,7 +2535,7 @@ static int reiserfs_write_full_page(struct page *page, int checked = PageChecked(page); struct reiserfs_transaction_handle th; struct super_block *s = inode->i_sb; - int bh_per_page = PAGE_CACHE_SIZE / s->s_blocksize; + int bh_per_page = PAGE_SIZE / s->s_blocksize; th.t_trans_id = 0; /* no logging allowed when nonblocking or from PF_MEMALLOC */ @@ -2564,16 +2564,16 @@ static int reiserfs_write_full_page(struct page *page, if (page->index >= end_index) { unsigned last_offset; - last_offset = inode->i_size & (PAGE_CACHE_SIZE - 1); + last_offset = inode->i_size & (PAGE_SIZE - 1); /* no file contents in this page */ if (page->index >= end_index + 1 || !last_offset) { unlock_page(page); return 0; } - zero_user_segment(page, last_offset, PAGE_CACHE_SIZE); + zero_user_segment(page, last_offset, PAGE_SIZE); } bh = head; - block = page->index << (PAGE_CACHE_SHIFT - s->s_blocksize_bits); + block = page->index << (PAGE_SHIFT - s->s_blocksize_bits); last_block = (i_size_read(inode) - 1) >> inode->i_blkbits; /* first map all the buffers, logging any direct items we find */ do { @@ -2774,7 +2774,7 @@ static int reiserfs_write_begin(struct file *file, *fsdata = (void *)(unsigned long)flags; } - index = pos >> PAGE_CACHE_SHIFT; + index = pos >> PAGE_SHIFT; page = grab_cache_page_write_begin(mapping, index, flags); if (!page) return -ENOMEM; @@ -2822,7 +2822,7 @@ static int reiserfs_write_begin(struct file *file, } if (ret) { unlock_page(page); - page_cache_release(page); + put_page(page); /* Truncate allocated blocks */ reiserfs_truncate_failed_write(inode); } @@ -2909,7 +2909,7 @@ static int reiserfs_write_end(struct file *file, struct address_space *mapping, else th = NULL; - start = pos & (PAGE_CACHE_SIZE - 1); + start = pos & (PAGE_SIZE - 1); if (unlikely(copied < len)) { if (!PageUptodate(page)) copied = 0; @@ -2974,7 +2974,7 @@ out: if (locked) reiserfs_write_unlock(inode->i_sb); unlock_page(page); - page_cache_release(page); + put_page(page); if (pos + len > inode->i_size) reiserfs_truncate_failed_write(inode); @@ -2996,7 +2996,7 @@ int reiserfs_commit_write(struct file *f, struct page *page, unsigned from, unsigned to) { struct inode *inode = page->mapping->host; - loff_t pos = ((loff_t) page->index << PAGE_CACHE_SHIFT) + to; + loff_t pos = ((loff_t) page->index << PAGE_SHIFT) + to; int ret = 0; int update_sd = 0; struct reiserfs_transaction_handle *th = NULL; @@ -3181,7 +3181,7 @@ static void reiserfs_invalidatepage(struct page *page, unsigned int offset, struct inode *inode = page->mapping->host; unsigned int curr_off = 0; unsigned int stop = offset + length; 
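reiserfs_write_full_page() above derives end_index = i_size >> PAGE_SHIFT, skips pages wholly past EOF, and zeroes the EOF page from the last valid byte onward with zero_user_segment(). A userspace stand-in for that decision, with invented sizes:

#include <stdio.h>

#define PAGE_SHIFT 12
#define PAGE_SIZE  (1UL << PAGE_SHIFT)

int main(void)
{
	unsigned long long i_size = 10000;  /* invented file size */
	unsigned long end_index = i_size >> PAGE_SHIFT;        /* page 2 */
	unsigned long last_offset = i_size & (PAGE_SIZE - 1);  /* 1808 */
	unsigned long idx;

	for (idx = 0; idx <= end_index; idx++) {
		if (idx < end_index)
			printf("page %lu: write all %lu bytes\n", idx, PAGE_SIZE);
		else if (last_offset)
			printf("page %lu: write %lu bytes, zero the last %lu\n",
			       idx, last_offset, PAGE_SIZE - last_offset);
		/* if last_offset == 0, page end_index lies entirely past EOF */
	}
	return 0;
}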
- int partial_page = (offset || length < PAGE_CACHE_SIZE); + int partial_page = (offset || length < PAGE_SIZE); int ret = 1; BUG_ON(!PageLocked(page)); diff --git a/fs/reiserfs/ioctl.c b/fs/reiserfs/ioctl.c index 036a1fc0a8c3..57045f423893 100644 --- a/fs/reiserfs/ioctl.c +++ b/fs/reiserfs/ioctl.c @@ -203,7 +203,7 @@ int reiserfs_unpack(struct inode *inode, struct file *filp) * __reiserfs_write_begin on that page. This will force a * reiserfs_get_block to unpack the tail for us. */ - index = inode->i_size >> PAGE_CACHE_SHIFT; + index = inode->i_size >> PAGE_SHIFT; mapping = inode->i_mapping; page = grab_cache_page(mapping, index); retval = -ENOMEM; @@ -221,7 +221,7 @@ int reiserfs_unpack(struct inode *inode, struct file *filp) out_unlock: unlock_page(page); - page_cache_release(page); + put_page(page); out: inode_unlock(inode); diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c index 44c2bdced1c8..8c6487238bb3 100644 --- a/fs/reiserfs/journal.c +++ b/fs/reiserfs/journal.c @@ -605,12 +605,12 @@ static void release_buffer_page(struct buffer_head *bh) { struct page *page = bh->b_page; if (!page->mapping && trylock_page(page)) { - page_cache_get(page); + get_page(page); put_bh(bh); if (!page->mapping) try_to_free_buffers(page); unlock_page(page); - page_cache_release(page); + put_page(page); } else { put_bh(bh); } diff --git a/fs/reiserfs/stree.c b/fs/reiserfs/stree.c index 24cbe013240f..5feacd689241 100644 --- a/fs/reiserfs/stree.c +++ b/fs/reiserfs/stree.c @@ -1342,7 +1342,7 @@ int reiserfs_delete_item(struct reiserfs_transaction_handle *th, */ data = kmap_atomic(un_bh->b_page); - off = ((le_ih_k_offset(&s_ih) - 1) & (PAGE_CACHE_SIZE - 1)); + off = ((le_ih_k_offset(&s_ih) - 1) & (PAGE_SIZE - 1)); memcpy(data + off, ih_item_body(PATH_PLAST_BUFFER(path), &s_ih), ret_value); @@ -1511,7 +1511,7 @@ static void unmap_buffers(struct page *page, loff_t pos) if (page) { if (page_has_buffers(page)) { - tail_index = pos & (PAGE_CACHE_SIZE - 1); + tail_index = pos & (PAGE_SIZE - 1); cur_index = 0; head = page_buffers(page); bh = head; diff --git a/fs/reiserfs/tail_conversion.c b/fs/reiserfs/tail_conversion.c index f41e19b4bb42..2d5489b0a269 100644 --- a/fs/reiserfs/tail_conversion.c +++ b/fs/reiserfs/tail_conversion.c @@ -151,7 +151,7 @@ int direct2indirect(struct reiserfs_transaction_handle *th, struct inode *inode, */ if (up_to_date_bh) { unsigned pgoff = - (tail_offset + total_tail - 1) & (PAGE_CACHE_SIZE - 1); + (tail_offset + total_tail - 1) & (PAGE_SIZE - 1); char *kaddr = kmap_atomic(up_to_date_bh->b_page); memset(kaddr + pgoff, 0, blk_size - total_tail); kunmap_atomic(kaddr); @@ -271,7 +271,7 @@ int indirect2direct(struct reiserfs_transaction_handle *th, * the page was locked and this part of the page was up to date when * indirect2direct was called, so we know the bytes are still valid */ - tail = tail + (pos & (PAGE_CACHE_SIZE - 1)); + tail = tail + (pos & (PAGE_SIZE - 1)); PATH_LAST_POSITION(path)++; diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c index 57e0b2310532..28f5f8b11370 100644 --- a/fs/reiserfs/xattr.c +++ b/fs/reiserfs/xattr.c @@ -415,7 +415,7 @@ out: static inline void reiserfs_put_page(struct page *page) { kunmap(page); - page_cache_release(page); + put_page(page); } static struct page *reiserfs_get_page(struct inode *dir, size_t n) @@ -427,7 +427,7 @@ static struct page *reiserfs_get_page(struct inode *dir, size_t n) * and an unlink/rmdir has just occurred - GFP_NOFS avoids this */ mapping_set_gfp_mask(mapping, GFP_NOFS); - page = read_mapping_page(mapping, n >> 
PAGE_CACHE_SHIFT, NULL); + page = read_mapping_page(mapping, n >> PAGE_SHIFT, NULL); if (!IS_ERR(page)) { kmap(page); if (PageError(page)) @@ -526,10 +526,10 @@ reiserfs_xattr_set_handle(struct reiserfs_transaction_handle *th, while (buffer_pos < buffer_size || buffer_pos == 0) { size_t chunk; size_t skip = 0; - size_t page_offset = (file_pos & (PAGE_CACHE_SIZE - 1)); + size_t page_offset = (file_pos & (PAGE_SIZE - 1)); - if (buffer_size - buffer_pos > PAGE_CACHE_SIZE) - chunk = PAGE_CACHE_SIZE; + if (buffer_size - buffer_pos > PAGE_SIZE) + chunk = PAGE_SIZE; else chunk = buffer_size - buffer_pos; @@ -546,8 +546,8 @@ reiserfs_xattr_set_handle(struct reiserfs_transaction_handle *th, struct reiserfs_xattr_header *rxh; skip = file_pos = sizeof(struct reiserfs_xattr_header); - if (chunk + skip > PAGE_CACHE_SIZE) - chunk = PAGE_CACHE_SIZE - skip; + if (chunk + skip > PAGE_SIZE) + chunk = PAGE_SIZE - skip; rxh = (struct reiserfs_xattr_header *)data; rxh->h_magic = cpu_to_le32(REISERFS_XATTR_MAGIC); rxh->h_hash = cpu_to_le32(xahash); @@ -675,8 +675,8 @@ reiserfs_xattr_get(struct inode *inode, const char *name, void *buffer, char *data; size_t skip = 0; - if (isize - file_pos > PAGE_CACHE_SIZE) - chunk = PAGE_CACHE_SIZE; + if (isize - file_pos > PAGE_SIZE) + chunk = PAGE_SIZE; else chunk = isize - file_pos; diff --git a/fs/splice.c b/fs/splice.c index 9947b5c69664..b018eb485019 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -88,7 +88,7 @@ out_unlock: static void page_cache_pipe_buf_release(struct pipe_inode_info *pipe, struct pipe_buffer *buf) { - page_cache_release(buf->page); + put_page(buf->page); buf->flags &= ~PIPE_BUF_FLAG_LRU; } @@ -268,7 +268,7 @@ EXPORT_SYMBOL_GPL(splice_to_pipe); void spd_release_page(struct splice_pipe_desc *spd, unsigned int i) { - page_cache_release(spd->pages[i]); + put_page(spd->pages[i]); } /* @@ -328,9 +328,9 @@ __generic_file_splice_read(struct file *in, loff_t *ppos, if (splice_grow_spd(pipe, &spd)) return -ENOMEM; - index = *ppos >> PAGE_CACHE_SHIFT; - loff = *ppos & ~PAGE_CACHE_MASK; - req_pages = (len + loff + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; + index = *ppos >> PAGE_SHIFT; + loff = *ppos & ~PAGE_MASK; + req_pages = (len + loff + PAGE_SIZE - 1) >> PAGE_SHIFT; nr_pages = min(req_pages, spd.nr_pages_max); /* @@ -365,7 +365,7 @@ __generic_file_splice_read(struct file *in, loff_t *ppos, error = add_to_page_cache_lru(page, mapping, index, mapping_gfp_constraint(mapping, GFP_KERNEL)); if (unlikely(error)) { - page_cache_release(page); + put_page(page); if (error == -EEXIST) continue; break; @@ -385,7 +385,7 @@ __generic_file_splice_read(struct file *in, loff_t *ppos, * Now loop over the map and see if we need to start IO on any * pages, fill in the partial map, etc. */ - index = *ppos >> PAGE_CACHE_SHIFT; + index = *ppos >> PAGE_SHIFT; nr_pages = spd.nr_pages; spd.nr_pages = 0; for (page_nr = 0; page_nr < nr_pages; page_nr++) { @@ -397,7 +397,7 @@ __generic_file_splice_read(struct file *in, loff_t *ppos, /* * this_len is the max we'll use from this page */ - this_len = min_t(unsigned long, len, PAGE_CACHE_SIZE - loff); + this_len = min_t(unsigned long, len, PAGE_SIZE - loff); page = spd.pages[page_nr]; if (PageReadahead(page)) @@ -426,7 +426,7 @@ retry_lookup: error = -ENOMEM; break; } - page_cache_release(spd.pages[page_nr]); + put_page(spd.pages[page_nr]); spd.pages[page_nr] = page; } /* @@ -456,7 +456,7 @@ fill_it: * i_size must be checked after PageUptodate. 
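__generic_file_splice_read() above turns a (ppos, len) request into page-cache terms: the starting page index, the offset within that page, and a rounded-up page count. The same arithmetic as a standalone sketch with made-up numbers:

#include <stdio.h>

#define PAGE_SHIFT 12
#define PAGE_SIZE  (1UL << PAGE_SHIFT)
#define PAGE_MASK  (~(PAGE_SIZE - 1))

int main(void)
{
	unsigned long long ppos = 6000; /* invented file position */
	unsigned long len = 10000;      /* bytes requested */

	unsigned long index = ppos >> PAGE_SHIFT;       /* first page */
	unsigned long loff = ppos & ~PAGE_MASK;         /* offset within it */
	unsigned long req_pages =
		(len + loff + PAGE_SIZE - 1) >> PAGE_SHIFT; /* round up */

	/* bytes 6000..15999 span pages 1..3 starting at offset 1904 */
	printf("index %lu, loff %lu, req_pages %lu\n", index, loff, req_pages);
	return 0;
}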
*/ isize = i_size_read(mapping->host); - end_index = (isize - 1) >> PAGE_CACHE_SHIFT; + end_index = (isize - 1) >> PAGE_SHIFT; if (unlikely(!isize || index > end_index)) break; @@ -470,7 +470,7 @@ fill_it: /* * max good bytes in this page */ - plen = ((isize - 1) & ~PAGE_CACHE_MASK) + 1; + plen = ((isize - 1) & ~PAGE_MASK) + 1; if (plen <= loff) break; @@ -494,8 +494,8 @@ fill_it: * we got, 'nr_pages' is how many pages are in the map. */ while (page_nr < nr_pages) - page_cache_release(spd.pages[page_nr++]); - in->f_ra.prev_pos = (loff_t)index << PAGE_CACHE_SHIFT; + put_page(spd.pages[page_nr++]); + in->f_ra.prev_pos = (loff_t)index << PAGE_SHIFT; if (spd.nr_pages) error = splice_to_pipe(pipe, &spd); @@ -636,8 +636,8 @@ ssize_t default_file_splice_read(struct file *in, loff_t *ppos, goto shrink_ret; } - offset = *ppos & ~PAGE_CACHE_MASK; - nr_pages = (len + offset + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; + offset = *ppos & ~PAGE_MASK; + nr_pages = (len + offset + PAGE_SIZE - 1) >> PAGE_SHIFT; for (i = 0; i < nr_pages && i < spd.nr_pages_max && len; i++) { struct page *page; @@ -647,7 +647,7 @@ ssize_t default_file_splice_read(struct file *in, loff_t *ppos, if (!page) goto err; - this_len = min_t(size_t, len, PAGE_CACHE_SIZE - offset); + this_len = min_t(size_t, len, PAGE_SIZE - offset); vec[i].iov_base = (void __user *) page_address(page); vec[i].iov_len = this_len; spd.pages[i] = page; diff --git a/fs/squashfs/block.c b/fs/squashfs/block.c index 0cea9b9236d0..2c2618410d51 100644 --- a/fs/squashfs/block.c +++ b/fs/squashfs/block.c @@ -181,11 +181,11 @@ int squashfs_read_data(struct super_block *sb, u64 index, int length, in = min(bytes, msblk->devblksize - offset); bytes -= in; while (in) { - if (pg_offset == PAGE_CACHE_SIZE) { + if (pg_offset == PAGE_SIZE) { data = squashfs_next_page(output); pg_offset = 0; } - avail = min_t(int, in, PAGE_CACHE_SIZE - + avail = min_t(int, in, PAGE_SIZE - pg_offset); memcpy(data + pg_offset, bh[k]->b_data + offset, avail); diff --git a/fs/squashfs/cache.c b/fs/squashfs/cache.c index 1cb70a0b2168..27e501af0e8b 100644 --- a/fs/squashfs/cache.c +++ b/fs/squashfs/cache.c @@ -255,7 +255,7 @@ struct squashfs_cache *squashfs_cache_init(char *name, int entries, cache->unused = entries; cache->entries = entries; cache->block_size = block_size; - cache->pages = block_size >> PAGE_CACHE_SHIFT; + cache->pages = block_size >> PAGE_SHIFT; cache->pages = cache->pages ? 
cache->pages : 1; cache->name = name; cache->num_waiters = 0; @@ -275,7 +275,7 @@ struct squashfs_cache *squashfs_cache_init(char *name, int entries, } for (j = 0; j < cache->pages; j++) { - entry->data[j] = kmalloc(PAGE_CACHE_SIZE, GFP_KERNEL); + entry->data[j] = kmalloc(PAGE_SIZE, GFP_KERNEL); if (entry->data[j] == NULL) { ERROR("Failed to allocate %s buffer\n", name); goto cleanup; @@ -314,10 +314,10 @@ int squashfs_copy_data(void *buffer, struct squashfs_cache_entry *entry, return min(length, entry->length - offset); while (offset < entry->length) { - void *buff = entry->data[offset / PAGE_CACHE_SIZE] - + (offset % PAGE_CACHE_SIZE); + void *buff = entry->data[offset / PAGE_SIZE] + + (offset % PAGE_SIZE); int bytes = min_t(int, entry->length - offset, - PAGE_CACHE_SIZE - (offset % PAGE_CACHE_SIZE)); + PAGE_SIZE - (offset % PAGE_SIZE)); if (bytes >= remaining) { memcpy(buffer, buff, remaining); @@ -415,7 +415,7 @@ struct squashfs_cache_entry *squashfs_get_datablock(struct super_block *sb, */ void *squashfs_read_table(struct super_block *sb, u64 block, int length) { - int pages = (length + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; + int pages = (length + PAGE_SIZE - 1) >> PAGE_SHIFT; int i, res; void *table, *buffer, **data; struct squashfs_page_actor *actor; @@ -436,7 +436,7 @@ void *squashfs_read_table(struct super_block *sb, u64 block, int length) goto failed2; } - for (i = 0; i < pages; i++, buffer += PAGE_CACHE_SIZE) + for (i = 0; i < pages; i++, buffer += PAGE_SIZE) data[i] = buffer; res = squashfs_read_data(sb, block, length | diff --git a/fs/squashfs/decompressor.c b/fs/squashfs/decompressor.c index e9034bf6e5ae..d2bc13636f79 100644 --- a/fs/squashfs/decompressor.c +++ b/fs/squashfs/decompressor.c @@ -102,7 +102,7 @@ static void *get_comp_opts(struct super_block *sb, unsigned short flags) * Read decompressor specific options from file system if present */ if (SQUASHFS_COMP_OPTS(flags)) { - buffer = kmalloc(PAGE_CACHE_SIZE, GFP_KERNEL); + buffer = kmalloc(PAGE_SIZE, GFP_KERNEL); if (buffer == NULL) { comp_opts = ERR_PTR(-ENOMEM); goto out; diff --git a/fs/squashfs/file.c b/fs/squashfs/file.c index e5c9689062ba..437de9e89221 100644 --- a/fs/squashfs/file.c +++ b/fs/squashfs/file.c @@ -175,7 +175,7 @@ static long long read_indexes(struct super_block *sb, int n, { int err, i; long long block = 0; - __le32 *blist = kmalloc(PAGE_CACHE_SIZE, GFP_KERNEL); + __le32 *blist = kmalloc(PAGE_SIZE, GFP_KERNEL); if (blist == NULL) { ERROR("read_indexes: Failed to allocate block_list\n"); @@ -183,7 +183,7 @@ static long long read_indexes(struct super_block *sb, int n, } while (n) { - int blocks = min_t(int, n, PAGE_CACHE_SIZE >> 2); + int blocks = min_t(int, n, PAGE_SIZE >> 2); err = squashfs_read_metadata(sb, blist, start_block, offset, blocks << 2); @@ -377,7 +377,7 @@ void squashfs_copy_cache(struct page *page, struct squashfs_cache_entry *buffer, struct inode *inode = page->mapping->host; struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info; void *pageaddr; - int i, mask = (1 << (msblk->block_log - PAGE_CACHE_SHIFT)) - 1; + int i, mask = (1 << (msblk->block_log - PAGE_SHIFT)) - 1; int start_index = page->index & ~mask, end_index = start_index | mask; /* @@ -387,9 +387,9 @@ void squashfs_copy_cache(struct page *page, struct squashfs_cache_entry *buffer, * been called to fill. */ for (i = start_index; i <= end_index && bytes > 0; i++, - bytes -= PAGE_CACHE_SIZE, offset += PAGE_CACHE_SIZE) { + bytes -= PAGE_SIZE, offset += PAGE_SIZE) { struct page *push_page; - int avail = buffer ? 
min_t(int, bytes, PAGE_CACHE_SIZE) : 0; + int avail = buffer ? min_t(int, bytes, PAGE_SIZE) : 0; TRACE("bytes %d, i %d, available_bytes %d\n", bytes, i, avail); @@ -404,14 +404,14 @@ void squashfs_copy_cache(struct page *page, struct squashfs_cache_entry *buffer, pageaddr = kmap_atomic(push_page); squashfs_copy_data(pageaddr, buffer, offset, avail); - memset(pageaddr + avail, 0, PAGE_CACHE_SIZE - avail); + memset(pageaddr + avail, 0, PAGE_SIZE - avail); kunmap_atomic(pageaddr); flush_dcache_page(push_page); SetPageUptodate(push_page); skip_page: unlock_page(push_page); if (i != page->index) - page_cache_release(push_page); + put_page(push_page); } } @@ -454,7 +454,7 @@ static int squashfs_readpage(struct file *file, struct page *page) { struct inode *inode = page->mapping->host; struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info; - int index = page->index >> (msblk->block_log - PAGE_CACHE_SHIFT); + int index = page->index >> (msblk->block_log - PAGE_SHIFT); int file_end = i_size_read(inode) >> msblk->block_log; int res; void *pageaddr; @@ -462,8 +462,8 @@ static int squashfs_readpage(struct file *file, struct page *page) TRACE("Entered squashfs_readpage, page index %lx, start block %llx\n", page->index, squashfs_i(inode)->start); - if (page->index >= ((i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> - PAGE_CACHE_SHIFT)) + if (page->index >= ((i_size_read(inode) + PAGE_SIZE - 1) >> + PAGE_SHIFT)) goto out; if (index < file_end || squashfs_i(inode)->fragment_block == @@ -487,7 +487,7 @@ error_out: SetPageError(page); out: pageaddr = kmap_atomic(page); - memset(pageaddr, 0, PAGE_CACHE_SIZE); + memset(pageaddr, 0, PAGE_SIZE); kunmap_atomic(pageaddr); flush_dcache_page(page); if (!PageError(page)) diff --git a/fs/squashfs/file_direct.c b/fs/squashfs/file_direct.c index 43e7a7eddac0..cb485d8e0e91 100644 --- a/fs/squashfs/file_direct.c +++ b/fs/squashfs/file_direct.c @@ -30,8 +30,8 @@ int squashfs_readpage_block(struct page *target_page, u64 block, int bsize) struct inode *inode = target_page->mapping->host; struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info; - int file_end = (i_size_read(inode) - 1) >> PAGE_CACHE_SHIFT; - int mask = (1 << (msblk->block_log - PAGE_CACHE_SHIFT)) - 1; + int file_end = (i_size_read(inode) - 1) >> PAGE_SHIFT; + int mask = (1 << (msblk->block_log - PAGE_SHIFT)) - 1; int start_index = target_page->index & ~mask; int end_index = start_index | mask; int i, n, pages, missing_pages, bytes, res = -ENOMEM; @@ -68,7 +68,7 @@ int squashfs_readpage_block(struct page *target_page, u64 block, int bsize) if (PageUptodate(page[i])) { unlock_page(page[i]); - page_cache_release(page[i]); + put_page(page[i]); page[i] = NULL; missing_pages++; } @@ -96,10 +96,10 @@ int squashfs_readpage_block(struct page *target_page, u64 block, int bsize) goto mark_errored; /* Last page may have trailing bytes not filled */ - bytes = res % PAGE_CACHE_SIZE; + bytes = res % PAGE_SIZE; if (bytes) { pageaddr = kmap_atomic(page[pages - 1]); - memset(pageaddr + bytes, 0, PAGE_CACHE_SIZE - bytes); + memset(pageaddr + bytes, 0, PAGE_SIZE - bytes); kunmap_atomic(pageaddr); } @@ -109,7 +109,7 @@ int squashfs_readpage_block(struct page *target_page, u64 block, int bsize) SetPageUptodate(page[i]); unlock_page(page[i]); if (page[i] != target_page) - page_cache_release(page[i]); + put_page(page[i]); } kfree(actor); @@ -127,7 +127,7 @@ mark_errored: flush_dcache_page(page[i]); SetPageError(page[i]); unlock_page(page[i]); - page_cache_release(page[i]); + put_page(page[i]); } out: @@ -153,21 +153,21 @@ static 
int squashfs_read_cache(struct page *target_page, u64 block, int bsize, } for (n = 0; n < pages && bytes > 0; n++, - bytes -= PAGE_CACHE_SIZE, offset += PAGE_CACHE_SIZE) { - int avail = min_t(int, bytes, PAGE_CACHE_SIZE); + bytes -= PAGE_SIZE, offset += PAGE_SIZE) { + int avail = min_t(int, bytes, PAGE_SIZE); if (page[n] == NULL) continue; pageaddr = kmap_atomic(page[n]); squashfs_copy_data(pageaddr, buffer, offset, avail); - memset(pageaddr + avail, 0, PAGE_CACHE_SIZE - avail); + memset(pageaddr + avail, 0, PAGE_SIZE - avail); kunmap_atomic(pageaddr); flush_dcache_page(page[n]); SetPageUptodate(page[n]); unlock_page(page[n]); if (page[n] != target_page) - page_cache_release(page[n]); + put_page(page[n]); } out: diff --git a/fs/squashfs/lz4_wrapper.c b/fs/squashfs/lz4_wrapper.c index c31e2bc9c081..ff4468bd18b0 100644 --- a/fs/squashfs/lz4_wrapper.c +++ b/fs/squashfs/lz4_wrapper.c @@ -117,13 +117,13 @@ static int lz4_uncompress(struct squashfs_sb_info *msblk, void *strm, data = squashfs_first_page(output); buff = stream->output; while (data) { - if (bytes <= PAGE_CACHE_SIZE) { + if (bytes <= PAGE_SIZE) { memcpy(data, buff, bytes); break; } - memcpy(data, buff, PAGE_CACHE_SIZE); - buff += PAGE_CACHE_SIZE; - bytes -= PAGE_CACHE_SIZE; + memcpy(data, buff, PAGE_SIZE); + buff += PAGE_SIZE; + bytes -= PAGE_SIZE; data = squashfs_next_page(output); } squashfs_finish_page(output); diff --git a/fs/squashfs/lzo_wrapper.c b/fs/squashfs/lzo_wrapper.c index 244b9fbfff7b..934c17e96590 100644 --- a/fs/squashfs/lzo_wrapper.c +++ b/fs/squashfs/lzo_wrapper.c @@ -102,13 +102,13 @@ static int lzo_uncompress(struct squashfs_sb_info *msblk, void *strm, data = squashfs_first_page(output); buff = stream->output; while (data) { - if (bytes <= PAGE_CACHE_SIZE) { + if (bytes <= PAGE_SIZE) { memcpy(data, buff, bytes); break; } else { - memcpy(data, buff, PAGE_CACHE_SIZE); - buff += PAGE_CACHE_SIZE; - bytes -= PAGE_CACHE_SIZE; + memcpy(data, buff, PAGE_SIZE); + buff += PAGE_SIZE; + bytes -= PAGE_SIZE; data = squashfs_next_page(output); } } diff --git a/fs/squashfs/page_actor.c b/fs/squashfs/page_actor.c index 5a1c11f56441..9b7b1b6a7892 100644 --- a/fs/squashfs/page_actor.c +++ b/fs/squashfs/page_actor.c @@ -48,7 +48,7 @@ struct squashfs_page_actor *squashfs_page_actor_init(void **buffer, if (actor == NULL) return NULL; - actor->length = length ? : pages * PAGE_CACHE_SIZE; + actor->length = length ? : pages * PAGE_SIZE; actor->buffer = buffer; actor->pages = pages; actor->next_page = 0; @@ -88,7 +88,7 @@ struct squashfs_page_actor *squashfs_page_actor_init_special(struct page **page, if (actor == NULL) return NULL; - actor->length = length ? : pages * PAGE_CACHE_SIZE; + actor->length = length ? : pages * PAGE_SIZE; actor->page = page; actor->pages = pages; actor->next_page = 0; diff --git a/fs/squashfs/page_actor.h b/fs/squashfs/page_actor.h index 26dd82008b82..98537eab27e2 100644 --- a/fs/squashfs/page_actor.h +++ b/fs/squashfs/page_actor.h @@ -24,7 +24,7 @@ static inline struct squashfs_page_actor *squashfs_page_actor_init(void **page, if (actor == NULL) return NULL; - actor->length = length ? : pages * PAGE_CACHE_SIZE; + actor->length = length ? 
: pages * PAGE_SIZE; actor->page = page; actor->pages = pages; actor->next_page = 0; diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c index 5e79bfa4f260..cf01e15a7b16 100644 --- a/fs/squashfs/super.c +++ b/fs/squashfs/super.c @@ -152,7 +152,7 @@ static int squashfs_fill_super(struct super_block *sb, void *data, int silent) * Check the system page size is not larger than the filesystem * block size (by default 128K). This is currently not supported. */ - if (PAGE_CACHE_SIZE > msblk->block_size) { + if (PAGE_SIZE > msblk->block_size) { ERROR("Page size > filesystem block size (%d). This is " "currently not supported!\n", msblk->block_size); goto failed_mount; diff --git a/fs/squashfs/symlink.c b/fs/squashfs/symlink.c index dbcc2f54bad4..d688ef42a6a1 100644 --- a/fs/squashfs/symlink.c +++ b/fs/squashfs/symlink.c @@ -48,10 +48,10 @@ static int squashfs_symlink_readpage(struct file *file, struct page *page) struct inode *inode = page->mapping->host; struct super_block *sb = inode->i_sb; struct squashfs_sb_info *msblk = sb->s_fs_info; - int index = page->index << PAGE_CACHE_SHIFT; + int index = page->index << PAGE_SHIFT; u64 block = squashfs_i(inode)->start; int offset = squashfs_i(inode)->offset; - int length = min_t(int, i_size_read(inode) - index, PAGE_CACHE_SIZE); + int length = min_t(int, i_size_read(inode) - index, PAGE_SIZE); int bytes, copied; void *pageaddr; struct squashfs_cache_entry *entry; @@ -94,7 +94,7 @@ static int squashfs_symlink_readpage(struct file *file, struct page *page) copied = squashfs_copy_data(pageaddr + bytes, entry, offset, length - bytes); if (copied == length - bytes) - memset(pageaddr + length, 0, PAGE_CACHE_SIZE - length); + memset(pageaddr + length, 0, PAGE_SIZE - length); else block = entry->next_index; kunmap_atomic(pageaddr); diff --git a/fs/squashfs/xz_wrapper.c b/fs/squashfs/xz_wrapper.c index c609624e4b8a..6bfaef73d065 100644 --- a/fs/squashfs/xz_wrapper.c +++ b/fs/squashfs/xz_wrapper.c @@ -141,7 +141,7 @@ static int squashfs_xz_uncompress(struct squashfs_sb_info *msblk, void *strm, stream->buf.in_pos = 0; stream->buf.in_size = 0; stream->buf.out_pos = 0; - stream->buf.out_size = PAGE_CACHE_SIZE; + stream->buf.out_size = PAGE_SIZE; stream->buf.out = squashfs_first_page(output); do { @@ -158,7 +158,7 @@ static int squashfs_xz_uncompress(struct squashfs_sb_info *msblk, void *strm, stream->buf.out = squashfs_next_page(output); if (stream->buf.out != NULL) { stream->buf.out_pos = 0; - total += PAGE_CACHE_SIZE; + total += PAGE_SIZE; } } diff --git a/fs/squashfs/zlib_wrapper.c b/fs/squashfs/zlib_wrapper.c index 8727caba6882..2ec24d128bce 100644 --- a/fs/squashfs/zlib_wrapper.c +++ b/fs/squashfs/zlib_wrapper.c @@ -69,7 +69,7 @@ static int zlib_uncompress(struct squashfs_sb_info *msblk, void *strm, int zlib_err, zlib_init = 0, k = 0; z_stream *stream = strm; - stream->avail_out = PAGE_CACHE_SIZE; + stream->avail_out = PAGE_SIZE; stream->next_out = squashfs_first_page(output); stream->avail_in = 0; @@ -85,7 +85,7 @@ static int zlib_uncompress(struct squashfs_sb_info *msblk, void *strm, if (stream->avail_out == 0) { stream->next_out = squashfs_next_page(output); if (stream->next_out != NULL) - stream->avail_out = PAGE_CACHE_SIZE; + stream->avail_out = PAGE_SIZE; } if (!zlib_init) { diff --git a/fs/sync.c b/fs/sync.c index dd5d1711c7ac..2a54c1f22035 100644 --- a/fs/sync.c +++ b/fs/sync.c @@ -302,7 +302,7 @@ SYSCALL_DEFINE4(sync_file_range, int, fd, loff_t, offset, loff_t, nbytes, goto out; if (sizeof(pgoff_t) == 4) { - if (offset >= (0x100000000ULL << 
PAGE_CACHE_SHIFT)) { + if (offset >= (0x100000000ULL << PAGE_SHIFT)) { /* * The range starts outside a 32 bit machine's * pagecache addressing capabilities. Let it "succeed" @@ -310,7 +310,7 @@ SYSCALL_DEFINE4(sync_file_range, int, fd, loff_t, offset, loff_t, nbytes, ret = 0; goto out; } - if (endbyte >= (0x100000000ULL << PAGE_CACHE_SHIFT)) { + if (endbyte >= (0x100000000ULL << PAGE_SHIFT)) { /* * Out to EOF */ diff --git a/fs/sysv/dir.c b/fs/sysv/dir.c index 63c1bcb224ee..c0f0a3e643eb 100644 --- a/fs/sysv/dir.c +++ b/fs/sysv/dir.c @@ -30,7 +30,7 @@ const struct file_operations sysv_dir_operations = { static inline void dir_put_page(struct page *page) { kunmap(page); - page_cache_release(page); + put_page(page); } static int dir_commit_chunk(struct page *page, loff_t pos, unsigned len) @@ -73,8 +73,8 @@ static int sysv_readdir(struct file *file, struct dir_context *ctx) if (pos >= inode->i_size) return 0; - offset = pos & ~PAGE_CACHE_MASK; - n = pos >> PAGE_CACHE_SHIFT; + offset = pos & ~PAGE_MASK; + n = pos >> PAGE_SHIFT; for ( ; n < npages; n++, offset = 0) { char *kaddr, *limit; @@ -85,7 +85,7 @@ static int sysv_readdir(struct file *file, struct dir_context *ctx) continue; kaddr = (char *)page_address(page); de = (struct sysv_dir_entry *)(kaddr+offset); - limit = kaddr + PAGE_CACHE_SIZE - SYSV_DIRSIZE; + limit = kaddr + PAGE_SIZE - SYSV_DIRSIZE; for ( ;(char*)de <= limit; de++, ctx->pos += sizeof(*de)) { char *name = de->name; @@ -146,7 +146,7 @@ struct sysv_dir_entry *sysv_find_entry(struct dentry *dentry, struct page **res_ if (!IS_ERR(page)) { kaddr = (char*)page_address(page); de = (struct sysv_dir_entry *) kaddr; - kaddr += PAGE_CACHE_SIZE - SYSV_DIRSIZE; + kaddr += PAGE_SIZE - SYSV_DIRSIZE; for ( ; (char *) de <= kaddr ; de++) { if (!de->inode) continue; @@ -190,7 +190,7 @@ int sysv_add_link(struct dentry *dentry, struct inode *inode) goto out; kaddr = (char*)page_address(page); de = (struct sysv_dir_entry *)kaddr; - kaddr += PAGE_CACHE_SIZE - SYSV_DIRSIZE; + kaddr += PAGE_SIZE - SYSV_DIRSIZE; while ((char *)de <= kaddr) { if (!de->inode) goto got_it; @@ -261,7 +261,7 @@ int sysv_make_empty(struct inode *inode, struct inode *dir) kmap(page); base = (char*)page_address(page); - memset(base, 0, PAGE_CACHE_SIZE); + memset(base, 0, PAGE_SIZE); de = (struct sysv_dir_entry *) base; de->inode = cpu_to_fs16(SYSV_SB(inode->i_sb), inode->i_ino); @@ -273,7 +273,7 @@ int sysv_make_empty(struct inode *inode, struct inode *dir) kunmap(page); err = dir_commit_chunk(page, 0, 2 * SYSV_DIRSIZE); fail: - page_cache_release(page); + put_page(page); return err; } @@ -296,7 +296,7 @@ int sysv_empty_dir(struct inode * inode) kaddr = (char *)page_address(page); de = (struct sysv_dir_entry *)kaddr; - kaddr += PAGE_CACHE_SIZE-SYSV_DIRSIZE; + kaddr += PAGE_SIZE-SYSV_DIRSIZE; for ( ;(char *)de <= kaddr; de++) { if (!de->inode) diff --git a/fs/sysv/namei.c b/fs/sysv/namei.c index 11e83ed0b4bf..90b60c03b588 100644 --- a/fs/sysv/namei.c +++ b/fs/sysv/namei.c @@ -264,11 +264,11 @@ static int sysv_rename(struct inode * old_dir, struct dentry * old_dentry, out_dir: if (dir_de) { kunmap(dir_page); - page_cache_release(dir_page); + put_page(dir_page); } out_old: kunmap(old_page); - page_cache_release(old_page); + put_page(old_page); out: return err; } diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c index 065c88f8e4b8..1a9c6640e604 100644 --- a/fs/ubifs/file.c +++ b/fs/ubifs/file.c @@ -121,7 +121,7 @@ static int do_readpage(struct page *page) if (block >= beyond) { /* Reading beyond inode */ SetPageChecked(page); - 
memset(addr, 0, PAGE_CACHE_SIZE); + memset(addr, 0, PAGE_SIZE); goto out; } @@ -223,7 +223,7 @@ static int write_begin_slow(struct address_space *mapping, { struct inode *inode = mapping->host; struct ubifs_info *c = inode->i_sb->s_fs_info; - pgoff_t index = pos >> PAGE_CACHE_SHIFT; + pgoff_t index = pos >> PAGE_SHIFT; struct ubifs_budget_req req = { .new_page = 1 }; int uninitialized_var(err), appending = !!(pos + len > inode->i_size); struct page *page; @@ -254,13 +254,13 @@ static int write_begin_slow(struct address_space *mapping, } if (!PageUptodate(page)) { - if (!(pos & ~PAGE_CACHE_MASK) && len == PAGE_CACHE_SIZE) + if (!(pos & ~PAGE_MASK) && len == PAGE_SIZE) SetPageChecked(page); else { err = do_readpage(page); if (err) { unlock_page(page); - page_cache_release(page); + put_page(page); ubifs_release_budget(c, &req); return err; } @@ -428,7 +428,7 @@ static int ubifs_write_begin(struct file *file, struct address_space *mapping, struct inode *inode = mapping->host; struct ubifs_info *c = inode->i_sb->s_fs_info; struct ubifs_inode *ui = ubifs_inode(inode); - pgoff_t index = pos >> PAGE_CACHE_SHIFT; + pgoff_t index = pos >> PAGE_SHIFT; int uninitialized_var(err), appending = !!(pos + len > inode->i_size); int skipped_read = 0; struct page *page; @@ -446,7 +446,7 @@ static int ubifs_write_begin(struct file *file, struct address_space *mapping, if (!PageUptodate(page)) { /* The page is not loaded from the flash */ - if (!(pos & ~PAGE_CACHE_MASK) && len == PAGE_CACHE_SIZE) { + if (!(pos & ~PAGE_MASK) && len == PAGE_SIZE) { /* * We change whole page so no need to load it. But we * do not know whether this page exists on the media or @@ -462,7 +462,7 @@ static int ubifs_write_begin(struct file *file, struct address_space *mapping, err = do_readpage(page); if (err) { unlock_page(page); - page_cache_release(page); + put_page(page); return err; } } @@ -494,7 +494,7 @@ static int ubifs_write_begin(struct file *file, struct address_space *mapping, mutex_unlock(&ui->ui_mutex); } unlock_page(page); - page_cache_release(page); + put_page(page); return write_begin_slow(mapping, pos, len, pagep, flags); } @@ -549,7 +549,7 @@ static int ubifs_write_end(struct file *file, struct address_space *mapping, dbg_gen("ino %lu, pos %llu, pg %lu, len %u, copied %d, i_size %lld", inode->i_ino, pos, page->index, len, copied, inode->i_size); - if (unlikely(copied < len && len == PAGE_CACHE_SIZE)) { + if (unlikely(copied < len && len == PAGE_SIZE)) { /* * VFS copied less data to the page that it intended and * declared in its '->write_begin()' call via the @len @@ -593,7 +593,7 @@ static int ubifs_write_end(struct file *file, struct address_space *mapping, out: unlock_page(page); - page_cache_release(page); + put_page(page); return copied; } @@ -621,10 +621,10 @@ static int populate_page(struct ubifs_info *c, struct page *page, addr = zaddr = kmap(page); - end_index = (i_size - 1) >> PAGE_CACHE_SHIFT; + end_index = (i_size - 1) >> PAGE_SHIFT; if (!i_size || page->index > end_index) { hole = 1; - memset(addr, 0, PAGE_CACHE_SIZE); + memset(addr, 0, PAGE_SIZE); goto out_hole; } @@ -673,7 +673,7 @@ static int populate_page(struct ubifs_info *c, struct page *page, } if (end_index == page->index) { - int len = i_size & (PAGE_CACHE_SIZE - 1); + int len = i_size & (PAGE_SIZE - 1); if (len && len < read) memset(zaddr + len, 0, read - len); @@ -773,7 +773,7 @@ static int ubifs_do_bulk_read(struct ubifs_info *c, struct bu_info *bu, isize = i_size_read(inode); if (isize == 0) goto out_free; - end_index = ((isize - 1) >> 
PAGE_CACHE_SHIFT); + end_index = ((isize - 1) >> PAGE_SHIFT); for (page_idx = 1; page_idx < page_cnt; page_idx++) { pgoff_t page_offset = offset + page_idx; @@ -788,7 +788,7 @@ static int ubifs_do_bulk_read(struct ubifs_info *c, struct bu_info *bu, if (!PageUptodate(page)) err = populate_page(c, page, bu, &n); unlock_page(page); - page_cache_release(page); + put_page(page); if (err) break; } @@ -905,7 +905,7 @@ static int do_writepage(struct page *page, int len) #ifdef UBIFS_DEBUG struct ubifs_inode *ui = ubifs_inode(inode); spin_lock(&ui->ui_lock); - ubifs_assert(page->index <= ui->synced_i_size >> PAGE_CACHE_SHIFT); + ubifs_assert(page->index <= ui->synced_i_size >> PAGE_SHIFT); spin_unlock(&ui->ui_lock); #endif @@ -1001,8 +1001,8 @@ static int ubifs_writepage(struct page *page, struct writeback_control *wbc) struct inode *inode = page->mapping->host; struct ubifs_inode *ui = ubifs_inode(inode); loff_t i_size = i_size_read(inode), synced_i_size; - pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT; - int err, len = i_size & (PAGE_CACHE_SIZE - 1); + pgoff_t end_index = i_size >> PAGE_SHIFT; + int err, len = i_size & (PAGE_SIZE - 1); void *kaddr; dbg_gen("ino %lu, pg %lu, pg flags %#lx", @@ -1021,7 +1021,7 @@ static int ubifs_writepage(struct page *page, struct writeback_control *wbc) /* Is the page fully inside @i_size? */ if (page->index < end_index) { - if (page->index >= synced_i_size >> PAGE_CACHE_SHIFT) { + if (page->index >= synced_i_size >> PAGE_SHIFT) { err = inode->i_sb->s_op->write_inode(inode, NULL); if (err) goto out_unlock; @@ -1034,7 +1034,7 @@ static int ubifs_writepage(struct page *page, struct writeback_control *wbc) * with this. */ } - return do_writepage(page, PAGE_CACHE_SIZE); + return do_writepage(page, PAGE_SIZE); } /* @@ -1045,7 +1045,7 @@ static int ubifs_writepage(struct page *page, struct writeback_control *wbc) * writes to that region are not written out to the file." */ kaddr = kmap_atomic(page); - memset(kaddr + len, 0, PAGE_CACHE_SIZE - len); + memset(kaddr + len, 0, PAGE_SIZE - len); flush_dcache_page(page); kunmap_atomic(kaddr); @@ -1138,7 +1138,7 @@ static int do_truncation(struct ubifs_info *c, struct inode *inode, truncate_setsize(inode, new_size); if (offset) { - pgoff_t index = new_size >> PAGE_CACHE_SHIFT; + pgoff_t index = new_size >> PAGE_SHIFT; struct page *page; page = find_lock_page(inode->i_mapping, index); @@ -1157,9 +1157,9 @@ static int do_truncation(struct ubifs_info *c, struct inode *inode, clear_page_dirty_for_io(page); if (UBIFS_BLOCKS_PER_PAGE_SHIFT) offset = new_size & - (PAGE_CACHE_SIZE - 1); + (PAGE_SIZE - 1); err = do_writepage(page, offset); - page_cache_release(page); + put_page(page); if (err) goto out_budg; /* @@ -1173,7 +1173,7 @@ static int do_truncation(struct ubifs_info *c, struct inode *inode, * having to read it. */ unlock_page(page); - page_cache_release(page); + put_page(page); } } } @@ -1285,7 +1285,7 @@ static void ubifs_invalidatepage(struct page *page, unsigned int offset, struct ubifs_info *c = inode->i_sb->s_fs_info; ubifs_assert(PagePrivate(page)); - if (offset || length < PAGE_CACHE_SIZE) + if (offset || length < PAGE_SIZE) /* Partial page remains dirty */ return; diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index a233ba913be4..20daea9aa657 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -2240,9 +2240,9 @@ static int __init ubifs_init(void) * We require that PAGE_CACHE_SIZE is greater-than-or-equal-to * UBIFS_BLOCK_SIZE. It is assumed that both are powers of 2. 
*/ - if (PAGE_CACHE_SIZE < UBIFS_BLOCK_SIZE) { + if (PAGE_SIZE < UBIFS_BLOCK_SIZE) { pr_err("UBIFS error (pid %d): VFS page cache size is %u bytes, but UBIFS requires at least 4096 bytes", - current->pid, (unsigned int)PAGE_CACHE_SIZE); + current->pid, (unsigned int)PAGE_SIZE); return -EINVAL; } diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h index c2a57e193a81..4cd7e569cd00 100644 --- a/fs/ubifs/ubifs.h +++ b/fs/ubifs/ubifs.h @@ -46,8 +46,8 @@ #define UBIFS_SUPER_MAGIC 0x24051905 /* Number of UBIFS blocks per VFS page */ -#define UBIFS_BLOCKS_PER_PAGE (PAGE_CACHE_SIZE / UBIFS_BLOCK_SIZE) -#define UBIFS_BLOCKS_PER_PAGE_SHIFT (PAGE_CACHE_SHIFT - UBIFS_BLOCK_SHIFT) +#define UBIFS_BLOCKS_PER_PAGE (PAGE_SIZE / UBIFS_BLOCK_SIZE) +#define UBIFS_BLOCKS_PER_PAGE_SHIFT (PAGE_SHIFT - UBIFS_BLOCK_SHIFT) /* "File system end of life" sequence number watermark */ #define SQNUM_WARN_WATERMARK 0xFFFFFFFF00000000ULL diff --git a/fs/udf/file.c b/fs/udf/file.c index 1af98963d860..877ba1c9b461 100644 --- a/fs/udf/file.c +++ b/fs/udf/file.c @@ -46,7 +46,7 @@ static void __udf_adinicb_readpage(struct page *page) kaddr = kmap(page); memcpy(kaddr, iinfo->i_ext.i_data + iinfo->i_lenEAttr, inode->i_size); - memset(kaddr + inode->i_size, 0, PAGE_CACHE_SIZE - inode->i_size); + memset(kaddr + inode->i_size, 0, PAGE_SIZE - inode->i_size); flush_dcache_page(page); SetPageUptodate(page); kunmap(page); @@ -87,14 +87,14 @@ static int udf_adinicb_write_begin(struct file *file, { struct page *page; - if (WARN_ON_ONCE(pos >= PAGE_CACHE_SIZE)) + if (WARN_ON_ONCE(pos >= PAGE_SIZE)) return -EIO; page = grab_cache_page_write_begin(mapping, 0, flags); if (!page) return -ENOMEM; *pagep = page; - if (!PageUptodate(page) && len != PAGE_CACHE_SIZE) + if (!PageUptodate(page) && len != PAGE_SIZE) __udf_adinicb_readpage(page); return 0; } diff --git a/fs/udf/inode.c b/fs/udf/inode.c index 166d3ed32c39..2dc461eeb415 100644 --- a/fs/udf/inode.c +++ b/fs/udf/inode.c @@ -287,7 +287,7 @@ int udf_expand_file_adinicb(struct inode *inode) if (!PageUptodate(page)) { kaddr = kmap(page); memset(kaddr + iinfo->i_lenAlloc, 0x00, - PAGE_CACHE_SIZE - iinfo->i_lenAlloc); + PAGE_SIZE - iinfo->i_lenAlloc); memcpy(kaddr, iinfo->i_ext.i_data + iinfo->i_lenEAttr, iinfo->i_lenAlloc); flush_dcache_page(page); @@ -319,7 +319,7 @@ int udf_expand_file_adinicb(struct inode *inode) inode->i_data.a_ops = &udf_adinicb_aops; up_write(&iinfo->i_data_sem); } - page_cache_release(page); + put_page(page); mark_inode_dirty(inode); return err; diff --git a/fs/ufs/balloc.c b/fs/ufs/balloc.c index dc5fae601c24..0447b949c7f5 100644 --- a/fs/ufs/balloc.c +++ b/fs/ufs/balloc.c @@ -237,7 +237,7 @@ static void ufs_change_blocknr(struct inode *inode, sector_t beg, sector_t newb, struct page *locked_page) { const unsigned blks_per_page = - 1 << (PAGE_CACHE_SHIFT - inode->i_blkbits); + 1 << (PAGE_SHIFT - inode->i_blkbits); const unsigned mask = blks_per_page - 1; struct address_space * const mapping = inode->i_mapping; pgoff_t index, cur_index, last_index; @@ -255,9 +255,9 @@ static void ufs_change_blocknr(struct inode *inode, sector_t beg, cur_index = locked_page->index; end = count + beg; - last_index = end >> (PAGE_CACHE_SHIFT - inode->i_blkbits); + last_index = end >> (PAGE_SHIFT - inode->i_blkbits); for (i = beg; i < end; i = (i | mask) + 1) { - index = i >> (PAGE_CACHE_SHIFT - inode->i_blkbits); + index = i >> (PAGE_SHIFT - inode->i_blkbits); if (likely(cur_index != index)) { page = ufs_get_locked_page(mapping, index); diff --git a/fs/ufs/dir.c b/fs/ufs/dir.c index 
74f2e80288bf..0b1457292734 100644 --- a/fs/ufs/dir.c +++ b/fs/ufs/dir.c @@ -62,7 +62,7 @@ static int ufs_commit_chunk(struct page *page, loff_t pos, unsigned len) static inline void ufs_put_page(struct page *page) { kunmap(page); - page_cache_release(page); + put_page(page); } ino_t ufs_inode_by_name(struct inode *dir, const struct qstr *qstr) @@ -111,13 +111,13 @@ static void ufs_check_page(struct page *page) struct super_block *sb = dir->i_sb; char *kaddr = page_address(page); unsigned offs, rec_len; - unsigned limit = PAGE_CACHE_SIZE; + unsigned limit = PAGE_SIZE; const unsigned chunk_mask = UFS_SB(sb)->s_uspi->s_dirblksize - 1; struct ufs_dir_entry *p; char *error; - if ((dir->i_size >> PAGE_CACHE_SHIFT) == page->index) { - limit = dir->i_size & ~PAGE_CACHE_MASK; + if ((dir->i_size >> PAGE_SHIFT) == page->index) { + limit = dir->i_size & ~PAGE_MASK; if (limit & chunk_mask) goto Ebadsize; if (!limit) @@ -170,7 +170,7 @@ Einumber: bad_entry: ufs_error (sb, "ufs_check_page", "bad entry in directory #%lu: %s - " "offset=%lu, rec_len=%d, name_len=%d", - dir->i_ino, error, (page->index<<PAGE_CACHE_SHIFT)+offs, + dir->i_ino, error, (page->index<<PAGE_SHIFT)+offs, rec_len, ufs_get_de_namlen(sb, p)); goto fail; Eend: @@ -178,7 +178,7 @@ Eend: ufs_error(sb, __func__, "entry in directory #%lu spans the page boundary" "offset=%lu", - dir->i_ino, (page->index<<PAGE_CACHE_SHIFT)+offs); + dir->i_ino, (page->index<<PAGE_SHIFT)+offs); fail: SetPageChecked(page); SetPageError(page); @@ -211,9 +211,9 @@ ufs_last_byte(struct inode *inode, unsigned long page_nr) { unsigned last_byte = inode->i_size; - last_byte -= page_nr << PAGE_CACHE_SHIFT; - if (last_byte > PAGE_CACHE_SIZE) - last_byte = PAGE_CACHE_SIZE; + last_byte -= page_nr << PAGE_SHIFT; + if (last_byte > PAGE_SIZE) + last_byte = PAGE_SIZE; return last_byte; } @@ -341,7 +341,7 @@ int ufs_add_link(struct dentry *dentry, struct inode *inode) kaddr = page_address(page); dir_end = kaddr + ufs_last_byte(dir, n); de = (struct ufs_dir_entry *)kaddr; - kaddr += PAGE_CACHE_SIZE - reclen; + kaddr += PAGE_SIZE - reclen; while ((char *)de <= kaddr) { if ((char *)de == dir_end) { /* We hit i_size */ @@ -432,8 +432,8 @@ ufs_readdir(struct file *file, struct dir_context *ctx) loff_t pos = ctx->pos; struct inode *inode = file_inode(file); struct super_block *sb = inode->i_sb; - unsigned int offset = pos & ~PAGE_CACHE_MASK; - unsigned long n = pos >> PAGE_CACHE_SHIFT; + unsigned int offset = pos & ~PAGE_MASK; + unsigned long n = pos >> PAGE_SHIFT; unsigned long npages = dir_pages(inode); unsigned chunk_mask = ~(UFS_SB(sb)->s_uspi->s_dirblksize - 1); int need_revalidate = file->f_version != inode->i_version; @@ -454,14 +454,14 @@ ufs_readdir(struct file *file, struct dir_context *ctx) ufs_error(sb, __func__, "bad page in #%lu", inode->i_ino); - ctx->pos += PAGE_CACHE_SIZE - offset; + ctx->pos += PAGE_SIZE - offset; return -EIO; } kaddr = page_address(page); if (unlikely(need_revalidate)) { if (offset) { offset = ufs_validate_entry(sb, kaddr, offset, chunk_mask); - ctx->pos = (n<<PAGE_CACHE_SHIFT) + offset; + ctx->pos = (n<<PAGE_SHIFT) + offset; } file->f_version = inode->i_version; need_revalidate = 0; @@ -574,7 +574,7 @@ int ufs_make_empty(struct inode * inode, struct inode *dir) kmap(page); base = (char*)page_address(page); - memset(base, 0, PAGE_CACHE_SIZE); + memset(base, 0, PAGE_SIZE); de = (struct ufs_dir_entry *) base; @@ -594,7 +594,7 @@ int ufs_make_empty(struct inode * inode, struct inode *dir) err = ufs_commit_chunk(page, 0, chunk_size); fail: - page_cache_release(page); + put_page(page); return err; } diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c index d897e169ab9c..9f49431e798d 100644 --- a/fs/ufs/inode.c +++ b/fs/ufs/inode.c @@ -1051,13 +1051,13 @@ static int ufs_alloc_lastblock(struct inode *inode, loff_t size) lastfrag--; lastpage = ufs_get_locked_page(mapping, lastfrag >> - (PAGE_CACHE_SHIFT - inode->i_blkbits)); + (PAGE_SHIFT - inode->i_blkbits)); if (IS_ERR(lastpage)) { err = -EIO; goto out; } - end = lastfrag & ((1 << (PAGE_CACHE_SHIFT -
inode->i_blkbits)) - 1); + end = lastfrag & ((1 << (PAGE_SHIFT - inode->i_blkbits)) - 1); bh = page_buffers(lastpage); for (i = 0; i < end; ++i) bh = bh->b_this_page; diff --git a/fs/ufs/namei.c b/fs/ufs/namei.c index acf4a3b61b81..a1559f762805 100644 --- a/fs/ufs/namei.c +++ b/fs/ufs/namei.c @@ -305,7 +305,7 @@ static int ufs_rename(struct inode *old_dir, struct dentry *old_dentry, ufs_set_link(old_inode, dir_de, dir_page, new_dir, 0); else { kunmap(dir_page); - page_cache_release(dir_page); + put_page(dir_page); } inode_dec_link_count(old_dir); } @@ -315,11 +315,11 @@ static int ufs_rename(struct inode *old_dir, struct dentry *old_dentry, out_dir: if (dir_de) { kunmap(dir_page); - page_cache_release(dir_page); + put_page(dir_page); } out_old: kunmap(old_page); - page_cache_release(old_page); + put_page(old_page); out: return err; } diff --git a/fs/ufs/util.c b/fs/ufs/util.c index b6c2f94e041e..a409e3e7827a 100644 --- a/fs/ufs/util.c +++ b/fs/ufs/util.c @@ -261,14 +261,14 @@ struct page *ufs_get_locked_page(struct address_space *mapping, if (unlikely(page->mapping == NULL)) { /* Truncate got there first */ unlock_page(page); - page_cache_release(page); + put_page(page); page = NULL; goto out; } if (!PageUptodate(page) || PageError(page)) { unlock_page(page); - page_cache_release(page); + put_page(page); printk(KERN_ERR "ufs_change_blocknr: " "can not read page: ino %lu, index: %lu\n", diff --git a/fs/ufs/util.h b/fs/ufs/util.h index 954175928240..b7fbf53dbc81 100644 --- a/fs/ufs/util.h +++ b/fs/ufs/util.h @@ -283,7 +283,7 @@ extern struct page *ufs_get_locked_page(struct address_space *mapping, static inline void ufs_put_locked_page(struct page *page) { unlock_page(page); - page_cache_release(page); + put_page(page); } diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index 041b6948aecc..ce41d7fe753c 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -3742,11 +3742,11 @@ xfs_bmap_btalloc( args.prod = align; if ((args.mod = (xfs_extlen_t)do_mod(ap->offset, args.prod))) args.mod = (xfs_extlen_t)(args.prod - args.mod); - } else if (mp->m_sb.sb_blocksize >= PAGE_CACHE_SIZE) { + } else if (mp->m_sb.sb_blocksize >= PAGE_SIZE) { args.prod = 1; args.mod = 0; } else { - args.prod = PAGE_CACHE_SIZE >> mp->m_sb.sb_blocklog; + args.prod = PAGE_SIZE >> mp->m_sb.sb_blocklog; if ((args.mod = (xfs_extlen_t)(do_mod(ap->offset, args.prod)))) args.mod = (xfs_extlen_t)(args.prod - args.mod); } diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index d445a64b979e..5b8ae03ba855 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -704,7 +704,7 @@ next_buffer: xfs_iunlock(ip, XFS_ILOCK_EXCL); out_invalidate: - xfs_vm_invalidatepage(page, 0, PAGE_CACHE_SIZE); + xfs_vm_invalidatepage(page, 0, PAGE_SIZE); return; } @@ -925,9 +925,9 @@ xfs_do_writepage( * ---------------------------------^------------------| */ offset = i_size_read(inode); - end_index = offset >> PAGE_CACHE_SHIFT; + end_index = offset >> PAGE_SHIFT; if (page->index < end_index) - end_offset = (xfs_off_t)(page->index + 1) << PAGE_CACHE_SHIFT; + end_offset = (xfs_off_t)(page->index + 1) << PAGE_SHIFT; else { /* * Check whether the page to write out is beyond or straddles @@ -940,7 +940,7 @@ xfs_do_writepage( * | | Straddles | * ---------------------------------^-----------|--------| */ - unsigned offset_into_page = offset & (PAGE_CACHE_SIZE - 1); + unsigned offset_into_page = offset & (PAGE_SIZE - 1); /* * Skip the page if it is fully outside i_size, e.g. 
due to a @@ -971,7 +971,7 @@ xfs_do_writepage( * memory is zeroed when mapped, and writes to that region are * not written out to the file." */ - zero_user_segment(page, offset_into_page, PAGE_CACHE_SIZE); + zero_user_segment(page, offset_into_page, PAGE_SIZE); /* Adjust the end_offset to the end of file */ end_offset = offset; @@ -1475,7 +1475,7 @@ xfs_vm_write_failed( loff_t block_offset; loff_t block_start; loff_t block_end; - loff_t from = pos & (PAGE_CACHE_SIZE - 1); + loff_t from = pos & (PAGE_SIZE - 1); loff_t to = from + len; struct buffer_head *bh, *head; struct xfs_mount *mp = XFS_I(inode)->i_mount; @@ -1491,7 +1491,7 @@ xfs_vm_write_failed( * start of the page by using shifts rather than masks the mismatch * problem. */ - block_offset = (pos >> PAGE_CACHE_SHIFT) << PAGE_CACHE_SHIFT; + block_offset = (pos >> PAGE_SHIFT) << PAGE_SHIFT; ASSERT(block_offset + from == pos); @@ -1558,7 +1558,7 @@ xfs_vm_write_begin( struct page **pagep, void **fsdata) { - pgoff_t index = pos >> PAGE_CACHE_SHIFT; + pgoff_t index = pos >> PAGE_SHIFT; struct page *page; int status; struct xfs_mount *mp = XFS_I(mapping->host)->i_mount; @@ -1592,7 +1592,7 @@ xfs_vm_write_begin( truncate_pagecache_range(inode, start, pos + len); } - page_cache_release(page); + put_page(page); page = NULL; } diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c index a32c1dcae2ff..3b6309865c65 100644 --- a/fs/xfs/xfs_bmap_util.c +++ b/fs/xfs/xfs_bmap_util.c @@ -1237,7 +1237,7 @@ xfs_free_file_space( /* wait for the completion of any pending DIOs */ inode_dio_wait(VFS_I(ip)); - rounding = max_t(xfs_off_t, 1 << mp->m_sb.sb_blocklog, PAGE_CACHE_SIZE); + rounding = max_t(xfs_off_t, 1 << mp->m_sb.sb_blocklog, PAGE_SIZE); ioffset = round_down(offset, rounding); iendoffset = round_up(offset + len, rounding) - 1; error = filemap_write_and_wait_range(VFS_I(ip)->i_mapping, ioffset, @@ -1466,7 +1466,7 @@ xfs_shift_file_space( if (error) return error; error = invalidate_inode_pages2_range(VFS_I(ip)->i_mapping, - offset >> PAGE_CACHE_SHIFT, -1); + offset >> PAGE_SHIFT, -1); if (error) return error; diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index ac0fd32de31e..569938a4a357 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -106,8 +106,8 @@ xfs_iozero( unsigned offset, bytes; void *fsdata; - offset = (pos & (PAGE_CACHE_SIZE -1)); /* Within page */ - bytes = PAGE_CACHE_SIZE - offset; + offset = (pos & (PAGE_SIZE -1)); /* Within page */ + bytes = PAGE_SIZE - offset; if (bytes > count) bytes = count; @@ -799,8 +799,8 @@ xfs_file_dio_aio_write( /* see generic_file_direct_write() for why this is necessary */ if (mapping->nrpages) { invalidate_inode_pages2_range(mapping, - pos >> PAGE_CACHE_SHIFT, - end >> PAGE_CACHE_SHIFT); + pos >> PAGE_SHIFT, + end >> PAGE_SHIFT); } if (ret > 0) { @@ -1207,9 +1207,9 @@ xfs_find_get_desired_pgoff( pagevec_init(&pvec, 0); - index = startoff >> PAGE_CACHE_SHIFT; + index = startoff >> PAGE_SHIFT; endoff = XFS_FSB_TO_B(mp, map->br_startoff + map->br_blockcount); - end = endoff >> PAGE_CACHE_SHIFT; + end = endoff >> PAGE_SHIFT; do { int want; unsigned nr_pages; diff --git a/fs/xfs/xfs_linux.h b/fs/xfs/xfs_linux.h index ec0e239a0fa9..a8192dc797dc 100644 --- a/fs/xfs/xfs_linux.h +++ b/fs/xfs/xfs_linux.h @@ -135,7 +135,7 @@ typedef __u32 xfs_nlink_t; * Size of block device i/o is parameterized here. * Currently the system supports page-sized i/o. 
*/ -#define BLKDEV_IOSHIFT PAGE_CACHE_SHIFT +#define BLKDEV_IOSHIFT PAGE_SHIFT #define BLKDEV_IOSIZE (1<<BLKDEV_IOSHIFT) /* number of BB's per block device block */ #define BLKDEV_BB BTOBB(BLKDEV_IOSIZE) diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -171,7 +171,7 @@ xfs_sb_validate_fsb_count( ASSERT(sbp->sb_blocklog >= BBSHIFT); /* Limited by ULONG_MAX of page cache index */ - if (nblocks >> (PAGE_CACHE_SHIFT - sbp->sb_blocklog) > ULONG_MAX) + if (nblocks >> (PAGE_SHIFT - sbp->sb_blocklog) > ULONG_MAX) return -EFBIG; return 0; } diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index bac6b3435591..eafe257b357a 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -231,12 +231,12 @@ static inline unsigned long xfs_preferred_iosize(xfs_mount_t *mp) { if (mp->m_flags & XFS_MOUNT_COMPAT_IOSIZE) - return PAGE_CACHE_SIZE; + return PAGE_SIZE; return (mp->m_swidth ? (mp->m_swidth << mp->m_sb.sb_blocklog) : ((mp->m_flags & XFS_MOUNT_DFLT_IOSIZE) ? (1 << (int)MAX(mp->m_readio_log, mp->m_writeio_log)) : - PAGE_CACHE_SIZE)); + PAGE_SIZE)); } #define XFS_LAST_UNMOUNT_WAS_CLEAN(mp) \ diff --git a/fs/xfs/xfs_pnfs.c b/fs/xfs/xfs_pnfs.c index ade236e90bb3..51ddaf2c2b8c 100644 --- a/fs/xfs/xfs_pnfs.c +++ b/fs/xfs/xfs_pnfs.c @@ -293,8 +293,8 @@ xfs_fs_commit_blocks( * Make sure reads through the pagecache see the new data. */ error = invalidate_inode_pages2_range(inode->i_mapping, - start >> PAGE_CACHE_SHIFT, - (end - 1) >> PAGE_CACHE_SHIFT); + start >> PAGE_SHIFT, + (end - 1) >> PAGE_SHIFT); WARN_ON_ONCE(error); error = xfs_iomap_write_unwritten(ip, start, length); diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index d760934109b5..6082f54e0557 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -570,10 +570,10 @@ xfs_max_file_offset( #if BITS_PER_LONG == 32 # if defined(CONFIG_LBDAF) ASSERT(sizeof(sector_t) == 8); - pagefactor = PAGE_CACHE_SIZE; + pagefactor = PAGE_SIZE; bitshift = BITS_PER_LONG; # else - pagefactor = PAGE_CACHE_SIZE >> (PAGE_CACHE_SHIFT - blockshift); + pagefactor = PAGE_SIZE >> (PAGE_SHIFT - blockshift); # endif #endif diff --git a/include/linux/bio.h b/include/linux/bio.h index 88bc64f00bb5..6b7481f62218 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -41,7 +41,7 @@ #endif #define BIO_MAX_PAGES 256 -#define BIO_MAX_SIZE (BIO_MAX_PAGES << PAGE_CACHE_SHIFT) +#define BIO_MAX_SIZE (BIO_MAX_PAGES << PAGE_SHIFT) #define BIO_MAX_SECTORS (BIO_MAX_SIZE >> 9) /* diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 7e5d7e018bea..669e419d6234 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1372,7 +1372,7 @@ unsigned char *read_dev_sector(struct block_device *, sector_t, Sector *); static inline void put_dev_sector(Sector p) { - page_cache_release(p.v); + put_page(p.v); } static inline bool __bvec_gap_to_prev(struct request_queue *q, diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index c67f052cc5e5..d48daa3f6f20 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -43,7 +43,7 @@ enum bh_state_bits { */ }; -#define MAX_BUF_PER_PAGE (PAGE_CACHE_SIZE / 512) +#define MAX_BUF_PER_PAGE (PAGE_SIZE / 512) struct page; struct buffer_head; @@ -263,7 +263,7 @@ void buffer_init(void); static inline void attach_page_buffers(struct page *page, struct buffer_head *head) { - page_cache_get(page); + get_page(page); SetPagePrivate(page); set_page_private(page, (unsigned long)head); } diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h index e7975e4681e1..db92a8d4926e 100644 --- a/include/linux/ceph/libceph.h +++ b/include/linux/ceph/libceph.h @@ -176,8 +176,8 @@ extern void ceph_put_snap_context(struct ceph_snap_context *sc); */ static inline int calc_pages_for(u64 off, u64
len) { - return ((off+len+PAGE_CACHE_SIZE-1) >> PAGE_CACHE_SHIFT) - - (off >> PAGE_CACHE_SHIFT); + return ((off+len+PAGE_SIZE-1) >> PAGE_SHIFT) - + (off >> PAGE_SHIFT); } extern struct kmem_cache *ceph_inode_cachep; diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index 9eb215a155e0..b90e9bdbd1dd 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -262,7 +262,7 @@ struct f2fs_node { /* * For NAT entries */ -#define NAT_ENTRY_PER_BLOCK (PAGE_CACHE_SIZE / sizeof(struct f2fs_nat_entry)) +#define NAT_ENTRY_PER_BLOCK (PAGE_SIZE / sizeof(struct f2fs_nat_entry)) struct f2fs_nat_entry { __u8 version; /* latest version of cached nat entry */ @@ -282,7 +282,7 @@ struct f2fs_nat_block { * Not allow to change this. */ #define SIT_VBLOCK_MAP_SIZE 64 -#define SIT_ENTRY_PER_BLOCK (PAGE_CACHE_SIZE / sizeof(struct f2fs_sit_entry)) +#define SIT_ENTRY_PER_BLOCK (PAGE_SIZE / sizeof(struct f2fs_sit_entry)) /* * Note that f2fs_sit_entry->vblocks has the following bit-field information. diff --git a/include/linux/fs.h b/include/linux/fs.h index 14a97194b34b..304991a80e23 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -929,7 +929,7 @@ static inline struct file *get_file(struct file *f) /* Page cache limit. The filesystems should put that into their s_maxbytes limits, otherwise bad things can happen in VM. */ #if BITS_PER_LONG==32 -#define MAX_LFS_FILESIZE (((loff_t)PAGE_CACHE_SIZE << (BITS_PER_LONG-1))-1) +#define MAX_LFS_FILESIZE (((loff_t)PAGE_SIZE << (BITS_PER_LONG-1))-1) #elif BITS_PER_LONG==64 #define MAX_LFS_FILESIZE ((loff_t)0x7fffffffffffffffLL) #endif @@ -2067,7 +2067,7 @@ extern int generic_update_time(struct inode *, struct timespec *, int); /* /sys/fs */ extern struct kobject *fs_kobj; -#define MAX_RW_COUNT (INT_MAX & PAGE_CACHE_MASK) +#define MAX_RW_COUNT (INT_MAX & PAGE_MASK) #ifdef CONFIG_MANDATORY_FILE_LOCKING extern int locks_mandatory_locked(struct file *); diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index f2f650f136ee..efada239205e 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -184,7 +184,7 @@ nfs_list_entry(struct list_head *head) static inline loff_t req_offset(struct nfs_page *req) { - return (((loff_t)req->wb_index) << PAGE_CACHE_SHIFT) + req->wb_offset; + return (((loff_t)req->wb_index) << PAGE_SHIFT) + req->wb_offset; } #endif /* _LINUX_NFS_PAGE_H */ diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 1ebd65c91422..f396ccb900cc 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -390,13 +390,13 @@ static inline pgoff_t page_to_pgoff(struct page *page) return page->index << compound_order(page); if (likely(!PageTransTail(page))) - return page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT); + return page->index; /* * We don't initialize ->index for tail pages: calculate based on * head page */ - pgoff = compound_head(page)->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT); + pgoff = compound_head(page)->index; pgoff += page - compound_head(page); return pgoff; } @@ -406,12 +406,12 @@ static inline pgoff_t page_to_pgoff(struct page *page) */ static inline loff_t page_offset(struct page *page) { - return ((loff_t)page->index) << PAGE_CACHE_SHIFT; + return ((loff_t)page->index) << PAGE_SHIFT; } static inline loff_t page_file_offset(struct page *page) { - return ((loff_t)page_file_index(page)) << PAGE_CACHE_SHIFT; + return ((loff_t)page_file_index(page)) << PAGE_SHIFT; } extern pgoff_t linear_hugepage_index(struct vm_area_struct *vma, @@ -425,7 +425,7 @@ static inline pgoff_t 
linear_page_index(struct vm_area_struct *vma, return linear_hugepage_index(vma, address); pgoff = (address - vma->vm_start) >> PAGE_SHIFT; pgoff += vma->vm_pgoff; - return pgoff >> (PAGE_CACHE_SHIFT - PAGE_SHIFT); + return pgoff; } extern void __lock_page(struct page *page); @@ -671,8 +671,8 @@ static inline int add_to_page_cache(struct page *page, static inline unsigned long dir_pages(struct inode *inode) { - return (unsigned long)(inode->i_size + PAGE_CACHE_SIZE - 1) >> - PAGE_CACHE_SHIFT; + return (unsigned long)(inode->i_size + PAGE_SIZE - 1) >> + PAGE_SHIFT; } #endif /* _LINUX_PAGEMAP_H */ diff --git a/include/linux/swap.h b/include/linux/swap.h index d18b65c53dbb..3d980ea1c946 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -435,7 +435,7 @@ struct backing_dev_info; /* only sparc can not include linux/pagemap.h in this file * so leave page_cache_release and release_pages undeclared... */ #define free_page_and_swap_cache(page) \ - page_cache_release(page) + put_page(page) #define free_pages_and_swap_cache(pages, nr) \ release_pages((pages), (nr), false); diff --git a/ipc/mqueue.c b/ipc/mqueue.c index 781c1399c6a3..ade739f67f1d 100644 --- a/ipc/mqueue.c +++ b/ipc/mqueue.c @@ -307,8 +307,8 @@ static int mqueue_fill_super(struct super_block *sb, void *data, int silent) struct inode *inode; struct ipc_namespace *ns = data; - sb->s_blocksize = PAGE_CACHE_SIZE; - sb->s_blocksize_bits = PAGE_CACHE_SHIFT; + sb->s_blocksize = PAGE_SIZE; + sb->s_blocksize_bits = PAGE_SHIFT; sb->s_magic = MQUEUE_MAGIC; sb->s_op = &mqueue_super_ops; diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 220fc17b9718..7edc95edfaee 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -321,7 +321,7 @@ retry: copy_to_page(new_page, vaddr, &opcode, UPROBE_SWBP_INSN_SIZE); ret = __replace_page(vma, vaddr, old_page, new_page); - page_cache_release(new_page); + put_page(new_page); put_old: put_page(old_page); @@ -539,14 +539,14 @@ static int __copy_insn(struct address_space *mapping, struct file *filp, * see uprobe_register(). */ if (mapping->a_ops->readpage) - page = read_mapping_page(mapping, offset >> PAGE_CACHE_SHIFT, filp); + page = read_mapping_page(mapping, offset >> PAGE_SHIFT, filp); else - page = shmem_read_mapping_page(mapping, offset >> PAGE_CACHE_SHIFT); + page = shmem_read_mapping_page(mapping, offset >> PAGE_SHIFT); if (IS_ERR(page)) return PTR_ERR(page); copy_from_page(page, offset, insn, nbytes); - page_cache_release(page); + put_page(page); return 0; } diff --git a/mm/fadvise.c b/mm/fadvise.c index b8a5bc66b0c0..b8024fa7101d 100644 --- a/mm/fadvise.c +++ b/mm/fadvise.c @@ -97,8 +97,8 @@ SYSCALL_DEFINE4(fadvise64_64, int, fd, loff_t, offset, loff_t, len, int, advice) break; case POSIX_FADV_WILLNEED: /* First and last PARTIAL page! */ - start_index = offset >> PAGE_CACHE_SHIFT; - end_index = endbyte >> PAGE_CACHE_SHIFT; + start_index = offset >> PAGE_SHIFT; + end_index = endbyte >> PAGE_SHIFT; /* Careful about overflow on the "+1" */ nrpages = end_index - start_index + 1; @@ -124,8 +124,8 @@ SYSCALL_DEFINE4(fadvise64_64, int, fd, loff_t, offset, loff_t, len, int, advice) * preserved on the expectation that it is better to preserve * needed memory than to discard unneeded memory. 
*/ - start_index = (offset+(PAGE_CACHE_SIZE-1)) >> PAGE_CACHE_SHIFT; - end_index = (endbyte >> PAGE_CACHE_SHIFT); + start_index = (offset+(PAGE_SIZE-1)) >> PAGE_SHIFT; + end_index = (endbyte >> PAGE_SHIFT); if (end_index >= start_index) { unsigned long count = invalidate_mapping_pages(mapping, diff --git a/mm/filemap.c b/mm/filemap.c index a8c69c8c0a90..f2479af09da9 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -265,7 +265,7 @@ void delete_from_page_cache(struct page *page) if (freepage) freepage(page); - page_cache_release(page); + put_page(page); } EXPORT_SYMBOL(delete_from_page_cache); @@ -352,8 +352,8 @@ EXPORT_SYMBOL(filemap_flush); static int __filemap_fdatawait_range(struct address_space *mapping, loff_t start_byte, loff_t end_byte) { - pgoff_t index = start_byte >> PAGE_CACHE_SHIFT; - pgoff_t end = end_byte >> PAGE_CACHE_SHIFT; + pgoff_t index = start_byte >> PAGE_SHIFT; + pgoff_t end = end_byte >> PAGE_SHIFT; struct pagevec pvec; int nr_pages; int ret = 0; @@ -550,7 +550,7 @@ int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask) pgoff_t offset = old->index; freepage = mapping->a_ops->freepage; - page_cache_get(new); + get_page(new); new->mapping = mapping; new->index = offset; @@ -572,7 +572,7 @@ int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask) radix_tree_preload_end(); if (freepage) freepage(old); - page_cache_release(old); + put_page(old); } return error; @@ -651,7 +651,7 @@ static int __add_to_page_cache_locked(struct page *page, return error; } - page_cache_get(page); + get_page(page); page->mapping = mapping; page->index = offset; @@ -675,7 +675,7 @@ err_insert: spin_unlock_irq(&mapping->tree_lock); if (!huge) mem_cgroup_cancel_charge(page, memcg, false); - page_cache_release(page); + put_page(page); return error; } @@ -1083,7 +1083,7 @@ repeat: * include/linux/pagemap.h for details. */ if (unlikely(page != *pagep)) { - page_cache_release(page); + put_page(page); goto repeat; } } @@ -1121,7 +1121,7 @@ repeat: /* Has the page been truncated? */ if (unlikely(page->mapping != mapping)) { unlock_page(page); - page_cache_release(page); + put_page(page); goto repeat; } VM_BUG_ON_PAGE(page->index != offset, page); @@ -1168,7 +1168,7 @@ repeat: if (fgp_flags & FGP_LOCK) { if (fgp_flags & FGP_NOWAIT) { if (!trylock_page(page)) { - page_cache_release(page); + put_page(page); return NULL; } } else { @@ -1178,7 +1178,7 @@ repeat: /* Has the page been truncated? */ if (unlikely(page->mapping != mapping)) { unlock_page(page); - page_cache_release(page); + put_page(page); goto repeat; } VM_BUG_ON_PAGE(page->index != offset, page); @@ -1209,7 +1209,7 @@ no_page: err = add_to_page_cache_lru(page, mapping, offset, gfp_mask & GFP_RECLAIM_MASK); if (unlikely(err)) { - page_cache_release(page); + put_page(page); page = NULL; if (err == -EEXIST) goto repeat; @@ -1278,7 +1278,7 @@ repeat: /* Has the page moved? */ if (unlikely(page != *slot)) { - page_cache_release(page); + put_page(page); goto repeat; } export: @@ -1343,7 +1343,7 @@ repeat: /* Has the page moved? */ if (unlikely(page != *slot)) { - page_cache_release(page); + put_page(page); goto repeat; } @@ -1405,7 +1405,7 @@ repeat: /* Has the page moved? */ if (unlikely(page != *slot)) { - page_cache_release(page); + put_page(page); goto repeat; } @@ -1415,7 +1415,7 @@ repeat: * negatives, which is just confusing to the caller. 
*/ if (page->mapping == NULL || page->index != iter.index) { - page_cache_release(page); + put_page(page); break; } @@ -1482,7 +1482,7 @@ repeat: /* Has the page moved? */ if (unlikely(page != *slot)) { - page_cache_release(page); + put_page(page); goto repeat; } @@ -1549,7 +1549,7 @@ repeat: /* Has the page moved? */ if (unlikely(page != *slot)) { - page_cache_release(page); + put_page(page); goto repeat; } export: @@ -1610,11 +1610,11 @@ static ssize_t do_generic_file_read(struct file *filp, loff_t *ppos, unsigned int prev_offset; int error = 0; - index = *ppos >> PAGE_CACHE_SHIFT; - prev_index = ra->prev_pos >> PAGE_CACHE_SHIFT; - prev_offset = ra->prev_pos & (PAGE_CACHE_SIZE-1); - last_index = (*ppos + iter->count + PAGE_CACHE_SIZE-1) >> PAGE_CACHE_SHIFT; - offset = *ppos & ~PAGE_CACHE_MASK; + index = *ppos >> PAGE_SHIFT; + prev_index = ra->prev_pos >> PAGE_SHIFT; + prev_offset = ra->prev_pos & (PAGE_SIZE-1); + last_index = (*ppos + iter->count + PAGE_SIZE-1) >> PAGE_SHIFT; + offset = *ppos & ~PAGE_MASK; for (;;) { struct page *page; @@ -1648,7 +1648,7 @@ find_page: if (PageUptodate(page)) goto page_ok; - if (inode->i_blkbits == PAGE_CACHE_SHIFT || + if (inode->i_blkbits == PAGE_SHIFT || !mapping->a_ops->is_partially_uptodate) goto page_not_up_to_date; if (!trylock_page(page)) @@ -1672,18 +1672,18 @@ page_ok: */ isize = i_size_read(inode); - end_index = (isize - 1) >> PAGE_CACHE_SHIFT; + end_index = (isize - 1) >> PAGE_SHIFT; if (unlikely(!isize || index > end_index)) { - page_cache_release(page); + put_page(page); goto out; } /* nr is the maximum number of bytes to copy from this page */ - nr = PAGE_CACHE_SIZE; + nr = PAGE_SIZE; if (index == end_index) { - nr = ((isize - 1) & ~PAGE_CACHE_MASK) + 1; + nr = ((isize - 1) & ~PAGE_MASK) + 1; if (nr <= offset) { - page_cache_release(page); + put_page(page); goto out; } } @@ -1711,11 +1711,11 @@ page_ok: ret = copy_page_to_iter(page, offset, nr, iter); offset += ret; - index += offset >> PAGE_CACHE_SHIFT; - offset &= ~PAGE_CACHE_MASK; + index += offset >> PAGE_SHIFT; + offset &= ~PAGE_MASK; prev_offset = offset; - page_cache_release(page); + put_page(page); written += ret; if (!iov_iter_count(iter)) goto out; @@ -1735,7 +1735,7 @@ page_not_up_to_date_locked: /* Did it get truncated before we got the lock? */ if (!page->mapping) { unlock_page(page); - page_cache_release(page); + put_page(page); continue; } @@ -1757,7 +1757,7 @@ readpage: if (unlikely(error)) { if (error == AOP_TRUNCATED_PAGE) { - page_cache_release(page); + put_page(page); error = 0; goto find_page; } @@ -1774,7 +1774,7 @@ readpage: * invalidate_mapping_pages got it */ unlock_page(page); - page_cache_release(page); + put_page(page); goto find_page; } unlock_page(page); @@ -1789,7 +1789,7 @@ readpage: readpage_error: /* UHHUH! A synchronous read error occurred. Report it */ - page_cache_release(page); + put_page(page); goto out; no_cached_page: @@ -1805,7 +1805,7 @@ no_cached_page: error = add_to_page_cache_lru(page, mapping, index, mapping_gfp_constraint(mapping, GFP_KERNEL)); if (error) { - page_cache_release(page); + put_page(page); if (error == -EEXIST) { error = 0; goto find_page; @@ -1817,10 +1817,10 @@ no_cached_page: out: ra->prev_pos = prev_index; - ra->prev_pos <<= PAGE_CACHE_SHIFT; + ra->prev_pos <<= PAGE_SHIFT; ra->prev_pos |= prev_offset; - *ppos = ((loff_t)index << PAGE_CACHE_SHIFT) + offset; + *ppos = ((loff_t)index << PAGE_SHIFT) + offset; file_accessed(filp); return written ? 
written : error; } @@ -1912,7 +1912,7 @@ static int page_cache_read(struct file *file, pgoff_t offset, gfp_t gfp_mask) else if (ret == -EEXIST) ret = 0; /* losing race to add is OK */ - page_cache_release(page); + put_page(page); } while (ret == AOP_TRUNCATED_PAGE); @@ -2022,8 +2022,8 @@ int filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) loff_t size; int ret = 0; - size = round_up(i_size_read(inode), PAGE_CACHE_SIZE); - if (offset >= size >> PAGE_CACHE_SHIFT) + size = round_up(i_size_read(inode), PAGE_SIZE); + if (offset >= size >> PAGE_SHIFT) return VM_FAULT_SIGBUS; /* @@ -2049,7 +2049,7 @@ retry_find: } if (!lock_page_or_retry(page, vma->vm_mm, vmf->flags)) { - page_cache_release(page); + put_page(page); return ret | VM_FAULT_RETRY; } @@ -2072,10 +2072,10 @@ retry_find: * Found the page and have a reference on it. * We must recheck i_size under page lock. */ - size = round_up(i_size_read(inode), PAGE_CACHE_SIZE); - if (unlikely(offset >= size >> PAGE_CACHE_SHIFT)) { + size = round_up(i_size_read(inode), PAGE_SIZE); + if (unlikely(offset >= size >> PAGE_SHIFT)) { unlock_page(page); - page_cache_release(page); + put_page(page); return VM_FAULT_SIGBUS; } @@ -2120,7 +2120,7 @@ page_not_uptodate: if (!PageUptodate(page)) error = -EIO; } - page_cache_release(page); + put_page(page); if (!error || error == AOP_TRUNCATED_PAGE) goto retry_find; @@ -2164,7 +2164,7 @@ repeat: /* Has the page moved? */ if (unlikely(page != *slot)) { - page_cache_release(page); + put_page(page); goto repeat; } @@ -2178,8 +2178,8 @@ repeat: if (page->mapping != mapping || !PageUptodate(page)) goto unlock; - size = round_up(i_size_read(mapping->host), PAGE_CACHE_SIZE); - if (page->index >= size >> PAGE_CACHE_SHIFT) + size = round_up(i_size_read(mapping->host), PAGE_SIZE); + if (page->index >= size >> PAGE_SHIFT) goto unlock; pte = vmf->pte + page->index - vmf->pgoff; @@ -2195,7 +2195,7 @@ repeat: unlock: unlock_page(page); skip: - page_cache_release(page); + put_page(page); next: if (iter.index == vmf->max_pgoff) break; @@ -2278,7 +2278,7 @@ static struct page *wait_on_page_read(struct page *page) if (!IS_ERR(page)) { wait_on_page_locked(page); if (!PageUptodate(page)) { - page_cache_release(page); + put_page(page); page = ERR_PTR(-EIO); } } @@ -2301,7 +2301,7 @@ repeat: return ERR_PTR(-ENOMEM); err = add_to_page_cache_lru(page, mapping, index, gfp); if (unlikely(err)) { - page_cache_release(page); + put_page(page); if (err == -EEXIST) goto repeat; /* Presumably ENOMEM for radix tree node */ @@ -2311,7 +2311,7 @@ repeat: filler: err = filler(data, page); if (err < 0) { - page_cache_release(page); + put_page(page); return ERR_PTR(err); } @@ -2364,7 +2364,7 @@ filler: /* Case c or d, restart the operation */ if (!page->mapping) { unlock_page(page); - page_cache_release(page); + put_page(page); goto repeat; } @@ -2511,7 +2511,7 @@ generic_file_direct_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos) struct iov_iter data; write_len = iov_iter_count(from); - end = (pos + write_len - 1) >> PAGE_CACHE_SHIFT; + end = (pos + write_len - 1) >> PAGE_SHIFT; written = filemap_write_and_wait_range(mapping, pos, pos + write_len - 1); if (written) @@ -2525,7 +2525,7 @@ generic_file_direct_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos) */ if (mapping->nrpages) { written = invalidate_inode_pages2_range(mapping, - pos >> PAGE_CACHE_SHIFT, end); + pos >> PAGE_SHIFT, end); /* * If a page can not be invalidated, return 0 to fall back * to buffered write. 
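The hunks above and below are representative of the entire change: each substitution is mechanical, because PAGE_CACHE_SIZE, PAGE_CACHE_SHIFT and PAGE_CACHE_MASK have long been defined in include/linux/pagemap.h as plain aliases of PAGE_SIZE, PAGE_SHIFT and PAGE_MASK. For readers tracing the arithmetic, here is a minimal standalone C sketch of the byte-offset/page-index conversions that recur throughout this diff (a userspace illustration with hypothetical values, assuming the common 4 KiB page size; not code from the patch):

#include <stdio.h>

#define PAGE_SHIFT 12                 /* log2 of the page size: 4096 == 1 << 12 */
#define PAGE_SIZE  (1UL << PAGE_SHIFT)
#define PAGE_MASK  (~(PAGE_SIZE - 1)) /* clears the offset-within-page bits */

int main(void)
{
	unsigned long long pos = 12345;  /* hypothetical byte offset into a file */

	unsigned long index  = pos >> PAGE_SHIFT;  /* page cache index: 3 */
	unsigned long offset = pos & ~PAGE_MASK;   /* offset within that page: 57 */
	/* round up to a whole page count, as dir_pages() does earlier in this diff */
	unsigned long npages = (pos + PAGE_SIZE - 1) >> PAGE_SHIFT;  /* 4 */

	printf("index=%lu offset=%lu npages=%lu\n", index, offset, npages);
	return 0;
}

Because the old macros expanded to exactly these values, every -/+ pair here compiles to identical code; only the spelling changes.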
@@ -2550,7 +2550,7 @@ generic_file_direct_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos) */ if (mapping->nrpages) { invalidate_inode_pages2_range(mapping, - pos >> PAGE_CACHE_SHIFT, end); + pos >> PAGE_SHIFT, end); } if (written > 0) { @@ -2611,8 +2611,8 @@ ssize_t generic_perform_write(struct file *file, size_t copied; /* Bytes copied from user */ void *fsdata; - offset = (pos & (PAGE_CACHE_SIZE - 1)); - bytes = min_t(unsigned long, PAGE_CACHE_SIZE - offset, + offset = (pos & (PAGE_SIZE - 1)); + bytes = min_t(unsigned long, PAGE_SIZE - offset, iov_iter_count(i)); again: @@ -2665,7 +2665,7 @@ again: * because not all segments in the iov can be copied at * once without a pagefault. */ - bytes = min_t(unsigned long, PAGE_CACHE_SIZE - offset, + bytes = min_t(unsigned long, PAGE_SIZE - offset, iov_iter_single_seg_count(i)); goto again; } @@ -2752,8 +2752,8 @@ ssize_t __generic_file_write_iter(struct kiocb *iocb, struct iov_iter *from) iocb->ki_pos = endbyte + 1; written += status; invalidate_mapping_pages(mapping, - pos >> PAGE_CACHE_SHIFT, - endbyte >> PAGE_CACHE_SHIFT); + pos >> PAGE_SHIFT, + endbyte >> PAGE_SHIFT); } else { /* * We don't know how much we wrote, so just return diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 06058eaa173b..19d0d08b396f 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -3346,7 +3346,7 @@ retry_avoidcopy: old_page != pagecache_page) outside_reserve = 1; - page_cache_get(old_page); + get_page(old_page); /* * Drop page table lock as buddy allocator may be called. It will @@ -3364,7 +3364,7 @@ retry_avoidcopy: * may get SIGKILLed if it later faults. */ if (outside_reserve) { - page_cache_release(old_page); + put_page(old_page); BUG_ON(huge_pte_none(pte)); unmap_ref_private(mm, vma, old_page, address); BUG_ON(huge_pte_none(pte)); @@ -3425,9 +3425,9 @@ retry_avoidcopy: spin_unlock(ptl); mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end); out_release_all: - page_cache_release(new_page); + put_page(new_page); out_release_old: - page_cache_release(old_page); + put_page(old_page); spin_lock(ptl); /* Caller expects lock to be held */ return ret; diff --git a/mm/madvise.c b/mm/madvise.c index a01147359f3b..07427d3fcead 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -170,7 +170,7 @@ static int swapin_walk_pmd_entry(pmd_t *pmd, unsigned long start, page = read_swap_cache_async(entry, GFP_HIGHUSER_MOVABLE, vma, index); if (page) - page_cache_release(page); + put_page(page); } return 0; @@ -204,14 +204,14 @@ static void force_shm_swapin_readahead(struct vm_area_struct *vma, page = find_get_entry(mapping, index); if (!radix_tree_exceptional_entry(page)) { if (page) - page_cache_release(page); + put_page(page); continue; } swap = radix_to_swp_entry(page); page = read_swap_cache_async(swap, GFP_HIGHUSER_MOVABLE, NULL, 0); if (page) - page_cache_release(page); + put_page(page); } lru_add_drain(); /* Push any new pages onto the LRU now */ diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 5a544c6c0717..78f5f2641b91 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -538,7 +538,7 @@ static int delete_from_lru_cache(struct page *p) /* * drop the page count elevated by isolate_lru_page() */ - page_cache_release(p); + put_page(p); return 0; } return -EIO; diff --git a/mm/memory.c b/mm/memory.c index 098f00d05461..07a420488cda 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -2054,7 +2054,7 @@ static inline int wp_page_reuse(struct mm_struct *mm, VM_BUG_ON_PAGE(PageAnon(page), page); mapping = page->mapping; unlock_page(page); - 
page_cache_release(page); + put_page(page); if ((dirtied || page_mkwrite) && mapping) { /* @@ -2188,7 +2188,7 @@ static int wp_page_copy(struct mm_struct *mm, struct vm_area_struct *vma, } if (new_page) - page_cache_release(new_page); + put_page(new_page); pte_unmap_unlock(page_table, ptl); mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end); @@ -2203,14 +2203,14 @@ static int wp_page_copy(struct mm_struct *mm, struct vm_area_struct *vma, munlock_vma_page(old_page); unlock_page(old_page); } - page_cache_release(old_page); + put_page(old_page); } return page_copied ? VM_FAULT_WRITE : 0; oom_free_new: - page_cache_release(new_page); + put_page(new_page); oom: if (old_page) - page_cache_release(old_page); + put_page(old_page); return VM_FAULT_OOM; } @@ -2258,7 +2258,7 @@ static int wp_page_shared(struct mm_struct *mm, struct vm_area_struct *vma, { int page_mkwrite = 0; - page_cache_get(old_page); + get_page(old_page); if (vma->vm_ops && vma->vm_ops->page_mkwrite) { int tmp; @@ -2267,7 +2267,7 @@ static int wp_page_shared(struct mm_struct *mm, struct vm_area_struct *vma, tmp = do_page_mkwrite(vma, old_page, address); if (unlikely(!tmp || (tmp & (VM_FAULT_ERROR | VM_FAULT_NOPAGE)))) { - page_cache_release(old_page); + put_page(old_page); return tmp; } /* @@ -2281,7 +2281,7 @@ static int wp_page_shared(struct mm_struct *mm, struct vm_area_struct *vma, if (!pte_same(*page_table, orig_pte)) { unlock_page(old_page); pte_unmap_unlock(page_table, ptl); - page_cache_release(old_page); + put_page(old_page); return 0; } page_mkwrite = 1; @@ -2341,7 +2341,7 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma, */ if (PageAnon(old_page) && !PageKsm(old_page)) { if (!trylock_page(old_page)) { - page_cache_get(old_page); + get_page(old_page); pte_unmap_unlock(page_table, ptl); lock_page(old_page); page_table = pte_offset_map_lock(mm, pmd, address, @@ -2349,10 +2349,10 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma, if (!pte_same(*page_table, orig_pte)) { unlock_page(old_page); pte_unmap_unlock(page_table, ptl); - page_cache_release(old_page); + put_page(old_page); return 0; } - page_cache_release(old_page); + put_page(old_page); } if (reuse_swap_page(old_page)) { /* @@ -2375,7 +2375,7 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma, /* * Ok, we need to copy. Oh, well.. */ - page_cache_get(old_page); + get_page(old_page); pte_unmap_unlock(page_table, ptl); return wp_page_copy(mm, vma, address, page_table, pmd, @@ -2619,7 +2619,7 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma, * parallel locked swapcache. 
*/ unlock_page(swapcache); - page_cache_release(swapcache); + put_page(swapcache); } if (flags & FAULT_FLAG_WRITE) { @@ -2641,10 +2641,10 @@ out_nomap: out_page: unlock_page(page); out_release: - page_cache_release(page); + put_page(page); if (page != swapcache) { unlock_page(swapcache); - page_cache_release(swapcache); + put_page(swapcache); } return ret; } @@ -2752,7 +2752,7 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma, if (userfaultfd_missing(vma)) { pte_unmap_unlock(page_table, ptl); mem_cgroup_cancel_charge(page, memcg, false); - page_cache_release(page); + put_page(page); return handle_userfault(vma, address, flags, VM_UFFD_MISSING); } @@ -2771,10 +2771,10 @@ unlock: return 0; release: mem_cgroup_cancel_charge(page, memcg, false); - page_cache_release(page); + put_page(page); goto unlock; oom_free_page: - page_cache_release(page); + put_page(page); oom: return VM_FAULT_OOM; } @@ -2807,7 +2807,7 @@ static int __do_fault(struct vm_area_struct *vma, unsigned long address, if (unlikely(PageHWPoison(vmf.page))) { if (ret & VM_FAULT_LOCKED) unlock_page(vmf.page); - page_cache_release(vmf.page); + put_page(vmf.page); return VM_FAULT_HWPOISON; } @@ -2996,7 +2996,7 @@ static int do_read_fault(struct mm_struct *mm, struct vm_area_struct *vma, if (unlikely(!pte_same(*pte, orig_pte))) { pte_unmap_unlock(pte, ptl); unlock_page(fault_page); - page_cache_release(fault_page); + put_page(fault_page); return ret; } do_set_pte(vma, address, fault_page, pte, false, false); @@ -3024,7 +3024,7 @@ static int do_cow_fault(struct mm_struct *mm, struct vm_area_struct *vma, return VM_FAULT_OOM; if (mem_cgroup_try_charge(new_page, mm, GFP_KERNEL, &memcg, false)) { - page_cache_release(new_page); + put_page(new_page); return VM_FAULT_OOM; } @@ -3041,7 +3041,7 @@ static int do_cow_fault(struct mm_struct *mm, struct vm_area_struct *vma, pte_unmap_unlock(pte, ptl); if (fault_page) { unlock_page(fault_page); - page_cache_release(fault_page); + put_page(fault_page); } else { /* * The fault handler has no page to lock, so it holds @@ -3057,7 +3057,7 @@ static int do_cow_fault(struct mm_struct *mm, struct vm_area_struct *vma, pte_unmap_unlock(pte, ptl); if (fault_page) { unlock_page(fault_page); - page_cache_release(fault_page); + put_page(fault_page); } else { /* * The fault handler has no page to lock, so it holds @@ -3068,7 +3068,7 @@ static int do_cow_fault(struct mm_struct *mm, struct vm_area_struct *vma, return ret; uncharge_out: mem_cgroup_cancel_charge(new_page, memcg, false); - page_cache_release(new_page); + put_page(new_page); return ret; } @@ -3096,7 +3096,7 @@ static int do_shared_fault(struct mm_struct *mm, struct vm_area_struct *vma, tmp = do_page_mkwrite(vma, fault_page, address); if (unlikely(!tmp || (tmp & (VM_FAULT_ERROR | VM_FAULT_NOPAGE)))) { - page_cache_release(fault_page); + put_page(fault_page); return tmp; } } @@ -3105,7 +3105,7 @@ static int do_shared_fault(struct mm_struct *mm, struct vm_area_struct *vma, if (unlikely(!pte_same(*pte, orig_pte))) { pte_unmap_unlock(pte, ptl); unlock_page(fault_page); - page_cache_release(fault_page); + put_page(fault_page); return ret; } do_set_pte(vma, address, fault_page, pte, true, false); @@ -3736,7 +3736,7 @@ static int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm, buf, maddr + offset, bytes); } kunmap(page); - page_cache_release(page); + put_page(page); } len -= bytes; buf += bytes; diff --git a/mm/mincore.c b/mm/mincore.c index 563f32045490..0eed23d50adb 100644 --- a/mm/mincore.c +++ 
b/mm/mincore.c @@ -75,7 +75,7 @@ static unsigned char mincore_page(struct address_space *mapping, pgoff_t pgoff) #endif if (page) { present = PageUptodate(page); - page_cache_release(page); + put_page(page); } return present; @@ -226,7 +226,7 @@ SYSCALL_DEFINE3(mincore, unsigned long, start, size_t, len, unsigned char *tmp; /* Check the start address: needs to be page-aligned.. */ - if (start & ~PAGE_CACHE_MASK) + if (start & ~PAGE_MASK) return -EINVAL; /* ..and we need to be passed a valid user-space range */ diff --git a/mm/nommu.c b/mm/nommu.c index de8b6b6580c1..102e257cc6c3 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -141,7 +141,7 @@ long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, if (pages) { pages[i] = virt_to_page(start); if (pages[i]) - page_cache_get(pages[i]); + get_page(pages[i]); } if (vmas) vmas[i] = vma; diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 11ff8f758631..999792d35ccc 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -2176,8 +2176,8 @@ int write_cache_pages(struct address_space *mapping, cycled = 0; end = -1; } else { - index = wbc->range_start >> PAGE_CACHE_SHIFT; - end = wbc->range_end >> PAGE_CACHE_SHIFT; + index = wbc->range_start >> PAGE_SHIFT; + end = wbc->range_end >> PAGE_SHIFT; if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) range_whole = 1; cycled = 1; /* ignore range_cyclic tests */ @@ -2382,14 +2382,14 @@ int write_one_page(struct page *page, int wait) wait_on_page_writeback(page); if (clear_page_dirty_for_io(page)) { - page_cache_get(page); + get_page(page); ret = mapping->a_ops->writepage(page, &wbc); if (ret == 0 && wait) { wait_on_page_writeback(page); if (PageError(page)) ret = -EIO; } - page_cache_release(page); + put_page(page); } else { unlock_page(page); } @@ -2431,7 +2431,7 @@ void account_page_dirtied(struct page *page, struct address_space *mapping) __inc_zone_page_state(page, NR_DIRTIED); __inc_wb_stat(wb, WB_RECLAIMABLE); __inc_wb_stat(wb, WB_DIRTIED); - task_io_account_write(PAGE_CACHE_SIZE); + task_io_account_write(PAGE_SIZE); current->nr_dirtied++; this_cpu_inc(bdp_ratelimits); } @@ -2450,7 +2450,7 @@ void account_page_cleaned(struct page *page, struct address_space *mapping, mem_cgroup_dec_page_stat(page, MEM_CGROUP_STAT_DIRTY); dec_zone_page_state(page, NR_FILE_DIRTY); dec_wb_stat(wb, WB_RECLAIMABLE); - task_io_account_cancelled_write(PAGE_CACHE_SIZE); + task_io_account_cancelled_write(PAGE_SIZE); } } diff --git a/mm/page_io.c b/mm/page_io.c index 18aac7819cc9..cd92e3d67a32 100644 --- a/mm/page_io.c +++ b/mm/page_io.c @@ -252,7 +252,7 @@ out: static sector_t swap_page_sector(struct page *page) { - return (sector_t)__page_file_index(page) << (PAGE_CACHE_SHIFT - 9); + return (sector_t)__page_file_index(page) << (PAGE_SHIFT - 9); } int __swap_writepage(struct page *page, struct writeback_control *wbc, diff --git a/mm/readahead.c b/mm/readahead.c index 20e58e820e44..40be3ae0afe3 100644 --- a/mm/readahead.c +++ b/mm/readahead.c @@ -47,11 +47,11 @@ static void read_cache_pages_invalidate_page(struct address_space *mapping, if (!trylock_page(page)) BUG(); page->mapping = mapping; - do_invalidatepage(page, 0, PAGE_CACHE_SIZE); + do_invalidatepage(page, 0, PAGE_SIZE); page->mapping = NULL; unlock_page(page); } - page_cache_release(page); + put_page(page); } /* @@ -93,14 +93,14 @@ int read_cache_pages(struct address_space *mapping, struct list_head *pages, read_cache_pages_invalidate_page(mapping, page); continue; } - page_cache_release(page); + put_page(page); ret = filler(data, page); if 
(unlikely(ret)) { read_cache_pages_invalidate_pages(mapping, pages); break; } - task_io_account_read(PAGE_CACHE_SIZE); + task_io_account_read(PAGE_SIZE); } return ret; } @@ -130,7 +130,7 @@ static int read_pages(struct address_space *mapping, struct file *filp, mapping_gfp_constraint(mapping, GFP_KERNEL))) { mapping->a_ops->readpage(filp, page); } - page_cache_release(page); + put_page(page); } ret = 0; @@ -163,7 +163,7 @@ int __do_page_cache_readahead(struct address_space *mapping, struct file *filp, if (isize == 0) goto out; - end_index = ((isize - 1) >> PAGE_CACHE_SHIFT); + end_index = ((isize - 1) >> PAGE_SHIFT); /* * Preallocate as many pages as we will need. @@ -216,7 +216,7 @@ int force_page_cache_readahead(struct address_space *mapping, struct file *filp, while (nr_to_read) { int err; - unsigned long this_chunk = (2 * 1024 * 1024) / PAGE_CACHE_SIZE; + unsigned long this_chunk = (2 * 1024 * 1024) / PAGE_SIZE; if (this_chunk > nr_to_read) this_chunk = nr_to_read; @@ -425,7 +425,7 @@ ondemand_readahead(struct address_space *mapping, * trivial case: (offset - prev_offset) == 1 * unaligned reads: (offset - prev_offset) == 0 */ - prev_offset = (unsigned long long)ra->prev_pos >> PAGE_CACHE_SHIFT; + prev_offset = (unsigned long long)ra->prev_pos >> PAGE_SHIFT; if (offset - prev_offset <= 1UL) goto initial_readahead; @@ -558,8 +558,8 @@ SYSCALL_DEFINE3(readahead, int, fd, loff_t, offset, size_t, count) if (f.file) { if (f.file->f_mode & FMODE_READ) { struct address_space *mapping = f.file->f_mapping; - pgoff_t start = offset >> PAGE_CACHE_SHIFT; - pgoff_t end = (offset + count - 1) >> PAGE_CACHE_SHIFT; + pgoff_t start = offset >> PAGE_SHIFT; + pgoff_t end = (offset + count - 1) >> PAGE_SHIFT; unsigned long len = end - start + 1; ret = do_readahead(mapping, f.file, start, len); } diff --git a/mm/rmap.c b/mm/rmap.c index c399a0d41b31..525b92f866a7 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -1555,7 +1555,7 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma, discard: page_remove_rmap(page, PageHuge(page)); - page_cache_release(page); + put_page(page); out_unmap: pte_unmap_unlock(pte, ptl); diff --git a/mm/shmem.c b/mm/shmem.c index 9428c51ab2d6..719bd6b88d98 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -75,8 +75,8 @@ static struct vfsmount *shm_mnt; #include "internal.h" -#define BLOCKS_PER_PAGE (PAGE_CACHE_SIZE/512) -#define VM_ACCT(size) (PAGE_CACHE_ALIGN(size) >> PAGE_SHIFT) +#define BLOCKS_PER_PAGE (PAGE_SIZE/512) +#define VM_ACCT(size) (PAGE_ALIGN(size) >> PAGE_SHIFT) /* Pretend that each entry is of this size in directory's i_size */ #define BOGO_DIRENT_SIZE 20 @@ -176,13 +176,13 @@ static inline int shmem_reacct_size(unsigned long flags, static inline int shmem_acct_block(unsigned long flags) { return (flags & VM_NORESERVE) ? 
- security_vm_enough_memory_mm(current->mm, VM_ACCT(PAGE_CACHE_SIZE)) : 0; + security_vm_enough_memory_mm(current->mm, VM_ACCT(PAGE_SIZE)) : 0; } static inline void shmem_unacct_blocks(unsigned long flags, long pages) { if (flags & VM_NORESERVE) - vm_unacct_memory(pages * VM_ACCT(PAGE_CACHE_SIZE)); + vm_unacct_memory(pages * VM_ACCT(PAGE_SIZE)); } static const struct super_operations shmem_ops; @@ -300,7 +300,7 @@ static int shmem_add_to_page_cache(struct page *page, VM_BUG_ON_PAGE(!PageLocked(page), page); VM_BUG_ON_PAGE(!PageSwapBacked(page), page); - page_cache_get(page); + get_page(page); page->mapping = mapping; page->index = index; @@ -318,7 +318,7 @@ static int shmem_add_to_page_cache(struct page *page, } else { page->mapping = NULL; spin_unlock_irq(&mapping->tree_lock); - page_cache_release(page); + put_page(page); } return error; } @@ -338,7 +338,7 @@ static void shmem_delete_from_page_cache(struct page *page, void *radswap) __dec_zone_page_state(page, NR_FILE_PAGES); __dec_zone_page_state(page, NR_SHMEM); spin_unlock_irq(&mapping->tree_lock); - page_cache_release(page); + put_page(page); BUG_ON(error); } @@ -474,10 +474,10 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend, { struct address_space *mapping = inode->i_mapping; struct shmem_inode_info *info = SHMEM_I(inode); - pgoff_t start = (lstart + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; - pgoff_t end = (lend + 1) >> PAGE_CACHE_SHIFT; - unsigned int partial_start = lstart & (PAGE_CACHE_SIZE - 1); - unsigned int partial_end = (lend + 1) & (PAGE_CACHE_SIZE - 1); + pgoff_t start = (lstart + PAGE_SIZE - 1) >> PAGE_SHIFT; + pgoff_t end = (lend + 1) >> PAGE_SHIFT; + unsigned int partial_start = lstart & (PAGE_SIZE - 1); + unsigned int partial_end = (lend + 1) & (PAGE_SIZE - 1); struct pagevec pvec; pgoff_t indices[PAGEVEC_SIZE]; long nr_swaps_freed = 0; @@ -530,7 +530,7 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend, struct page *page = NULL; shmem_getpage(inode, start - 1, &page, SGP_READ, NULL); if (page) { - unsigned int top = PAGE_CACHE_SIZE; + unsigned int top = PAGE_SIZE; if (start > end) { top = partial_end; partial_end = 0; @@ -538,7 +538,7 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend, zero_user_segment(page, partial_start, top); set_page_dirty(page); unlock_page(page); - page_cache_release(page); + put_page(page); } } if (partial_end) { @@ -548,7 +548,7 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend, zero_user_segment(page, 0, partial_end); set_page_dirty(page); unlock_page(page); - page_cache_release(page); + put_page(page); } } if (start >= end) @@ -833,7 +833,7 @@ int shmem_unuse(swp_entry_t swap, struct page *page) mem_cgroup_commit_charge(page, memcg, true, false); out: unlock_page(page); - page_cache_release(page); + put_page(page); return error; } @@ -1080,7 +1080,7 @@ static int shmem_replace_page(struct page **pagep, gfp_t gfp, if (!newpage) return -ENOMEM; - page_cache_get(newpage); + get_page(newpage); copy_highpage(newpage, oldpage); flush_dcache_page(newpage); @@ -1120,8 +1120,8 @@ static int shmem_replace_page(struct page **pagep, gfp_t gfp, set_page_private(oldpage, 0); unlock_page(oldpage); - page_cache_release(oldpage); - page_cache_release(oldpage); + put_page(oldpage); + put_page(oldpage); return error; } @@ -1145,7 +1145,7 @@ static int shmem_getpage_gfp(struct inode *inode, pgoff_t index, int once = 0; int alloced = 0; - if (index > (MAX_LFS_FILESIZE >> PAGE_CACHE_SHIFT)) + 
if (index > (MAX_LFS_FILESIZE >> PAGE_SHIFT)) return -EFBIG; repeat: swap.val = 0; @@ -1156,7 +1156,7 @@ repeat: } if (sgp != SGP_WRITE && sgp != SGP_FALLOC && - ((loff_t)index << PAGE_CACHE_SHIFT) >= i_size_read(inode)) { + ((loff_t)index << PAGE_SHIFT) >= i_size_read(inode)) { error = -EINVAL; goto unlock; } @@ -1169,7 +1169,7 @@ repeat: if (sgp != SGP_READ) goto clear; unlock_page(page); - page_cache_release(page); + put_page(page); page = NULL; } if (page || (sgp == SGP_READ && !swap.val)) { @@ -1327,7 +1327,7 @@ clear: /* Perhaps the file has been truncated since we checked */ if (sgp != SGP_WRITE && sgp != SGP_FALLOC && - ((loff_t)index << PAGE_CACHE_SHIFT) >= i_size_read(inode)) { + ((loff_t)index << PAGE_SHIFT) >= i_size_read(inode)) { if (alloced) { ClearPageDirty(page); delete_from_page_cache(page); @@ -1355,7 +1355,7 @@ failed: unlock: if (page) { unlock_page(page); - page_cache_release(page); + put_page(page); } if (error == -ENOSPC && !once++) { info = SHMEM_I(inode); @@ -1577,7 +1577,7 @@ shmem_write_begin(struct file *file, struct address_space *mapping, { struct inode *inode = mapping->host; struct shmem_inode_info *info = SHMEM_I(inode); - pgoff_t index = pos >> PAGE_CACHE_SHIFT; + pgoff_t index = pos >> PAGE_SHIFT; /* i_mutex is held by caller */ if (unlikely(info->seals)) { @@ -1601,16 +1601,16 @@ shmem_write_end(struct file *file, struct address_space *mapping, i_size_write(inode, pos + copied); if (!PageUptodate(page)) { - if (copied < PAGE_CACHE_SIZE) { - unsigned from = pos & (PAGE_CACHE_SIZE - 1); + if (copied < PAGE_SIZE) { + unsigned from = pos & (PAGE_SIZE - 1); zero_user_segments(page, 0, from, - from + copied, PAGE_CACHE_SIZE); + from + copied, PAGE_SIZE); } SetPageUptodate(page); } set_page_dirty(page); unlock_page(page); - page_cache_release(page); + put_page(page); return copied; } @@ -1635,8 +1635,8 @@ static ssize_t shmem_file_read_iter(struct kiocb *iocb, struct iov_iter *to) if (!iter_is_iovec(to)) sgp = SGP_DIRTY; - index = *ppos >> PAGE_CACHE_SHIFT; - offset = *ppos & ~PAGE_CACHE_MASK; + index = *ppos >> PAGE_SHIFT; + offset = *ppos & ~PAGE_MASK; for (;;) { struct page *page = NULL; @@ -1644,11 +1644,11 @@ static ssize_t shmem_file_read_iter(struct kiocb *iocb, struct iov_iter *to) unsigned long nr, ret; loff_t i_size = i_size_read(inode); - end_index = i_size >> PAGE_CACHE_SHIFT; + end_index = i_size >> PAGE_SHIFT; if (index > end_index) break; if (index == end_index) { - nr = i_size & ~PAGE_CACHE_MASK; + nr = i_size & ~PAGE_MASK; if (nr <= offset) break; } @@ -1666,14 +1666,14 @@ static ssize_t shmem_file_read_iter(struct kiocb *iocb, struct iov_iter *to) * We must evaluate after, since reads (unlike writes) * are called without i_mutex protection against truncate */ - nr = PAGE_CACHE_SIZE; + nr = PAGE_SIZE; i_size = i_size_read(inode); - end_index = i_size >> PAGE_CACHE_SHIFT; + end_index = i_size >> PAGE_SHIFT; if (index == end_index) { - nr = i_size & ~PAGE_CACHE_MASK; + nr = i_size & ~PAGE_MASK; if (nr <= offset) { if (page) - page_cache_release(page); + put_page(page); break; } } @@ -1694,7 +1694,7 @@ static ssize_t shmem_file_read_iter(struct kiocb *iocb, struct iov_iter *to) mark_page_accessed(page); } else { page = ZERO_PAGE(0); - page_cache_get(page); + get_page(page); } /* @@ -1704,10 +1704,10 @@ static ssize_t shmem_file_read_iter(struct kiocb *iocb, struct iov_iter *to) ret = copy_page_to_iter(page, offset, nr, to); retval += ret; offset += ret; - index += offset >> PAGE_CACHE_SHIFT; - offset &= ~PAGE_CACHE_MASK; + index += offset >> 
PAGE_SHIFT; + offset &= ~PAGE_MASK; - page_cache_release(page); + put_page(page); if (!iov_iter_count(to)) break; if (ret < nr) { @@ -1717,7 +1717,7 @@ static ssize_t shmem_file_read_iter(struct kiocb *iocb, struct iov_iter *to) cond_resched(); } - *ppos = ((loff_t) index << PAGE_CACHE_SHIFT) + offset; + *ppos = ((loff_t) index << PAGE_SHIFT) + offset; file_accessed(file); return retval ? retval : error; } @@ -1755,9 +1755,9 @@ static ssize_t shmem_file_splice_read(struct file *in, loff_t *ppos, if (splice_grow_spd(pipe, &spd)) return -ENOMEM; - index = *ppos >> PAGE_CACHE_SHIFT; - loff = *ppos & ~PAGE_CACHE_MASK; - req_pages = (len + loff + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; + index = *ppos >> PAGE_SHIFT; + loff = *ppos & ~PAGE_MASK; + req_pages = (len + loff + PAGE_SIZE - 1) >> PAGE_SHIFT; nr_pages = min(req_pages, spd.nr_pages_max); spd.nr_pages = find_get_pages_contig(mapping, index, @@ -1774,7 +1774,7 @@ static ssize_t shmem_file_splice_read(struct file *in, loff_t *ppos, index++; } - index = *ppos >> PAGE_CACHE_SHIFT; + index = *ppos >> PAGE_SHIFT; nr_pages = spd.nr_pages; spd.nr_pages = 0; @@ -1784,7 +1784,7 @@ static ssize_t shmem_file_splice_read(struct file *in, loff_t *ppos, if (!len) break; - this_len = min_t(unsigned long, len, PAGE_CACHE_SIZE - loff); + this_len = min_t(unsigned long, len, PAGE_SIZE - loff); page = spd.pages[page_nr]; if (!PageUptodate(page) || page->mapping != mapping) { @@ -1793,19 +1793,19 @@ static ssize_t shmem_file_splice_read(struct file *in, loff_t *ppos, if (error) break; unlock_page(page); - page_cache_release(spd.pages[page_nr]); + put_page(spd.pages[page_nr]); spd.pages[page_nr] = page; } isize = i_size_read(inode); - end_index = (isize - 1) >> PAGE_CACHE_SHIFT; + end_index = (isize - 1) >> PAGE_SHIFT; if (unlikely(!isize || index > end_index)) break; if (end_index == index) { unsigned int plen; - plen = ((isize - 1) & ~PAGE_CACHE_MASK) + 1; + plen = ((isize - 1) & ~PAGE_MASK) + 1; if (plen <= loff) break; @@ -1822,7 +1822,7 @@ static ssize_t shmem_file_splice_read(struct file *in, loff_t *ppos, } while (page_nr < nr_pages) - page_cache_release(spd.pages[page_nr++]); + put_page(spd.pages[page_nr++]); if (spd.nr_pages) error = splice_to_pipe(pipe, &spd); @@ -1904,10 +1904,10 @@ static loff_t shmem_file_llseek(struct file *file, loff_t offset, int whence) else if (offset >= inode->i_size) offset = -ENXIO; else { - start = offset >> PAGE_CACHE_SHIFT; - end = (inode->i_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; + start = offset >> PAGE_SHIFT; + end = (inode->i_size + PAGE_SIZE - 1) >> PAGE_SHIFT; new_offset = shmem_seek_hole_data(mapping, start, end, whence); - new_offset <<= PAGE_CACHE_SHIFT; + new_offset <<= PAGE_SHIFT; if (new_offset > offset) { if (new_offset < inode->i_size) offset = new_offset; @@ -2203,8 +2203,8 @@ static long shmem_fallocate(struct file *file, int mode, loff_t offset, goto out; } - start = offset >> PAGE_CACHE_SHIFT; - end = (offset + len + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; + start = offset >> PAGE_SHIFT; + end = (offset + len + PAGE_SIZE - 1) >> PAGE_SHIFT; /* Try to avoid a swapstorm if len is impossible to satisfy */ if (sbinfo->max_blocks && end - start > sbinfo->max_blocks) { error = -ENOSPC; @@ -2237,8 +2237,8 @@ static long shmem_fallocate(struct file *file, int mode, loff_t offset, if (error) { /* Remove the !PageUptodate pages we added */ shmem_undo_range(inode, - (loff_t)start << PAGE_CACHE_SHIFT, - (loff_t)index << PAGE_CACHE_SHIFT, true); + (loff_t)start << PAGE_SHIFT, + (loff_t)index << 
PAGE_SHIFT, true); goto undone; } @@ -2259,7 +2259,7 @@ static long shmem_fallocate(struct file *file, int mode, loff_t offset, */ set_page_dirty(page); unlock_page(page); - page_cache_release(page); + put_page(page); cond_resched(); } @@ -2280,7 +2280,7 @@ static int shmem_statfs(struct dentry *dentry, struct kstatfs *buf) struct shmem_sb_info *sbinfo = SHMEM_SB(dentry->d_sb); buf->f_type = TMPFS_MAGIC; - buf->f_bsize = PAGE_CACHE_SIZE; + buf->f_bsize = PAGE_SIZE; buf->f_namelen = NAME_MAX; if (sbinfo->max_blocks) { buf->f_blocks = sbinfo->max_blocks; @@ -2523,7 +2523,7 @@ static int shmem_symlink(struct inode *dir, struct dentry *dentry, const char *s struct shmem_inode_info *info; len = strlen(symname) + 1; - if (len > PAGE_CACHE_SIZE) + if (len > PAGE_SIZE) return -ENAMETOOLONG; inode = shmem_get_inode(dir->i_sb, dir, S_IFLNK|S_IRWXUGO, 0, VM_NORESERVE); @@ -2562,7 +2562,7 @@ static int shmem_symlink(struct inode *dir, struct dentry *dentry, const char *s SetPageUptodate(page); set_page_dirty(page); unlock_page(page); - page_cache_release(page); + put_page(page); } dir->i_size += BOGO_DIRENT_SIZE; dir->i_ctime = dir->i_mtime = CURRENT_TIME; @@ -2835,7 +2835,7 @@ static int shmem_parse_options(char *options, struct shmem_sb_info *sbinfo, if (*rest) goto bad_val; sbinfo->max_blocks = - DIV_ROUND_UP(size, PAGE_CACHE_SIZE); + DIV_ROUND_UP(size, PAGE_SIZE); } else if (!strcmp(this_char,"nr_blocks")) { sbinfo->max_blocks = memparse(value, &rest); if (*rest) @@ -2940,7 +2940,7 @@ static int shmem_show_options(struct seq_file *seq, struct dentry *root) if (sbinfo->max_blocks != shmem_default_max_blocks()) seq_printf(seq, ",size=%luk", - sbinfo->max_blocks << (PAGE_CACHE_SHIFT - 10)); + sbinfo->max_blocks << (PAGE_SHIFT - 10)); if (sbinfo->max_inodes != shmem_default_max_inodes()) seq_printf(seq, ",nr_inodes=%lu", sbinfo->max_inodes); if (sbinfo->mode != (S_IRWXUGO | S_ISVTX)) @@ -3082,8 +3082,8 @@ int shmem_fill_super(struct super_block *sb, void *data, int silent) sbinfo->free_inodes = sbinfo->max_inodes; sb->s_maxbytes = MAX_LFS_FILESIZE; - sb->s_blocksize = PAGE_CACHE_SIZE; - sb->s_blocksize_bits = PAGE_CACHE_SHIFT; + sb->s_blocksize = PAGE_SIZE; + sb->s_blocksize_bits = PAGE_SHIFT; sb->s_magic = TMPFS_MAGIC; sb->s_op = &shmem_ops; sb->s_time_gran = 1; diff --git a/mm/swap.c b/mm/swap.c index 09fe5e97714a..ea641e247033 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -114,7 +114,7 @@ void put_pages_list(struct list_head *pages) victim = list_entry(pages->prev, struct page, lru); list_del(&victim->lru); - page_cache_release(victim); + put_page(victim); } } EXPORT_SYMBOL(put_pages_list); @@ -142,7 +142,7 @@ int get_kernel_pages(const struct kvec *kiov, int nr_segs, int write, return seg; pages[seg] = kmap_to_page(kiov[seg].iov_base); - page_cache_get(pages[seg]); + get_page(pages[seg]); } return seg; @@ -236,7 +236,7 @@ void rotate_reclaimable_page(struct page *page) struct pagevec *pvec; unsigned long flags; - page_cache_get(page); + get_page(page); local_irq_save(flags); pvec = this_cpu_ptr(&lru_rotate_pvecs); if (!pagevec_add(pvec, page)) @@ -294,7 +294,7 @@ void activate_page(struct page *page) if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) { struct pagevec *pvec = &get_cpu_var(activate_page_pvecs); - page_cache_get(page); + get_page(page); if (!pagevec_add(pvec, page)) pagevec_lru_move_fn(pvec, __activate_page, NULL); put_cpu_var(activate_page_pvecs); @@ -389,7 +389,7 @@ static void __lru_cache_add(struct page *page) { struct pagevec *pvec = &get_cpu_var(lru_add_pvec); - 
page_cache_get(page); + get_page(page); if (!pagevec_space(pvec)) __pagevec_lru_add(pvec); pagevec_add(pvec, page); @@ -646,7 +646,7 @@ void deactivate_page(struct page *page) if (PageLRU(page) && PageActive(page) && !PageUnevictable(page)) { struct pagevec *pvec = &get_cpu_var(lru_deactivate_pvecs); - page_cache_get(page); + get_page(page); if (!pagevec_add(pvec, page)) pagevec_lru_move_fn(pvec, lru_deactivate_fn, NULL); put_cpu_var(lru_deactivate_pvecs); diff --git a/mm/swap_state.c b/mm/swap_state.c index 69cb2464e7dc..366ce3518703 100644 --- a/mm/swap_state.c +++ b/mm/swap_state.c @@ -85,7 +85,7 @@ int __add_to_swap_cache(struct page *page, swp_entry_t entry) VM_BUG_ON_PAGE(PageSwapCache(page), page); VM_BUG_ON_PAGE(!PageSwapBacked(page), page); - page_cache_get(page); + get_page(page); SetPageSwapCache(page); set_page_private(page, entry.val); @@ -109,7 +109,7 @@ int __add_to_swap_cache(struct page *page, swp_entry_t entry) VM_BUG_ON(error == -EEXIST); set_page_private(page, 0UL); ClearPageSwapCache(page); - page_cache_release(page); + put_page(page); } return error; @@ -226,7 +226,7 @@ void delete_from_swap_cache(struct page *page) spin_unlock_irq(&address_space->tree_lock); swapcache_free(entry); - page_cache_release(page); + put_page(page); } /* @@ -252,7 +252,7 @@ static inline void free_swap_cache(struct page *page) void free_page_and_swap_cache(struct page *page) { free_swap_cache(page); - page_cache_release(page); + put_page(page); } /* @@ -380,7 +380,7 @@ struct page *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask, } while (err != -ENOMEM); if (new_page) - page_cache_release(new_page); + put_page(new_page); return found_page; } @@ -495,7 +495,7 @@ struct page *swapin_readahead(swp_entry_t entry, gfp_t gfp_mask, continue; if (offset != entry_offset) SetPageReadahead(page); - page_cache_release(page); + put_page(page); } blk_finish_plug(&plug); diff --git a/mm/swapfile.c b/mm/swapfile.c index 560ad380634c..83874eced5bf 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -119,7 +119,7 @@ __try_to_reclaim_swap(struct swap_info_struct *si, unsigned long offset) ret = try_to_free_swap(page); unlock_page(page); } - page_cache_release(page); + put_page(page); return ret; } @@ -1000,7 +1000,7 @@ int free_swap_and_cache(swp_entry_t entry) page = find_get_page(swap_address_space(entry), entry.val); if (page && !trylock_page(page)) { - page_cache_release(page); + put_page(page); page = NULL; } } @@ -1017,7 +1017,7 @@ int free_swap_and_cache(swp_entry_t entry) SetPageDirty(page); } unlock_page(page); - page_cache_release(page); + put_page(page); } return p != NULL; } @@ -1518,7 +1518,7 @@ int try_to_unuse(unsigned int type, bool frontswap, } if (retval) { unlock_page(page); - page_cache_release(page); + put_page(page); break; } @@ -1570,7 +1570,7 @@ int try_to_unuse(unsigned int type, bool frontswap, */ SetPageDirty(page); unlock_page(page); - page_cache_release(page); + put_page(page); /* * Make sure that we aren't completely killing @@ -2574,7 +2574,7 @@ bad_swap: out: if (page && !IS_ERR(page)) { kunmap(page); - page_cache_release(page); + put_page(page); } if (name) putname(name); diff --git a/mm/truncate.c b/mm/truncate.c index 7598b552ae03..b00272810871 100644 --- a/mm/truncate.c +++ b/mm/truncate.c @@ -118,7 +118,7 @@ truncate_complete_page(struct address_space *mapping, struct page *page) return -EIO; if (page_has_private(page)) - do_invalidatepage(page, 0, PAGE_CACHE_SIZE); + do_invalidatepage(page, 0, PAGE_SIZE); /* * Some filesystems seem to re-dirty the page even after 
@@ -159,8 +159,8 @@ int truncate_inode_page(struct address_space *mapping, struct page *page) { if (page_mapped(page)) { unmap_mapping_range(mapping, - (loff_t)page->index << PAGE_CACHE_SHIFT, - PAGE_CACHE_SIZE, 0); + (loff_t)page->index << PAGE_SHIFT, + PAGE_SIZE, 0); } return truncate_complete_page(mapping, page); } @@ -241,8 +241,8 @@ void truncate_inode_pages_range(struct address_space *mapping, return; /* Offsets within partial pages */ - partial_start = lstart & (PAGE_CACHE_SIZE - 1); - partial_end = (lend + 1) & (PAGE_CACHE_SIZE - 1); + partial_start = lstart & (PAGE_SIZE - 1); + partial_end = (lend + 1) & (PAGE_SIZE - 1); /* * 'start' and 'end' always covers the range of pages to be fully @@ -250,7 +250,7 @@ void truncate_inode_pages_range(struct address_space *mapping, * start of the range and 'partial_end' at the end of the range. * Note that 'end' is exclusive while 'lend' is inclusive. */ - start = (lstart + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; + start = (lstart + PAGE_SIZE - 1) >> PAGE_SHIFT; if (lend == -1) /* * lend == -1 indicates end-of-file so we have to set 'end' @@ -259,7 +259,7 @@ void truncate_inode_pages_range(struct address_space *mapping, */ end = -1; else - end = (lend + 1) >> PAGE_CACHE_SHIFT; + end = (lend + 1) >> PAGE_SHIFT; pagevec_init(&pvec, 0); index = start; @@ -298,7 +298,7 @@ void truncate_inode_pages_range(struct address_space *mapping, if (partial_start) { struct page *page = find_lock_page(mapping, start - 1); if (page) { - unsigned int top = PAGE_CACHE_SIZE; + unsigned int top = PAGE_SIZE; if (start > end) { /* Truncation within a single page */ top = partial_end; @@ -311,7 +311,7 @@ void truncate_inode_pages_range(struct address_space *mapping, do_invalidatepage(page, partial_start, top - partial_start); unlock_page(page); - page_cache_release(page); + put_page(page); } } if (partial_end) { @@ -324,7 +324,7 @@ void truncate_inode_pages_range(struct address_space *mapping, do_invalidatepage(page, 0, partial_end); unlock_page(page); - page_cache_release(page); + put_page(page); } } /* @@ -538,7 +538,7 @@ invalidate_complete_page2(struct address_space *mapping, struct page *page) if (mapping->a_ops->freepage) mapping->a_ops->freepage(page); - page_cache_release(page); /* pagecache ref */ + put_page(page); /* pagecache ref */ return 1; failed: spin_unlock_irqrestore(&mapping->tree_lock, flags); @@ -608,18 +608,18 @@ int invalidate_inode_pages2_range(struct address_space *mapping, * Zap the rest of the file in one hit. */ unmap_mapping_range(mapping, - (loff_t)index << PAGE_CACHE_SHIFT, + (loff_t)index << PAGE_SHIFT, (loff_t)(1 + end - index) - << PAGE_CACHE_SHIFT, - 0); + << PAGE_SHIFT, + 0); did_range_unmap = 1; } else { /* * Just zap this page */ unmap_mapping_range(mapping, - (loff_t)index << PAGE_CACHE_SHIFT, - PAGE_CACHE_SIZE, 0); + (loff_t)index << PAGE_SHIFT, + PAGE_SIZE, 0); } } BUG_ON(page_mapped(page)); @@ -744,14 +744,14 @@ void pagecache_isize_extended(struct inode *inode, loff_t from, loff_t to) WARN_ON(to > inode->i_size); - if (from >= to || bsize == PAGE_CACHE_SIZE) + if (from >= to || bsize == PAGE_SIZE) return; /* Page straddling @from will not have any hole block created? */ rounded_from = round_up(from, bsize); - if (to <= rounded_from || !(rounded_from & (PAGE_CACHE_SIZE - 1))) + if (to <= rounded_from || !(rounded_from & (PAGE_SIZE - 1))) return; - index = from >> PAGE_CACHE_SHIFT; + index = from >> PAGE_SHIFT; page = find_lock_page(inode->i_mapping, index); /* Page not cached? 
Nothing to do */ if (!page) @@ -763,7 +763,7 @@ void pagecache_isize_extended(struct inode *inode, loff_t from, loff_t to) if (page_mkclean(page)) set_page_dirty(page); unlock_page(page); - page_cache_release(page); + put_page(page); } EXPORT_SYMBOL(pagecache_isize_extended); diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c index 9f3a0290b273..af817e5060fb 100644 --- a/mm/userfaultfd.c +++ b/mm/userfaultfd.c @@ -93,7 +93,7 @@ out_release_uncharge_unlock: pte_unmap_unlock(dst_pte, ptl); mem_cgroup_cancel_charge(page, memcg, false); out_release: - page_cache_release(page); + put_page(page); goto out; } @@ -287,7 +287,7 @@ out_unlock: up_read(&dst_mm->mmap_sem); out: if (page) - page_cache_release(page); + put_page(page); BUG_ON(copied < 0); BUG_ON(err > 0); BUG_ON(!copied && !err); diff --git a/mm/zswap.c b/mm/zswap.c index bf14508afd64..91dad80d068b 100644 --- a/mm/zswap.c +++ b/mm/zswap.c @@ -869,7 +869,7 @@ static int zswap_writeback_entry(struct zpool *pool, unsigned long handle) case ZSWAP_SWAPCACHE_EXIST: /* page is already in the swap cache, ignore for now */ - page_cache_release(page); + put_page(page); ret = -EEXIST; goto fail; @@ -897,7 +897,7 @@ static int zswap_writeback_entry(struct zpool *pool, unsigned long handle) /* start writeback */ __swap_writepage(page, &wbc, end_swap_bio_write); - page_cache_release(page); + put_page(page); zswap_written_back_pages++; spin_lock(&tree->lock); diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index 1831f6353622..a5502898ea33 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -269,7 +269,7 @@ static void _ceph_msgr_exit(void) } BUG_ON(zero_page == NULL); - page_cache_release(zero_page); + put_page(zero_page); zero_page = NULL; ceph_msgr_slab_exit(); @@ -282,7 +282,7 @@ int ceph_msgr_init(void) BUG_ON(zero_page != NULL); zero_page = ZERO_PAGE(0); - page_cache_get(zero_page); + get_page(zero_page); /* * The number of active work items is limited by the number of @@ -1602,7 +1602,7 @@ static int write_partial_skip(struct ceph_connection *con) dout("%s %p %d left\n", __func__, con, con->out_skip); while (con->out_skip > 0) { - size_t size = min(con->out_skip, (int) PAGE_CACHE_SIZE); + size_t size = min(con->out_skip, (int) PAGE_SIZE); ret = ceph_tcp_sendpage(con->sock, zero_page, 0, size, true); if (ret <= 0) diff --git a/net/ceph/pagelist.c b/net/ceph/pagelist.c index c7c220a736e5..6864007e64fc 100644 --- a/net/ceph/pagelist.c +++ b/net/ceph/pagelist.c @@ -56,7 +56,7 @@ int ceph_pagelist_append(struct ceph_pagelist *pl, const void *buf, size_t len) size_t bit = pl->room; int ret; - memcpy(pl->mapped_tail + (pl->length & ~PAGE_CACHE_MASK), + memcpy(pl->mapped_tail + (pl->length & ~PAGE_MASK), buf, bit); pl->length += bit; pl->room -= bit; @@ -67,7 +67,7 @@ int ceph_pagelist_append(struct ceph_pagelist *pl, const void *buf, size_t len) return ret; } - memcpy(pl->mapped_tail + (pl->length & ~PAGE_CACHE_MASK), buf, len); + memcpy(pl->mapped_tail + (pl->length & ~PAGE_MASK), buf, len); pl->length += len; pl->room -= len; return 0; diff --git a/net/ceph/pagevec.c b/net/ceph/pagevec.c index 10297f7a89ba..00d2601407c5 100644 --- a/net/ceph/pagevec.c +++ b/net/ceph/pagevec.c @@ -95,19 +95,19 @@ int ceph_copy_user_to_page_vector(struct page **pages, loff_t off, size_t len) { int i = 0; - int po = off & ~PAGE_CACHE_MASK; + int po = off & ~PAGE_MASK; int left = len; int l, bad; while (left > 0) { - l = min_t(int, PAGE_CACHE_SIZE-po, left); + l = min_t(int, PAGE_SIZE-po, left); bad = copy_from_user(page_address(pages[i]) + po, data, 
l); if (bad == l) return -EFAULT; data += l - bad; left -= l - bad; po += l - bad; - if (po == PAGE_CACHE_SIZE) { + if (po == PAGE_SIZE) { po = 0; i++; } @@ -121,17 +121,17 @@ void ceph_copy_to_page_vector(struct page **pages, loff_t off, size_t len) { int i = 0; - size_t po = off & ~PAGE_CACHE_MASK; + size_t po = off & ~PAGE_MASK; size_t left = len; while (left > 0) { - size_t l = min_t(size_t, PAGE_CACHE_SIZE-po, left); + size_t l = min_t(size_t, PAGE_SIZE-po, left); memcpy(page_address(pages[i]) + po, data, l); data += l; left -= l; po += l; - if (po == PAGE_CACHE_SIZE) { + if (po == PAGE_SIZE) { po = 0; i++; } @@ -144,17 +144,17 @@ void ceph_copy_from_page_vector(struct page **pages, loff_t off, size_t len) { int i = 0; - size_t po = off & ~PAGE_CACHE_MASK; + size_t po = off & ~PAGE_MASK; size_t left = len; while (left > 0) { - size_t l = min_t(size_t, PAGE_CACHE_SIZE-po, left); + size_t l = min_t(size_t, PAGE_SIZE-po, left); memcpy(data, page_address(pages[i]) + po, l); data += l; left -= l; po += l; - if (po == PAGE_CACHE_SIZE) { + if (po == PAGE_SIZE) { po = 0; i++; } @@ -168,25 +168,25 @@ EXPORT_SYMBOL(ceph_copy_from_page_vector); */ void ceph_zero_page_vector_range(int off, int len, struct page **pages) { - int i = off >> PAGE_CACHE_SHIFT; + int i = off >> PAGE_SHIFT; - off &= ~PAGE_CACHE_MASK; + off &= ~PAGE_MASK; dout("zero_page_vector_page %u~%u\n", off, len); /* leading partial page? */ if (off) { - int end = min((int)PAGE_CACHE_SIZE, off + len); + int end = min((int)PAGE_SIZE, off + len); dout("zeroing %d %p head from %d\n", i, pages[i], (int)off); zero_user_segment(pages[i], off, end); len -= (end - off); i++; } - while (len >= PAGE_CACHE_SIZE) { + while (len >= PAGE_SIZE) { dout("zeroing %d %p len=%d\n", i, pages[i], len); - zero_user_segment(pages[i], 0, PAGE_CACHE_SIZE); - len -= PAGE_CACHE_SIZE; + zero_user_segment(pages[i], 0, PAGE_SIZE); + len -= PAGE_SIZE; i++; } /* trailing partial page? 
*/ diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index 8c6bc795f060..15612ffa8d57 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -1728,8 +1728,8 @@ alloc_enc_pages(struct rpc_rqst *rqstp) return 0; } - first = snd_buf->page_base >> PAGE_CACHE_SHIFT; - last = (snd_buf->page_base + snd_buf->page_len - 1) >> PAGE_CACHE_SHIFT; + first = snd_buf->page_base >> PAGE_SHIFT; + last = (snd_buf->page_base + snd_buf->page_len - 1) >> PAGE_SHIFT; rqstp->rq_enc_pages_num = last - first + 1 + 1; rqstp->rq_enc_pages = kmalloc(rqstp->rq_enc_pages_num * sizeof(struct page *), @@ -1775,10 +1775,10 @@ gss_wrap_req_priv(struct rpc_cred *cred, struct gss_cl_ctx *ctx, status = alloc_enc_pages(rqstp); if (status) return status; - first = snd_buf->page_base >> PAGE_CACHE_SHIFT; + first = snd_buf->page_base >> PAGE_SHIFT; inpages = snd_buf->pages + first; snd_buf->pages = rqstp->rq_enc_pages; - snd_buf->page_base -= first << PAGE_CACHE_SHIFT; + snd_buf->page_base -= first << PAGE_SHIFT; /* * Give the tail its own page, in case we need extra space in the * head when wrapping: diff --git a/net/sunrpc/auth_gss/gss_krb5_crypto.c b/net/sunrpc/auth_gss/gss_krb5_crypto.c index d94a8e1e9f05..045e11ecd332 100644 --- a/net/sunrpc/auth_gss/gss_krb5_crypto.c +++ b/net/sunrpc/auth_gss/gss_krb5_crypto.c @@ -465,7 +465,7 @@ encryptor(struct scatterlist *sg, void *data) page_pos = desc->pos - outbuf->head[0].iov_len; if (page_pos >= 0 && page_pos < outbuf->page_len) { /* pages are not in place: */ - int i = (page_pos + outbuf->page_base) >> PAGE_CACHE_SHIFT; + int i = (page_pos + outbuf->page_base) >> PAGE_SHIFT; in_page = desc->pages[i]; } else { in_page = sg_page(sg); diff --git a/net/sunrpc/auth_gss/gss_krb5_wrap.c b/net/sunrpc/auth_gss/gss_krb5_wrap.c index 765088e4ad84..a737c2da0837 100644 --- a/net/sunrpc/auth_gss/gss_krb5_wrap.c +++ b/net/sunrpc/auth_gss/gss_krb5_wrap.c @@ -79,9 +79,9 @@ gss_krb5_remove_padding(struct xdr_buf *buf, int blocksize) len -= buf->head[0].iov_len; if (len <= buf->page_len) { unsigned int last = (buf->page_base + len - 1) - >>PAGE_CACHE_SHIFT; + >>PAGE_SHIFT; unsigned int offset = (buf->page_base + len - 1) - & (PAGE_CACHE_SIZE - 1); + & (PAGE_SIZE - 1); ptr = kmap_atomic(buf->pages[last]); pad = *(ptr + offset); kunmap_atomic(ptr); diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index 008c25d1b9f9..553bf95f7003 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -881,7 +881,7 @@ static ssize_t cache_downcall(struct address_space *mapping, char *kaddr; ssize_t ret = -ENOMEM; - if (count >= PAGE_CACHE_SIZE) + if (count >= PAGE_SIZE) goto out_slow; page = find_or_create_page(mapping, 0, GFP_KERNEL); @@ -892,7 +892,7 @@ static ssize_t cache_downcall(struct address_space *mapping, ret = cache_do_downcall(kaddr, buf, count, cd); kunmap(page); unlock_page(page); - page_cache_release(page); + put_page(page); return ret; out_slow: return cache_slow_downcall(buf, count, cd); diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index 31789ef3e614..fc48eca21fd2 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -1390,8 +1390,8 @@ rpc_fill_super(struct super_block *sb, void *data, int silent) struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); int err; - sb->s_blocksize = PAGE_CACHE_SIZE; - sb->s_blocksize_bits = PAGE_CACHE_SHIFT; + sb->s_blocksize = PAGE_SIZE; + sb->s_blocksize_bits = PAGE_SHIFT; sb->s_magic = RPCAUTH_GSSMAGIC; sb->s_op = &s_ops; sb->s_d_op = &simple_dentry_operations; diff --git 
a/net/sunrpc/socklib.c b/net/sunrpc/socklib.c index 2df87f78e518..de70c78025d7 100644 --- a/net/sunrpc/socklib.c +++ b/net/sunrpc/socklib.c @@ -96,8 +96,8 @@ ssize_t xdr_partial_copy_from_skb(struct xdr_buf *xdr, unsigned int base, struct if (base || xdr->page_base) { pglen -= base; base += xdr->page_base; - ppage += base >> PAGE_CACHE_SHIFT; - base &= ~PAGE_CACHE_MASK; + ppage += base >> PAGE_SHIFT; + base &= ~PAGE_MASK; } do { char *kaddr; @@ -113,7 +113,7 @@ ssize_t xdr_partial_copy_from_skb(struct xdr_buf *xdr, unsigned int base, struct } } - len = PAGE_CACHE_SIZE; + len = PAGE_SIZE; kaddr = kmap_atomic(*ppage); if (base) { len -= base; diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c index 4439ac4c1b53..1e6aba0501a1 100644 --- a/net/sunrpc/xdr.c +++ b/net/sunrpc/xdr.c @@ -181,20 +181,20 @@ _shift_data_right_pages(struct page **pages, size_t pgto_base, pgto_base += len; pgfrom_base += len; - pgto = pages + (pgto_base >> PAGE_CACHE_SHIFT); - pgfrom = pages + (pgfrom_base >> PAGE_CACHE_SHIFT); + pgto = pages + (pgto_base >> PAGE_SHIFT); + pgfrom = pages + (pgfrom_base >> PAGE_SHIFT); - pgto_base &= ~PAGE_CACHE_MASK; - pgfrom_base &= ~PAGE_CACHE_MASK; + pgto_base &= ~PAGE_MASK; + pgfrom_base &= ~PAGE_MASK; do { /* Are any pointers crossing a page boundary? */ if (pgto_base == 0) { - pgto_base = PAGE_CACHE_SIZE; + pgto_base = PAGE_SIZE; pgto--; } if (pgfrom_base == 0) { - pgfrom_base = PAGE_CACHE_SIZE; + pgfrom_base = PAGE_SIZE; pgfrom--; } @@ -236,11 +236,11 @@ _copy_to_pages(struct page **pages, size_t pgbase, const char *p, size_t len) char *vto; size_t copy; - pgto = pages + (pgbase >> PAGE_CACHE_SHIFT); - pgbase &= ~PAGE_CACHE_MASK; + pgto = pages + (pgbase >> PAGE_SHIFT); + pgbase &= ~PAGE_MASK; for (;;) { - copy = PAGE_CACHE_SIZE - pgbase; + copy = PAGE_SIZE - pgbase; if (copy > len) copy = len; @@ -253,7 +253,7 @@ _copy_to_pages(struct page **pages, size_t pgbase, const char *p, size_t len) break; pgbase += copy; - if (pgbase == PAGE_CACHE_SIZE) { + if (pgbase == PAGE_SIZE) { flush_dcache_page(*pgto); pgbase = 0; pgto++; @@ -280,11 +280,11 @@ _copy_from_pages(char *p, struct page **pages, size_t pgbase, size_t len) char *vfrom; size_t copy; - pgfrom = pages + (pgbase >> PAGE_CACHE_SHIFT); - pgbase &= ~PAGE_CACHE_MASK; + pgfrom = pages + (pgbase >> PAGE_SHIFT); + pgbase &= ~PAGE_MASK; do { - copy = PAGE_CACHE_SIZE - pgbase; + copy = PAGE_SIZE - pgbase; if (copy > len) copy = len; @@ -293,7 +293,7 @@ _copy_from_pages(char *p, struct page **pages, size_t pgbase, size_t len) kunmap_atomic(vfrom); pgbase += copy; - if (pgbase == PAGE_CACHE_SIZE) { + if (pgbase == PAGE_SIZE) { pgbase = 0; pgfrom++; } @@ -1038,8 +1038,8 @@ xdr_buf_subsegment(struct xdr_buf *buf, struct xdr_buf *subbuf, if (base < buf->page_len) { subbuf->page_len = min(buf->page_len - base, len); base += buf->page_base; - subbuf->page_base = base & ~PAGE_CACHE_MASK; - subbuf->pages = &buf->pages[base >> PAGE_CACHE_SHIFT]; + subbuf->page_base = base & ~PAGE_MASK; + subbuf->pages = &buf->pages[base >> PAGE_SHIFT]; len -= subbuf->page_len; base = 0; } else { @@ -1297,9 +1297,9 @@ xdr_xcode_array2(struct xdr_buf *buf, unsigned int base, todo -= avail_here; base += buf->page_base; - ppages = buf->pages + (base >> PAGE_CACHE_SHIFT); - base &= ~PAGE_CACHE_MASK; - avail_page = min_t(unsigned int, PAGE_CACHE_SIZE - base, + ppages = buf->pages + (base >> PAGE_SHIFT); + base &= ~PAGE_MASK; + avail_page = min_t(unsigned int, PAGE_SIZE - base, avail_here); c = kmap(*ppages) + base; @@ -1383,7 +1383,7 @@ xdr_xcode_array2(struct xdr_buf 
*buf, unsigned int base, } avail_page = min(avail_here, - (unsigned int) PAGE_CACHE_SIZE); + (unsigned int) PAGE_SIZE); } base = buf->page_len; /* align to start of tail */ } @@ -1479,9 +1479,9 @@ xdr_process_buf(struct xdr_buf *buf, unsigned int offset, unsigned int len, if (page_len > len) page_len = len; len -= page_len; - page_offset = (offset + buf->page_base) & (PAGE_CACHE_SIZE - 1); - i = (offset + buf->page_base) >> PAGE_CACHE_SHIFT; - thislen = PAGE_CACHE_SIZE - page_offset; + page_offset = (offset + buf->page_base) & (PAGE_SIZE - 1); + i = (offset + buf->page_base) >> PAGE_SHIFT; + thislen = PAGE_SIZE - page_offset; do { if (thislen > page_len) thislen = page_len; @@ -1492,7 +1492,7 @@ xdr_process_buf(struct xdr_buf *buf, unsigned int offset, unsigned int len, page_len -= thislen; i++; page_offset = 0; - thislen = PAGE_CACHE_SIZE; + thislen = PAGE_SIZE; } while (page_len != 0); offset = 0; } -- cgit v1.2.3 From ea1754a084760e68886f5b725c8eaada9cc57155 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Fri, 1 Apr 2016 15:29:48 +0300 Subject: mm, fs: remove remaining PAGE_CACHE_* and page_cache_{get,release} usage Mostly direct substitution with occasional adjustment or removing outdated comments. Signed-off-by: Kirill A. Shutemov Acked-by: Michal Hocko Signed-off-by: Linus Torvalds --- Documentation/filesystems/cramfs.txt | 2 +- Documentation/filesystems/tmpfs.txt | 2 +- Documentation/filesystems/vfs.txt | 4 ++-- arch/parisc/mm/init.c | 2 +- block/bio.c | 4 ++-- drivers/block/drbd/drbd_int.h | 4 ++-- drivers/staging/lustre/include/linux/lnet/types.h | 2 +- drivers/staging/lustre/lnet/selftest/selftest.h | 4 ++-- drivers/staging/lustre/lustre/include/lu_object.h | 2 +- .../lustre/lustre/include/lustre/lustre_idl.h | 4 ++-- drivers/staging/lustre/lustre/include/lustre_net.h | 4 ++-- drivers/staging/lustre/lustre/include/obd.h | 2 +- drivers/staging/lustre/lustre/llite/dir.c | 5 ++--- drivers/staging/lustre/lustre/llite/rw.c | 2 +- drivers/staging/lustre/lustre/llite/vvp_io.c | 2 +- drivers/staging/lustre/lustre/lmv/lmv_obd.c | 8 +++---- .../lustre/lustre/obdclass/linux/linux-obdo.c | 1 - drivers/staging/lustre/lustre/osc/osc_cache.c | 2 +- fs/btrfs/check-integrity.c | 4 ++-- fs/btrfs/extent_io.c | 8 +++---- fs/btrfs/struct-funcs.c | 4 ++-- fs/btrfs/tests/extent-io-tests.c | 2 +- fs/cifs/cifsglob.h | 4 ++-- fs/cifs/file.c | 2 +- fs/cramfs/README | 26 +++++++++++----------- fs/cramfs/inode.c | 2 +- fs/dax.c | 4 ++-- fs/ecryptfs/inode.c | 4 ++-- fs/ext2/dir.c | 4 ++-- fs/ext4/ext4.h | 4 ++-- fs/ext4/inode.c | 2 +- fs/ext4/mballoc.c | 4 ++-- fs/ext4/readpage.c | 2 +- fs/hugetlbfs/inode.c | 2 +- fs/mpage.c | 2 +- fs/ntfs/aops.c | 2 +- fs/ntfs/aops.h | 2 +- fs/ntfs/compress.c | 21 +++++------------ fs/ntfs/dir.c | 16 ++++++------- fs/ntfs/file.c | 2 +- fs/ntfs/index.c | 2 +- fs/ntfs/inode.c | 4 ++-- fs/ntfs/super.c | 14 ++++++------ fs/ocfs2/aops.c | 2 +- fs/ocfs2/refcounttree.c | 2 +- fs/reiserfs/journal.c | 2 +- fs/squashfs/cache.c | 4 ++-- fs/squashfs/file.c | 2 +- fs/ubifs/file.c | 2 +- fs/ubifs/super.c | 2 +- fs/xfs/xfs_aops.c | 4 ++-- fs/xfs/xfs_super.c | 4 ++-- include/linux/backing-dev-defs.h | 2 +- include/linux/mm.h | 2 +- include/linux/mm_types.h | 2 +- include/linux/nfs_page.h | 4 ++-- include/linux/nilfs2_fs.h | 4 ++-- include/linux/pagemap.h | 3 +-- include/linux/sunrpc/svc.h | 2 +- include/linux/swap.h | 2 +- mm/gup.c | 2 +- mm/memory.c | 1 - mm/mincore.c | 4 ++-- mm/swap.c | 2 +- net/sunrpc/xdr.c | 2 +- 65 files changed, 120 insertions(+), 135 deletions(-) (limited 
to 'include/linux') diff --git a/Documentation/filesystems/cramfs.txt b/Documentation/filesystems/cramfs.txt index 31f53f0ab957..4006298f6707 100644 --- a/Documentation/filesystems/cramfs.txt +++ b/Documentation/filesystems/cramfs.txt @@ -38,7 +38,7 @@ the update lasts only as long as the inode is cached in memory, after which the timestamp reverts to 1970, i.e. moves backwards in time. Currently, cramfs must be written and read with architectures of the -same endianness, and can be read only by kernels with PAGE_CACHE_SIZE +same endianness, and can be read only by kernels with PAGE_SIZE == 4096. At least the latter of these is a bug, but it hasn't been decided what the best fix is. For the moment if you have larger pages you can just change the #define in mkcramfs.c, so long as you don't diff --git a/Documentation/filesystems/tmpfs.txt b/Documentation/filesystems/tmpfs.txt index d392e1505f17..d9c11d25bf02 100644 --- a/Documentation/filesystems/tmpfs.txt +++ b/Documentation/filesystems/tmpfs.txt @@ -60,7 +60,7 @@ size: The limit of allocated bytes for this tmpfs instance. The default is half of your physical RAM without swap. If you oversize your tmpfs instances the machine will deadlock since the OOM handler will not be able to free that memory. -nr_blocks: The same as size, but in blocks of PAGE_CACHE_SIZE. +nr_blocks: The same as size, but in blocks of PAGE_SIZE. nr_inodes: The maximum number of inodes for this instance. The default is half of the number of your physical RAM pages, or (on a machine with highmem) the number of lowmem RAM pages, diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt index b02a7d598258..4164bd6397a2 100644 --- a/Documentation/filesystems/vfs.txt +++ b/Documentation/filesystems/vfs.txt @@ -708,9 +708,9 @@ struct address_space_operations { from the address space. This generally corresponds to either a truncation, punch hole or a complete invalidation of the address space (in the latter case 'offset' will always be 0 and 'length' - will be PAGE_CACHE_SIZE). Any private data associated with the page + will be PAGE_SIZE). Any private data associated with the page should be updated to reflect this truncation. If offset is 0 and - length is PAGE_CACHE_SIZE, then the private data should be released, + length is PAGE_SIZE, then the private data should be released, because the page must be able to be completely discarded. This may be done by calling the ->releasepage function, but in this case the release MUST succeed. diff --git a/arch/parisc/mm/init.c b/arch/parisc/mm/init.c index 3c07d6b96877..6b3e7c6ee096 100644 --- a/arch/parisc/mm/init.c +++ b/arch/parisc/mm/init.c @@ -22,7 +22,7 @@ #include #include #include /* for node_online_map */ -#include /* for release_pages and page_cache_release */ +#include /* for release_pages */ #include #include diff --git a/block/bio.c b/block/bio.c index 168531517694..807d25e466ec 100644 --- a/block/bio.c +++ b/block/bio.c @@ -1615,8 +1615,8 @@ static void bio_release_pages(struct bio *bio) * the BIO and the offending pages and re-dirty the pages in process context. * * It is expected that bio_check_pages_dirty() will wholly own the BIO from - * here on. It will run one page_cache_release() against each page and will - * run one bio_put() against the BIO. + * here on. It will run one put_page() against each page and will run one + * bio_put() against the BIO. 
*/ static void bio_dirty_fn(struct work_struct *work); diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index c227fd4cad75..7a1cf7eaa71d 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1327,8 +1327,8 @@ struct bm_extent { #endif #endif -/* BIO_MAX_SIZE is 256 * PAGE_CACHE_SIZE, - * so for typical PAGE_CACHE_SIZE of 4k, that is (1<<20) Byte. +/* BIO_MAX_SIZE is 256 * PAGE_SIZE, + * so for typical PAGE_SIZE of 4k, that is (1<<20) Byte. * Since we may live in a mixed-platform cluster, * we limit us to a platform agnostic constant here for now. * A followup commit may allow even bigger BIO sizes, diff --git a/drivers/staging/lustre/include/linux/lnet/types.h b/drivers/staging/lustre/include/linux/lnet/types.h index 08f193c341c5..1c679cb72785 100644 --- a/drivers/staging/lustre/include/linux/lnet/types.h +++ b/drivers/staging/lustre/include/linux/lnet/types.h @@ -514,7 +514,7 @@ typedef struct { /** * Starting offset of the fragment within the page. Note that the * end of the fragment must not pass the end of the page; i.e., - * kiov_len + kiov_offset <= PAGE_CACHE_SIZE. + * kiov_len + kiov_offset <= PAGE_SIZE. */ unsigned int kiov_offset; } lnet_kiov_t; diff --git a/drivers/staging/lustre/lnet/selftest/selftest.h b/drivers/staging/lustre/lnet/selftest/selftest.h index 5321ddec9580..e689ca1846e1 100644 --- a/drivers/staging/lustre/lnet/selftest/selftest.h +++ b/drivers/staging/lustre/lnet/selftest/selftest.h @@ -390,8 +390,8 @@ typedef struct sfw_test_instance { } tsi_u; } sfw_test_instance_t; -/* XXX: trailing (PAGE_CACHE_SIZE % sizeof(lnet_process_id_t)) bytes at - * the end of pages are not used */ +/* XXX: trailing (PAGE_SIZE % sizeof(lnet_process_id_t)) bytes at the end of + * pages are not used */ #define SFW_MAX_CONCUR LST_MAX_CONCUR #define SFW_ID_PER_PAGE (PAGE_SIZE / sizeof(lnet_process_id_packed_t)) #define SFW_MAX_NDESTS (LNET_MAX_IOV * SFW_ID_PER_PAGE) diff --git a/drivers/staging/lustre/lustre/include/lu_object.h b/drivers/staging/lustre/lustre/include/lu_object.h index b5088b13a305..242bb1ef6245 100644 --- a/drivers/staging/lustre/lustre/include/lu_object.h +++ b/drivers/staging/lustre/lustre/include/lu_object.h @@ -1118,7 +1118,7 @@ struct lu_context_key { { \ type *value; \ \ - CLASSERT(PAGE_CACHE_SIZE >= sizeof (*value)); \ + CLASSERT(PAGE_SIZE >= sizeof (*value)); \ \ value = kzalloc(sizeof(*value), GFP_NOFS); \ if (!value) \ diff --git a/drivers/staging/lustre/lustre/include/lustre/lustre_idl.h b/drivers/staging/lustre/lustre/include/lustre/lustre_idl.h index 1e2ebe5cc998..5aae1d06a5fa 100644 --- a/drivers/staging/lustre/lustre/include/lustre/lustre_idl.h +++ b/drivers/staging/lustre/lustre/include/lustre/lustre_idl.h @@ -1022,10 +1022,10 @@ static inline int lu_dirent_size(struct lu_dirent *ent) * MDS_READPAGE page size * * This is the directory page size packed in MDS_READPAGE RPC. - * It's different than PAGE_CACHE_SIZE because the client needs to + * It's different than PAGE_SIZE because the client needs to * access the struct lu_dirpage header packed at the beginning of * the "page" and without this there isn't any way to know find the - * lu_dirpage header is if client and server PAGE_CACHE_SIZE differ. + * lu_dirpage header is if client and server PAGE_SIZE differ. 
*/ #define LU_PAGE_SHIFT 12 #define LU_PAGE_SIZE (1UL << LU_PAGE_SHIFT) diff --git a/drivers/staging/lustre/lustre/include/lustre_net.h b/drivers/staging/lustre/lustre/include/lustre_net.h index a5e9095fbf36..69586a522eb7 100644 --- a/drivers/staging/lustre/lustre/include/lustre_net.h +++ b/drivers/staging/lustre/lustre/include/lustre_net.h @@ -112,8 +112,8 @@ # if ((PTLRPC_MAX_BRW_PAGES & (PTLRPC_MAX_BRW_PAGES - 1)) != 0) # error "PTLRPC_MAX_BRW_PAGES isn't a power of two" # endif -# if (PTLRPC_MAX_BRW_SIZE != (PTLRPC_MAX_BRW_PAGES * PAGE_CACHE_SIZE)) -# error "PTLRPC_MAX_BRW_SIZE isn't PTLRPC_MAX_BRW_PAGES * PAGE_CACHE_SIZE" +# if (PTLRPC_MAX_BRW_SIZE != (PTLRPC_MAX_BRW_PAGES * PAGE_SIZE)) +# error "PTLRPC_MAX_BRW_SIZE isn't PTLRPC_MAX_BRW_PAGES * PAGE_SIZE" # endif # if (PTLRPC_MAX_BRW_SIZE > LNET_MTU * PTLRPC_BULK_OPS_COUNT) # error "PTLRPC_MAX_BRW_SIZE too big" diff --git a/drivers/staging/lustre/lustre/include/obd.h b/drivers/staging/lustre/lustre/include/obd.h index f4167db65b5d..4264d97650ec 100644 --- a/drivers/staging/lustre/lustre/include/obd.h +++ b/drivers/staging/lustre/lustre/include/obd.h @@ -272,7 +272,7 @@ struct client_obd { int cl_grant_shrink_interval; /* seconds */ /* A chunk is an optimal size used by osc_extent to determine - * the extent size. A chunk is max(PAGE_CACHE_SIZE, OST block size) + * the extent size. A chunk is max(PAGE_SIZE, OST block size) */ int cl_chunkbits; int cl_chunk; diff --git a/drivers/staging/lustre/lustre/llite/dir.c b/drivers/staging/lustre/lustre/llite/dir.c index a7c02e07fd75..e4c82883e580 100644 --- a/drivers/staging/lustre/lustre/llite/dir.c +++ b/drivers/staging/lustre/lustre/llite/dir.c @@ -134,9 +134,8 @@ * a header lu_dirpage which describes the start/end hash, and whether this * page is empty (contains no dir entry) or hash collide with next page. * After client receives reply, several pages will be integrated into dir page - * in PAGE_CACHE_SIZE (if PAGE_CACHE_SIZE greater than LU_PAGE_SIZE), and the - * lu_dirpage for this integrated page will be adjusted. See - * lmv_adjust_dirpages(). + * in PAGE_SIZE (if PAGE_SIZE greater than LU_PAGE_SIZE), and the lu_dirpage + * for this integrated page will be adjusted. See lmv_adjust_dirpages(). * */ diff --git a/drivers/staging/lustre/lustre/llite/rw.c b/drivers/staging/lustre/lustre/llite/rw.c index 4c7250ab54e6..edab6c5b7e50 100644 --- a/drivers/staging/lustre/lustre/llite/rw.c +++ b/drivers/staging/lustre/lustre/llite/rw.c @@ -521,7 +521,7 @@ static int ll_read_ahead_page(const struct lu_env *env, struct cl_io *io, * striped over, rather than having a constant value for all files here. */ -/* RAS_INCREASE_STEP should be (1UL << (inode->i_blkbits - PAGE_CACHE_SHIFT)). +/* RAS_INCREASE_STEP should be (1UL << (inode->i_blkbits - PAGE_SHIFT)). * Temporarily set RAS_INCREASE_STEP to 1MB. 
After 4MB RPC is enabled * by default, this should be adjusted corresponding with max_read_ahead_mb * and max_read_ahead_per_file_mb otherwise the readahead budget can be used diff --git a/drivers/staging/lustre/lustre/llite/vvp_io.c b/drivers/staging/lustre/lustre/llite/vvp_io.c index 75d4df71cab8..85a835976174 100644 --- a/drivers/staging/lustre/lustre/llite/vvp_io.c +++ b/drivers/staging/lustre/lustre/llite/vvp_io.c @@ -512,7 +512,7 @@ static int vvp_io_read_start(const struct lu_env *env, vio->cui_ra_window_set = 1; bead->lrr_start = cl_index(obj, pos); /* - * XXX: explicit PAGE_CACHE_SIZE + * XXX: explicit PAGE_SIZE */ bead->lrr_count = cl_index(obj, tot + PAGE_SIZE - 1); ll_ra_read_in(file, bead); diff --git a/drivers/staging/lustre/lustre/lmv/lmv_obd.c b/drivers/staging/lustre/lustre/lmv/lmv_obd.c index ce7e8b70dd44..9abb7c2b9231 100644 --- a/drivers/staging/lustre/lustre/lmv/lmv_obd.c +++ b/drivers/staging/lustre/lustre/lmv/lmv_obd.c @@ -2017,7 +2017,7 @@ static int lmv_sync(struct obd_export *exp, const struct lu_fid *fid, * |s|e|f|p|ent| 0 | ... | 0 | * '----------------- -----' * - * However, on hosts where the native VM page size (PAGE_CACHE_SIZE) is + * However, on hosts where the native VM page size (PAGE_SIZE) is * larger than LU_PAGE_SIZE, a single host page may contain multiple * lu_dirpages. After reading the lu_dirpages from the MDS, the * ldp_hash_end of the first lu_dirpage refers to the one immediately @@ -2048,7 +2048,7 @@ static int lmv_sync(struct obd_export *exp, const struct lu_fid *fid, * - Adjust the lde_reclen of the ending entry of each lu_dirpage to span * to the first entry of the next lu_dirpage. */ -#if PAGE_CACHE_SIZE > LU_PAGE_SIZE +#if PAGE_SIZE > LU_PAGE_SIZE static void lmv_adjust_dirpages(struct page **pages, int ncfspgs, int nlupgs) { int i; @@ -2101,7 +2101,7 @@ static void lmv_adjust_dirpages(struct page **pages, int ncfspgs, int nlupgs) } #else #define lmv_adjust_dirpages(pages, ncfspgs, nlupgs) do {} while (0) -#endif /* PAGE_CACHE_SIZE > LU_PAGE_SIZE */ +#endif /* PAGE_SIZE > LU_PAGE_SIZE */ static int lmv_readpage(struct obd_export *exp, struct md_op_data *op_data, struct page **pages, struct ptlrpc_request **request) @@ -2110,7 +2110,7 @@ static int lmv_readpage(struct obd_export *exp, struct md_op_data *op_data, struct lmv_obd *lmv = &obd->u.lmv; __u64 offset = op_data->op_offset; int rc; - int ncfspgs; /* pages read in PAGE_CACHE_SIZE */ + int ncfspgs; /* pages read in PAGE_SIZE */ int nlupgs; /* pages read in LU_PAGE_SIZE */ struct lmv_tgt_desc *tgt; diff --git a/drivers/staging/lustre/lustre/obdclass/linux/linux-obdo.c b/drivers/staging/lustre/lustre/obdclass/linux/linux-obdo.c index 4a2baaff3dc1..b41b65e2f021 100644 --- a/drivers/staging/lustre/lustre/obdclass/linux/linux-obdo.c +++ b/drivers/staging/lustre/lustre/obdclass/linux/linux-obdo.c @@ -47,7 +47,6 @@ #include "../../include/lustre/lustre_idl.h" #include -#include /* for PAGE_CACHE_SIZE */ void obdo_refresh_inode(struct inode *dst, struct obdo *src, u32 valid) { diff --git a/drivers/staging/lustre/lustre/osc/osc_cache.c b/drivers/staging/lustre/lustre/osc/osc_cache.c index 4e0a357ed43d..5f25bf83dcfc 100644 --- a/drivers/staging/lustre/lustre/osc/osc_cache.c +++ b/drivers/staging/lustre/lustre/osc/osc_cache.c @@ -1456,7 +1456,7 @@ static void osc_unreserve_grant(struct client_obd *cli, * used, we should return these grants to OST. There're two cases where grants * can be lost: * 1. truncate; - * 2. blocksize at OST is less than PAGE_CACHE_SIZE and a partial page was + * 2. 
blocksize at OST is less than PAGE_SIZE and a partial page was * written. In this case OST may use less chunks to serve this partial * write. OSTs don't actually know the page size on the client side. so * clients have to calculate lost grant by the blocksize on the OST. diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c index 866789ede4cb..516e19d1d202 100644 --- a/fs/btrfs/check-integrity.c +++ b/fs/btrfs/check-integrity.c @@ -3039,13 +3039,13 @@ int btrfsic_mount(struct btrfs_root *root, if (root->nodesize & ((u64)PAGE_SIZE - 1)) { printk(KERN_INFO - "btrfsic: cannot handle nodesize %d not being a multiple of PAGE_CACHE_SIZE %ld!\n", + "btrfsic: cannot handle nodesize %d not being a multiple of PAGE_SIZE %ld!\n", root->nodesize, PAGE_SIZE); return -1; } if (root->sectorsize & ((u64)PAGE_SIZE - 1)) { printk(KERN_INFO - "btrfsic: cannot handle sectorsize %d not being a multiple of PAGE_CACHE_SIZE %ld!\n", + "btrfsic: cannot handle sectorsize %d not being a multiple of PAGE_SIZE %ld!\n", root->sectorsize, PAGE_SIZE); return -1; } diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 93d696d248d9..d247fc0eea19 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -3264,13 +3264,11 @@ static noinline_for_stack int writepage_delalloc(struct inode *inode, goto done; } /* - * delalloc_end is already one less than the total - * length, so we don't subtract one from - * PAGE_CACHE_SIZE + * delalloc_end is already one less than the total length, so + * we don't subtract one from PAGE_SIZE */ delalloc_to_write += (delalloc_end - delalloc_start + - PAGE_SIZE) >> - PAGE_SHIFT; + PAGE_SIZE) >> PAGE_SHIFT; delalloc_start = delalloc_end + 1; } if (wbc->nr_to_write < delalloc_to_write) { diff --git a/fs/btrfs/struct-funcs.c b/fs/btrfs/struct-funcs.c index b976597b0721..e05619f241be 100644 --- a/fs/btrfs/struct-funcs.c +++ b/fs/btrfs/struct-funcs.c @@ -66,7 +66,7 @@ u##bits btrfs_get_token_##bits(struct extent_buffer *eb, void *ptr, \ \ if (token && token->kaddr && token->offset <= offset && \ token->eb == eb && \ - (token->offset + PAGE_CACHE_SIZE >= offset + size)) { \ + (token->offset + PAGE_SIZE >= offset + size)) { \ kaddr = token->kaddr; \ p = kaddr + part_offset - token->offset; \ res = get_unaligned_le##bits(p + off); \ @@ -104,7 +104,7 @@ void btrfs_set_token_##bits(struct extent_buffer *eb, \ \ if (token && token->kaddr && token->offset <= offset && \ token->eb == eb && \ - (token->offset + PAGE_CACHE_SIZE >= offset + size)) { \ + (token->offset + PAGE_SIZE >= offset + size)) { \ kaddr = token->kaddr; \ p = kaddr + part_offset - token->offset; \ put_unaligned_le##bits(val, p + off); \ diff --git a/fs/btrfs/tests/extent-io-tests.c b/fs/btrfs/tests/extent-io-tests.c index ac3a06d28531..70948b13bc81 100644 --- a/fs/btrfs/tests/extent-io-tests.c +++ b/fs/btrfs/tests/extent-io-tests.c @@ -239,7 +239,7 @@ static int test_find_delalloc(void) end = 0; /* * Currently if we fail to find dirty pages in the delalloc range we - * will adjust max_bytes down to PAGE_CACHE_SIZE and then re-search. If + * will adjust max_bytes down to PAGE_SIZE and then re-search. If * this changes at any point in the future we will need to fix this * tests expected behavior. 
*/ diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index d21da9f05bae..f2cc0b3d1af7 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -714,7 +714,7 @@ compare_mid(__u16 mid, const struct smb_hdr *smb) * * Note that this might make for "interesting" allocation problems during * writeback however as we have to allocate an array of pointers for the - * pages. A 16M write means ~32kb page array with PAGE_CACHE_SIZE == 4096. + * pages. A 16M write means ~32kb page array with PAGE_SIZE == 4096. * * For reads, there is a similar problem as we need to allocate an array * of kvecs to handle the receive, though that should only need to be done @@ -733,7 +733,7 @@ compare_mid(__u16 mid, const struct smb_hdr *smb) /* * The default wsize is 1M. find_get_pages seems to return a maximum of 256 - * pages in a single call. With PAGE_CACHE_SIZE == 4k, this means we can fill + * pages in a single call. With PAGE_SIZE == 4k, this means we can fill * a single wsize request with a single call. */ #define CIFS_DEFAULT_IOSIZE (1024 * 1024) diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 5ce540dc6996..c03d0744648b 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -1902,7 +1902,7 @@ wdata_alloc_and_fillpages(pgoff_t tofind, struct address_space *mapping, * find_get_pages_tag seems to return a max of 256 on each * iteration, so we must call it several times in order to * fill the array or the wsize is effectively limited to - * 256 * PAGE_CACHE_SIZE. + * 256 * PAGE_SIZE. */ *found_pages = 0; pages = wdata->pages; diff --git a/fs/cramfs/README b/fs/cramfs/README index 445d1c2d7646..9d4e7ea311f4 100644 --- a/fs/cramfs/README +++ b/fs/cramfs/README @@ -86,26 +86,26 @@ Block Size (Block size in cramfs refers to the size of input data that is compressed at a time. It's intended to be somewhere around -PAGE_CACHE_SIZE for cramfs_readpage's convenience.) +PAGE_SIZE for cramfs_readpage's convenience.) The superblock ought to indicate the block size that the fs was written for, since comments in <linux/pagemap.h> indicate that -PAGE_CACHE_SIZE may grow in future (if I interpret the comment +PAGE_SIZE may grow in future (if I interpret the comment correctly). -Currently, mkcramfs #define's PAGE_CACHE_SIZE as 4096 and uses that -for blksize, whereas Linux-2.3.39 uses its PAGE_CACHE_SIZE, which in +Currently, mkcramfs #define's PAGE_SIZE as 4096 and uses that +for blksize, whereas Linux-2.3.39 uses its PAGE_SIZE, which in turn is defined as PAGE_SIZE (which can be as large as 32KB on arm). This discrepancy is a bug, though it's not clear which should be changed. -One option is to change mkcramfs to take its PAGE_CACHE_SIZE from +One option is to change mkcramfs to take its PAGE_SIZE from <asm/page.h>. Personally I don't like this option, but it does require the least amount of change: just change `#define -PAGE_CACHE_SIZE (4096)' to `#include <asm/page.h>'. The disadvantage +PAGE_SIZE (4096)' to `#include <asm/page.h>'. The disadvantage is that the generated cramfs cannot always be shared between different kernels, not even necessarily kernels of the same architecture if -PAGE_CACHE_SIZE is subject to change between kernel versions +PAGE_SIZE is subject to change between kernel versions (currently possible with arm and ia64). The remaining options try to make cramfs more sharable. @@ -126,22 +126,22 @@ size. The options are: 1. Always 4096 bytes. 2. Writer chooses blocksize; kernel adapts but rejects blocksize > - PAGE_CACHE_SIZE. + PAGE_SIZE. 3. Writer chooses blocksize; kernel adapts even to blocksize > - PAGE_CACHE_SIZE. + PAGE_SIZE.
It's easy enough to change the kernel to use a smaller value than -PAGE_CACHE_SIZE: just make cramfs_readpage read multiple blocks. +PAGE_SIZE: just make cramfs_readpage read multiple blocks. -The cost of option 1 is that kernels with a larger PAGE_CACHE_SIZE +The cost of option 1 is that kernels with a larger PAGE_SIZE value don't get as good compression as they can. The cost of option 2 relative to option 1 is that the code uses variables instead of #define'd constants. The gain is that people -with kernels having larger PAGE_CACHE_SIZE can make use of that if +with kernels having larger PAGE_SIZE can make use of that if they don't mind their cramfs being inaccessible to kernels with -smaller PAGE_CACHE_SIZE values. +smaller PAGE_SIZE values. Option 3 is easy to implement if we don't mind being CPU-inefficient: e.g. get readpage to decompress to a buffer of size MAX_BLKSIZE (which diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c index 2096654dd26d..3a32ddf98095 100644 --- a/fs/cramfs/inode.c +++ b/fs/cramfs/inode.c @@ -137,7 +137,7 @@ static struct inode *get_cramfs_inode(struct super_block *sb, * page cache and dentry tree anyway.. * * This also acts as a way to guarantee contiguous areas of up to - * BLKS_PER_BUF*PAGE_CACHE_SIZE, so that the caller doesn't need to + * BLKS_PER_BUF*PAGE_SIZE, so that the caller doesn't need to * worry about end-of-buffer issues even when decompressing a full * page cache. */ diff --git a/fs/dax.c b/fs/dax.c index 1f954166704a..75ba46d82a76 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -1094,7 +1094,7 @@ EXPORT_SYMBOL_GPL(dax_pfn_mkwrite); * you are truncating a file, the helper function dax_truncate_page() may be * more convenient. * - * We work in terms of PAGE_CACHE_SIZE here for commonality with + * We work in terms of PAGE_SIZE here for commonality with * block_truncate_page(), but we could go down to PAGE_SIZE if the filesystem * took care of disposing of the unnecessary blocks. Even if the filesystem * block size is smaller than PAGE_SIZE, we have to zero the rest of the page @@ -1146,7 +1146,7 @@ EXPORT_SYMBOL_GPL(dax_zero_page_range); * Similar to block_truncate_page(), this function can be called by a * filesystem when it is truncating a DAX file to handle the partial page. * - * We work in terms of PAGE_CACHE_SIZE here for commonality with + * We work in terms of PAGE_SIZE here for commonality with * block_truncate_page(), but we could go down to PAGE_SIZE if the filesystem * took care of disposing of the unnecessary blocks. Even if the filesystem * block size is smaller than PAGE_SIZE, we have to zero the rest of the page diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index 2a988f3a0cdb..224b49e71aa4 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c @@ -763,8 +763,8 @@ static int truncate_upper(struct dentry *dentry, struct iattr *ia, } else { /* ia->ia_size < i_size_read(inode) */ /* We're chopping off all the pages down to the page * in which ia->ia_size is located. Fill in the end of - * that page from (ia->ia_size & ~PAGE_CACHE_MASK) to - * PAGE_CACHE_SIZE with zeros. */ + * that page from (ia->ia_size & ~PAGE_MASK) to + * PAGE_SIZE with zeros. 
*/ size_t num_zeros = (PAGE_SIZE - (ia->ia_size & ~PAGE_MASK)); diff --git a/fs/ext2/dir.c b/fs/ext2/dir.c index 1b45694de316..7ff6fcfa685d 100644 --- a/fs/ext2/dir.c +++ b/fs/ext2/dir.c @@ -37,7 +37,7 @@ static inline unsigned ext2_rec_len_from_disk(__le16 dlen) { unsigned len = le16_to_cpu(dlen); -#if (PAGE_CACHE_SIZE >= 65536) +#if (PAGE_SIZE >= 65536) if (len == EXT2_MAX_REC_LEN) return 1 << 16; #endif @@ -46,7 +46,7 @@ static inline unsigned ext2_rec_len_from_disk(__le16 dlen) static inline __le16 ext2_rec_len_to_disk(unsigned len) { -#if (PAGE_CACHE_SIZE >= 65536) +#if (PAGE_SIZE >= 65536) if (len == (1 << 16)) return cpu_to_le16(EXT2_MAX_REC_LEN); else diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index c04743519865..7ccba1aa142d 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1961,7 +1961,7 @@ ext4_rec_len_from_disk(__le16 dlen, unsigned blocksize) { unsigned len = le16_to_cpu(dlen); -#if (PAGE_CACHE_SIZE >= 65536) +#if (PAGE_SIZE >= 65536) if (len == EXT4_MAX_REC_LEN || len == 0) return blocksize; return (len & 65532) | ((len & 3) << 16); @@ -1974,7 +1974,7 @@ static inline __le16 ext4_rec_len_to_disk(unsigned len, unsigned blocksize) { if ((len > blocksize) || (blocksize > (1 << 18)) || (len & 3)) BUG(); -#if (PAGE_CACHE_SIZE >= 65536) +#if (PAGE_SIZE >= 65536) if (len < 65536) return cpu_to_le16(len); if (len == blocksize) { diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 8a43c683eef9..4f7043ba4447 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -4894,7 +4894,7 @@ static void ext4_wait_for_tail_page_commit(struct inode *inode) offset = inode->i_size & (PAGE_SIZE - 1); /* * All buffers in the last page remain valid? Then there's nothing to - * do. We do the check mainly to optimize the common PAGE_CACHE_SIZE == + * do. We do the check mainly to optimize the common PAGE_SIZE == * blocksize case */ if (offset > PAGE_SIZE - (1 << inode->i_blkbits)) diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index c12174711ce2..eeeade76012e 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -119,7 +119,7 @@ MODULE_PARM_DESC(mballoc_debug, "Debugging level for ext4's mballoc"); * * * one block each for bitmap and buddy information. So for each group we - * take up 2 blocks. A page can contain blocks_per_page (PAGE_CACHE_SIZE / + * take up 2 blocks. A page can contain blocks_per_page (PAGE_SIZE / * blocksize) blocks. So it can have information regarding groups_per_page * which is blocks_per_page/2 * @@ -807,7 +807,7 @@ static void mb_regenerate_buddy(struct ext4_buddy *e4b) * * one block each for bitmap and buddy information. * So for each group we take up 2 blocks. A page can - * contain blocks_per_page (PAGE_CACHE_SIZE / blocksize) blocks. + * contain blocks_per_page (PAGE_SIZE / blocksize) blocks. * So it can have information regarding groups_per_page which * is blocks_per_page/2 * diff --git a/fs/ext4/readpage.c b/fs/ext4/readpage.c index ea27aa1a778c..f24e7299e1c8 100644 --- a/fs/ext4/readpage.c +++ b/fs/ext4/readpage.c @@ -23,7 +23,7 @@ * * then this code just gives up and calls the buffer_head-based read function. * It does handle a page which has holes at the end - that is a common case: - * the end-of-file on blocksize < PAGE_CACHE_SIZE setups. + * the end-of-file on blocksize < PAGE_SIZE setups. 
* */ diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index afb7c7f05de5..4ea71eba40a5 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -237,7 +237,7 @@ hugetlbfs_read_actor(struct page *page, unsigned long offset, /* * Support for read() - Find the page attached to f_mapping and copy out the * data. Its *very* similar to do_generic_mapping_read(), we can't use that - * since it has PAGE_CACHE_SIZE assumptions. + * since it has PAGE_SIZE assumptions. */ static ssize_t hugetlbfs_read_iter(struct kiocb *iocb, struct iov_iter *to) { diff --git a/fs/mpage.c b/fs/mpage.c index e7083bfbabb3..eedc644b78d7 100644 --- a/fs/mpage.c +++ b/fs/mpage.c @@ -331,7 +331,7 @@ confused: * * then this code just gives up and calls the buffer_head-based read function. * It does handle a page which has holes at the end - that is a common case: - * the end-of-file on blocksize < PAGE_CACHE_SIZE setups. + * the end-of-file on blocksize < PAGE_SIZE setups. * * BH_Boundary explanation: * diff --git a/fs/ntfs/aops.c b/fs/ntfs/aops.c index a474e7ef92ea..97768a1379f2 100644 --- a/fs/ntfs/aops.c +++ b/fs/ntfs/aops.c @@ -674,7 +674,7 @@ static int ntfs_write_block(struct page *page, struct writeback_control *wbc) // in the inode. // Again, for each page do: // __set_page_dirty_buffers(); - // page_cache_release() + // put_page() // We don't need to wait on the writes. // Update iblock. } diff --git a/fs/ntfs/aops.h b/fs/ntfs/aops.h index 37cd7e45dcbc..820d6eabf60f 100644 --- a/fs/ntfs/aops.h +++ b/fs/ntfs/aops.h @@ -49,7 +49,7 @@ static inline void ntfs_unmap_page(struct page *page) * @index: index into the page cache for @mapping of the page to map * * Read a page from the page cache of the address space @mapping at position - * @index, where @index is in units of PAGE_CACHE_SIZE, and not in bytes. + * @index, where @index is in units of PAGE_SIZE, and not in bytes. * * If the page is not in memory it is loaded from disk first using the readpage * method defined in the address space operations of @mapping and the page is diff --git a/fs/ntfs/compress.c b/fs/ntfs/compress.c index b6074a56661b..f2b5e746f49b 100644 --- a/fs/ntfs/compress.c +++ b/fs/ntfs/compress.c @@ -105,10 +105,6 @@ static void zero_partial_compressed_page(struct page *page, ntfs_debug("Zeroing page region outside initialized size."); if (((s64)page->index << PAGE_SHIFT) >= initialized_size) { - /* - * FIXME: Using clear_page() will become wrong when we get - * PAGE_CACHE_SIZE != PAGE_SIZE but for now there is no problem. - */ clear_page(kp); return; } @@ -160,7 +156,7 @@ static inline void handle_bounds_compressed_page(struct page *page, * @xpage_done indicates whether the target page (@dest_pages[@xpage]) was * completed during the decompression of the compression block (@cb_start). * - * Warning: This function *REQUIRES* PAGE_CACHE_SIZE >= 4096 or it will blow up + * Warning: This function *REQUIRES* PAGE_SIZE >= 4096 or it will blow up * unpredicatbly! You have been warned! * * Note to hackers: This function may not sleep until it has finished accessing @@ -462,7 +458,7 @@ return_overflow: * have been written to so that we would lose data if we were to just overwrite * them with the out-of-date uncompressed data. * - * FIXME: For PAGE_CACHE_SIZE > cb_size we are not doing the Right Thing(TM) at + * FIXME: For PAGE_SIZE > cb_size we are not doing the Right Thing(TM) at * the end of the file I think. We need to detect this case and zero the out * of bounds remainder of the page in question and mark it as handled. 
At the * moment we would just return -EIO on such a page. This bug will only become @@ -470,7 +466,7 @@ return_overflow: * clusters so is probably not going to be seen by anyone. Still this should * be fixed. (AIA) * - * FIXME: Again for PAGE_CACHE_SIZE > cb_size we are screwing up both in + * FIXME: Again for PAGE_SIZE > cb_size we are screwing up both in * handling sparse and compressed cbs. (AIA) * * FIXME: At the moment we don't do any zeroing out in the case that @@ -497,12 +493,12 @@ int ntfs_read_compressed_block(struct page *page) u64 cb_size_mask = cb_size - 1UL; VCN vcn; LCN lcn; - /* The first wanted vcn (minimum alignment is PAGE_CACHE_SIZE). */ + /* The first wanted vcn (minimum alignment is PAGE_SIZE). */ VCN start_vcn = (((s64)index << PAGE_SHIFT) & ~cb_size_mask) >> vol->cluster_size_bits; /* * The first vcn after the last wanted vcn (minimum alignment is again - * PAGE_CACHE_SIZE. + * PAGE_SIZE. */ VCN end_vcn = ((((s64)(index + 1UL) << PAGE_SHIFT) + cb_size - 1) & ~cb_size_mask) >> vol->cluster_size_bits; @@ -753,11 +749,6 @@ lock_retry_remap: for (; cur_page < cb_max_page; cur_page++) { page = pages[cur_page]; if (page) { - /* - * FIXME: Using clear_page() will become wrong - * when we get PAGE_CACHE_SIZE != PAGE_SIZE but - * for now there is no problem. - */ if (likely(!cur_ofs)) clear_page(page_address(page)); else @@ -807,7 +798,7 @@ lock_retry_remap: * synchronous io for the majority of pages. * Or if we choose not to do the read-ahead/-behind stuff, we * could just return block_read_full_page(pages[xpage]) as long - * as PAGE_CACHE_SIZE <= cb_size. + * as PAGE_SIZE <= cb_size. */ if (cb_max_ofs) cb_max_page--; diff --git a/fs/ntfs/dir.c b/fs/ntfs/dir.c index 3cdce162592d..a18613579001 100644 --- a/fs/ntfs/dir.c +++ b/fs/ntfs/dir.c @@ -315,7 +315,7 @@ found_it: descend_into_child_node: /* * Convert vcn to index into the index allocation attribute in units - * of PAGE_CACHE_SIZE and map the page cache page, reading it from + * of PAGE_SIZE and map the page cache page, reading it from * disk if necessary. */ page = ntfs_map_page(ia_mapping, vcn << @@ -793,11 +793,11 @@ found_it: descend_into_child_node: /* * Convert vcn to index into the index allocation attribute in units - * of PAGE_CACHE_SIZE and map the page cache page, reading it from + * of PAGE_SIZE and map the page cache page, reading it from * disk if necessary. */ page = ntfs_map_page(ia_mapping, vcn << - dir_ni->itype.index.vcn_size_bits >> PAGE_CACHE_SHIFT); + dir_ni->itype.index.vcn_size_bits >> PAGE_SHIFT); if (IS_ERR(page)) { ntfs_error(sb, "Failed to map directory index page, error %ld.", -PTR_ERR(page)); @@ -809,9 +809,9 @@ descend_into_child_node: fast_descend_into_child_node: /* Get to the index allocation block. */ ia = (INDEX_ALLOCATION*)(kaddr + ((vcn << - dir_ni->itype.index.vcn_size_bits) & ~PAGE_CACHE_MASK)); + dir_ni->itype.index.vcn_size_bits) & ~PAGE_MASK)); /* Bounds checks. */ - if ((u8*)ia < kaddr || (u8*)ia > kaddr + PAGE_CACHE_SIZE) { + if ((u8*)ia < kaddr || (u8*)ia > kaddr + PAGE_SIZE) { ntfs_error(sb, "Out of bounds check failed. Corrupt directory " "inode 0x%lx or driver bug.", dir_ni->mft_no); goto unm_err_out; @@ -844,7 +844,7 @@ fast_descend_into_child_node: goto unm_err_out; } index_end = (u8*)ia + dir_ni->itype.index.block_size; - if (index_end > kaddr + PAGE_CACHE_SIZE) { + if (index_end > kaddr + PAGE_SIZE) { ntfs_error(sb, "Index buffer (VCN 0x%llx) of directory inode " "0x%lx crosses page boundary. Impossible! " "Cannot access! 
This is probably a bug in the " @@ -968,9 +968,9 @@ found_it2: /* If vcn is in the same page cache page as old_vcn we * recycle the mapped page. */ if (old_vcn << vol->cluster_size_bits >> - PAGE_CACHE_SHIFT == vcn << + PAGE_SHIFT == vcn << vol->cluster_size_bits >> - PAGE_CACHE_SHIFT) + PAGE_SHIFT) goto fast_descend_into_child_node; unlock_page(page); ntfs_unmap_page(page); diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c index 2dae60857544..91117ada8528 100644 --- a/fs/ntfs/file.c +++ b/fs/ntfs/file.c @@ -573,7 +573,7 @@ static inline int ntfs_submit_bh_for_read(struct buffer_head *bh) * only partially being written to. * * If @nr_pages is greater than one, we are guaranteed that the cluster size is - * greater than PAGE_CACHE_SIZE, that all pages in @pages are entirely inside + * greater than PAGE_SIZE, that all pages in @pages are entirely inside * the same cluster and that they are the entirety of that cluster, and that * the cluster is sparse, i.e. we need to allocate a cluster to fill the hole. * diff --git a/fs/ntfs/index.c b/fs/ntfs/index.c index 02a83a46ead2..0d645f357930 100644 --- a/fs/ntfs/index.c +++ b/fs/ntfs/index.c @@ -272,7 +272,7 @@ done: descend_into_child_node: /* * Convert vcn to index into the index allocation attribute in units - * of PAGE_CACHE_SIZE and map the page cache page, reading it from + * of PAGE_SIZE and map the page cache page, reading it from * disk if necessary. */ page = ntfs_map_page(ia_mapping, vcn << diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c index 3eda6d4bcc65..f40972d6df90 100644 --- a/fs/ntfs/inode.c +++ b/fs/ntfs/inode.c @@ -870,7 +870,7 @@ skip_attr_list_load: } if (ni->itype.index.block_size > PAGE_SIZE) { ntfs_error(vi->i_sb, "Index block size (%u) > " - "PAGE_CACHE_SIZE (%ld) is not " + "PAGE_SIZE (%ld) is not " "supported. Sorry.", ni->itype.index.block_size, PAGE_SIZE); @@ -1586,7 +1586,7 @@ static int ntfs_read_locked_index_inode(struct inode *base_vi, struct inode *vi) goto unm_err_out; } if (ni->itype.index.block_size > PAGE_SIZE) { - ntfs_error(vi->i_sb, "Index block size (%u) > PAGE_CACHE_SIZE " + ntfs_error(vi->i_sb, "Index block size (%u) > PAGE_SIZE " "(%ld) is not supported. Sorry.", ni->itype.index.block_size, PAGE_SIZE); err = -EOPNOTSUPP; diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c index ab2b0930054e..ecb49870a680 100644 --- a/fs/ntfs/super.c +++ b/fs/ntfs/super.c @@ -823,12 +823,12 @@ static bool parse_ntfs_boot_sector(ntfs_volume *vol, const NTFS_BOOT_SECTOR *b) ntfs_debug("vol->mft_record_size_bits = %i (0x%x)", vol->mft_record_size_bits, vol->mft_record_size_bits); /* - * We cannot support mft record sizes above the PAGE_CACHE_SIZE since + * We cannot support mft record sizes above the PAGE_SIZE since * we store $MFT/$DATA, the table of mft records in the page cache. */ if (vol->mft_record_size > PAGE_SIZE) { ntfs_error(vol->sb, "Mft record size (%i) exceeds the " - "PAGE_CACHE_SIZE on your system (%lu). " + "PAGE_SIZE on your system (%lu). " "This is not supported. Sorry.", vol->mft_record_size, PAGE_SIZE); return false; @@ -2471,12 +2471,12 @@ static s64 get_nr_free_clusters(ntfs_volume *vol) down_read(&vol->lcnbmp_lock); /* * Convert the number of bits into bytes rounded up, then convert into - * multiples of PAGE_CACHE_SIZE, rounding up so that if we have one + * multiples of PAGE_SIZE, rounding up so that if we have one * full and one partial page max_index = 2. */ max_index = (((vol->nr_clusters + 7) >> 3) + PAGE_SIZE - 1) >> PAGE_SHIFT; - /* Use multiples of 4 bytes, thus max_size is PAGE_CACHE_SIZE / 4. 
*/ + /* Use multiples of 4 bytes, thus max_size is PAGE_SIZE / 4. */ ntfs_debug("Reading $Bitmap, max_index = 0x%lx, max_size = 0x%lx.", max_index, PAGE_SIZE / 4); for (index = 0; index < max_index; index++) { @@ -2547,7 +2547,7 @@ static unsigned long __get_nr_free_mft_records(ntfs_volume *vol, pgoff_t index; ntfs_debug("Entering."); - /* Use multiples of 4 bytes, thus max_size is PAGE_CACHE_SIZE / 4. */ + /* Use multiples of 4 bytes, thus max_size is PAGE_SIZE / 4. */ ntfs_debug("Reading $MFT/$BITMAP, max_index = 0x%lx, max_size = " "0x%lx.", max_index, PAGE_SIZE / 4); for (index = 0; index < max_index; index++) { @@ -2639,7 +2639,7 @@ static int ntfs_statfs(struct dentry *dentry, struct kstatfs *sfs) size = i_size_read(vol->mft_ino) >> vol->mft_record_size_bits; /* * Convert the maximum number of set bits into bytes rounded up, then - * convert into multiples of PAGE_CACHE_SIZE, rounding up so that if we + * convert into multiples of PAGE_SIZE, rounding up so that if we * have one full and one partial page max_index = 2. */ max_index = ((((mft_ni->initialized_size >> vol->mft_record_size_bits) @@ -2765,7 +2765,7 @@ static int ntfs_fill_super(struct super_block *sb, void *opt, const int silent) if (!parse_options(vol, (char*)opt)) goto err_out_now; - /* We support sector sizes up to the PAGE_CACHE_SIZE. */ + /* We support sector sizes up to the PAGE_SIZE. */ if (bdev_logical_block_size(sb->s_bdev) > PAGE_SIZE) { if (!silent) ntfs_error(sb, "Device has unsupported sector size " diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index ce5dc4f92935..ad1577348a92 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c @@ -684,7 +684,7 @@ next_bh: return ret; } -#if (PAGE_CACHE_SIZE >= OCFS2_MAX_CLUSTERSIZE) +#if (PAGE_SIZE >= OCFS2_MAX_CLUSTERSIZE) #define OCFS2_MAX_CTXT_PAGES 1 #else #define OCFS2_MAX_CTXT_PAGES (OCFS2_MAX_CLUSTERSIZE / PAGE_SIZE) diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c index 881242c3a8d2..744d5d90c363 100644 --- a/fs/ocfs2/refcounttree.c +++ b/fs/ocfs2/refcounttree.c @@ -2956,7 +2956,7 @@ int ocfs2_duplicate_clusters_by_page(handle_t *handle, } /* - * In case PAGE_CACHE_SIZE <= CLUSTER_SIZE, This page + * In case PAGE_SIZE <= CLUSTER_SIZE, This page * can't be dirtied before we CoW it out. */ if (PAGE_SIZE <= OCFS2_SB(sb)->s_clustersize) diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c index 8c6487238bb3..2ace90e981f0 100644 --- a/fs/reiserfs/journal.c +++ b/fs/reiserfs/journal.c @@ -599,7 +599,7 @@ static int journal_list_still_alive(struct super_block *s, * This does a check to see if the buffer belongs to one of these * lost pages before doing the final put_bh. If page->mapping was * null, it tries to free buffers on the page, which should make the - * final page_cache_release drop the page from the lru. + * final put_page drop the page from the lru. */ static void release_buffer_page(struct buffer_head *bh) { diff --git a/fs/squashfs/cache.c b/fs/squashfs/cache.c index 27e501af0e8b..23813c078cc9 100644 --- a/fs/squashfs/cache.c +++ b/fs/squashfs/cache.c @@ -30,7 +30,7 @@ * access the metadata and fragment caches. * * To avoid out of memory and fragmentation issues with vmalloc the cache - * uses sequences of kmalloced PAGE_CACHE_SIZE buffers. + * uses sequences of kmalloced PAGE_SIZE buffers. * * It should be noted that the cache is not used for file datablocks, these * are decompressed and cached in the page-cache in the normal way. 
The @@ -231,7 +231,7 @@ void squashfs_cache_delete(struct squashfs_cache *cache) /* * Initialise cache allocating the specified number of entries, each of * size block_size. To avoid vmalloc fragmentation issues each entry - * is allocated as a sequence of kmalloced PAGE_CACHE_SIZE buffers. + * is allocated as a sequence of kmalloced PAGE_SIZE buffers. */ struct squashfs_cache *squashfs_cache_init(char *name, int entries, int block_size) diff --git a/fs/squashfs/file.c b/fs/squashfs/file.c index 437de9e89221..13d80947bf9e 100644 --- a/fs/squashfs/file.c +++ b/fs/squashfs/file.c @@ -382,7 +382,7 @@ void squashfs_copy_cache(struct page *page, struct squashfs_cache_entry *buffer, /* * Loop copying datablock into pages. As the datablock likely covers - * many PAGE_CACHE_SIZE pages (default block size is 128 KiB) explicitly + * many PAGE_SIZE pages (default block size is 128 KiB) explicitly * grab the pages from the page cache, except for the page that we've * been called to fill. */ diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c index 1a9c6640e604..446753d8ac34 100644 --- a/fs/ubifs/file.c +++ b/fs/ubifs/file.c @@ -554,7 +554,7 @@ static int ubifs_write_end(struct file *file, struct address_space *mapping, * VFS copied less data to the page that it intended and * declared in its '->write_begin()' call via the @len * argument. If the page was not up-to-date, and @len was - * @PAGE_CACHE_SIZE, the 'ubifs_write_begin()' function did + * @PAGE_SIZE, the 'ubifs_write_begin()' function did * not load it from the media (for optimization reasons). This * means that part of the page contains garbage. So read the * page now. diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index 20daea9aa657..e98c24ee25a1 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -2237,7 +2237,7 @@ static int __init ubifs_init(void) BUILD_BUG_ON(UBIFS_COMPR_TYPES_CNT > 4); /* - * We require that PAGE_CACHE_SIZE is greater-than-or-equal-to + * We require that PAGE_SIZE is greater-than-or-equal-to * UBIFS_BLOCK_SIZE. It is assumed that both are powers of 2. */ if (PAGE_SIZE < UBIFS_BLOCK_SIZE) { diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 5b8ae03ba855..e49b2406d15d 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -1563,7 +1563,7 @@ xfs_vm_write_begin( int status; struct xfs_mount *mp = XFS_I(mapping->host)->i_mount; - ASSERT(len <= PAGE_CACHE_SIZE); + ASSERT(len <= PAGE_SIZE); page = grab_cache_page_write_begin(mapping, index, flags); if (!page) @@ -1620,7 +1620,7 @@ xfs_vm_write_end( { int ret; - ASSERT(len <= PAGE_CACHE_SIZE); + ASSERT(len <= PAGE_SIZE); ret = generic_write_end(file, mapping, pos, len, copied, page, fsdata); if (unlikely(ret < len)) { diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 6082f54e0557..187e14b696c2 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -556,10 +556,10 @@ xfs_max_file_offset( /* Figure out maximum filesize, on Linux this can depend on * the filesystem blocksize (on 32 bit platforms). * __block_write_begin does this in an [unsigned] long... - * page->index << (PAGE_CACHE_SHIFT - bbits) + * page->index << (PAGE_SHIFT - bbits) * So, for page sized blocks (4K on 32 bit platforms), * this wraps at around 8Tb (hence MAX_LFS_FILESIZE which is - * (((u64)PAGE_CACHE_SIZE << (BITS_PER_LONG-1))-1) + * (((u64)PAGE_SIZE << (BITS_PER_LONG-1))-1) * but for smaller blocksizes it is less (bbits = log2 bsize). 
* Note1: get_block_t takes a long (implicit cast from above) * Note2: The Large Block Device (LBD and HAVE_SECTOR_T) patch diff --git a/include/linux/backing-dev-defs.h b/include/linux/backing-dev-defs.h index 1b4d69f68c33..3f103076d0bf 100644 --- a/include/linux/backing-dev-defs.h +++ b/include/linux/backing-dev-defs.h @@ -135,7 +135,7 @@ struct bdi_writeback { struct backing_dev_info { struct list_head bdi_list; - unsigned long ra_pages; /* max readahead in PAGE_CACHE_SIZE units */ + unsigned long ra_pages; /* max readahead in PAGE_SIZE units */ unsigned int capabilities; /* Device capabilities */ congested_fn *congested_fn; /* Function pointer if device is md/dm */ void *congested_data; /* Pointer to aux data for congested func */ diff --git a/include/linux/mm.h b/include/linux/mm.h index ed6407d1b7b5..ffcff53e3b2b 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -623,7 +623,7 @@ void do_set_pte(struct vm_area_struct *vma, unsigned long address, * * A page may belong to an inode's memory mapping. In this case, page->mapping * is the pointer to the inode, and page->index is the file offset of the page, - * in units of PAGE_CACHE_SIZE. + * in units of PAGE_SIZE. * * If pagecache pages are not associated with an inode, they are said to be * anonymous pages. These may become associated with the swapcache, and in that diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 944b2b37313b..c2d75b4fa86c 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -341,7 +341,7 @@ struct vm_area_struct { /* Information about our backing store: */ unsigned long vm_pgoff; /* Offset (within vm_file) in PAGE_SIZE - units, *not* PAGE_CACHE_SIZE */ + units */ struct file * vm_file; /* File we map to (can be NULL). */ void * vm_private_data; /* was vm_pte (shared mem) */ diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index efada239205e..957049f72290 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -41,8 +41,8 @@ struct nfs_page { struct page *wb_page; /* page to read in/write out */ struct nfs_open_context *wb_context; /* File state context info */ struct nfs_lock_context *wb_lock_context; /* lock context info */ - pgoff_t wb_index; /* Offset >> PAGE_CACHE_SHIFT */ - unsigned int wb_offset, /* Offset & ~PAGE_CACHE_MASK */ + pgoff_t wb_index; /* Offset >> PAGE_SHIFT */ + unsigned int wb_offset, /* Offset & ~PAGE_MASK */ wb_pgbase, /* Start of page data */ wb_bytes; /* Length of request */ struct kref wb_kref; /* reference count */ diff --git a/include/linux/nilfs2_fs.h b/include/linux/nilfs2_fs.h index 9abb763e4b86..e9fcf90b270d 100644 --- a/include/linux/nilfs2_fs.h +++ b/include/linux/nilfs2_fs.h @@ -331,7 +331,7 @@ static inline unsigned nilfs_rec_len_from_disk(__le16 dlen) { unsigned len = le16_to_cpu(dlen); -#if !defined(__KERNEL__) || (PAGE_CACHE_SIZE >= 65536) +#if !defined(__KERNEL__) || (PAGE_SIZE >= 65536) if (len == NILFS_MAX_REC_LEN) return 1 << 16; #endif @@ -340,7 +340,7 @@ static inline unsigned nilfs_rec_len_from_disk(__le16 dlen) static inline __le16 nilfs_rec_len_to_disk(unsigned len) { -#if !defined(__KERNEL__) || (PAGE_CACHE_SIZE >= 65536) +#if !defined(__KERNEL__) || (PAGE_SIZE >= 65536) if (len == (1 << 16)) return cpu_to_le16(NILFS_MAX_REC_LEN); else if (len > (1 << 16)) diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index f396ccb900cc..b3fc0370c14f 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -535,8 +535,7 @@ extern void add_page_wait_queue(struct page *page, 
wait_queue_t *waiter); /* * Fault a userspace page into pagetables. Return non-zero on a fault. * - * This assumes that two userspace pages are always sufficient. That's - * not true if PAGE_CACHE_SIZE > PAGE_SIZE. + * This assumes that two userspace pages are always sufficient. */ static inline int fault_in_pages_writeable(char __user *uaddr, int size) { diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index cc0fc712bb82..7ca44fb5b675 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -129,7 +129,7 @@ static inline void svc_get(struct svc_serv *serv) * * These happen to all be powers of 2, which is not strictly * necessary but helps enforce the real limitation, which is - * that they should be multiples of PAGE_CACHE_SIZE. + * that they should be multiples of PAGE_SIZE. * * For UDP transports, a block plus NFS,RPC, and UDP headers * has to fit into the IP datagram limit of 64K. The largest diff --git a/include/linux/swap.h b/include/linux/swap.h index 3d980ea1c946..2b83359c19ca 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -433,7 +433,7 @@ struct backing_dev_info; #define si_swapinfo(val) \ do { (val)->freeswap = (val)->totalswap = 0; } while (0) /* only sparc can not include linux/pagemap.h in this file - * so leave page_cache_release and release_pages undeclared... */ + * so leave put_page and release_pages undeclared... */ #define free_page_and_swap_cache(page) \ put_page(page) #define free_pages_and_swap_cache(pages, nr) \ diff --git a/mm/gup.c b/mm/gup.c index 7f1c4fb77cfa..fb87aea9edc8 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -1107,7 +1107,7 @@ int __mm_populate(unsigned long start, unsigned long len, int ignore_errors) * @addr: user address * * Returns struct page pointer of user page pinned for dump, - * to be freed afterwards by page_cache_release() or put_page(). + * to be freed afterwards by put_page(). 
* * Returns NULL on any kind of failure - a hole must then be inserted into * the corefile, to preserve alignment with its headers; and also returns diff --git a/mm/memory.c b/mm/memory.c index 07a420488cda..93897f23cc11 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -2400,7 +2400,6 @@ static inline void unmap_mapping_range_tree(struct rb_root *root, vba = vma->vm_pgoff; vea = vba + vma_pages(vma) - 1; - /* Assume for now that PAGE_CACHE_SHIFT == PAGE_SHIFT */ zba = details->first_index; if (zba < vba) zba = vba; diff --git a/mm/mincore.c b/mm/mincore.c index 0eed23d50adb..c0b5ba965200 100644 --- a/mm/mincore.c +++ b/mm/mincore.c @@ -211,7 +211,7 @@ static long do_mincore(unsigned long addr, unsigned long pages, unsigned char *v * return values: * zero - success * -EFAULT - vec points to an illegal address - * -EINVAL - addr is not a multiple of PAGE_CACHE_SIZE + * -EINVAL - addr is not a multiple of PAGE_SIZE * -ENOMEM - Addresses in the range [addr, addr + len] are * invalid for the address space of this process, or * specify one or more pages which are not currently @@ -233,7 +233,7 @@ SYSCALL_DEFINE3(mincore, unsigned long, start, size_t, len, if (!access_ok(VERIFY_READ, (void __user *) start, len)) return -ENOMEM; - /* This also avoids any overflows on PAGE_CACHE_ALIGN */ + /* This also avoids any overflows on PAGE_ALIGN */ pages = len >> PAGE_SHIFT; pages += (offset_in_page(len)) != 0; diff --git a/mm/swap.c b/mm/swap.c index ea641e247033..a0bc206b4ac6 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -698,7 +698,7 @@ void lru_add_drain_all(void) } /** - * release_pages - batched page_cache_release() + * release_pages - batched put_page() * @pages: array of pages to release * @nr: number of pages * @cold: whether the pages are cache cold diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c index 1e6aba0501a1..6bdb3865212d 100644 --- a/net/sunrpc/xdr.c +++ b/net/sunrpc/xdr.c @@ -164,7 +164,7 @@ EXPORT_SYMBOL_GPL(xdr_inline_pages); * Note: the addresses pgto_base and pgfrom_base are both calculated in * the same way: * if a memory area starts at byte 'base' in page 'pages[i]', - * then its address is given as (i << PAGE_CACHE_SHIFT) + base + * then its address is given as (i << PAGE_SHIFT) + base * Also note: pgfrom_base must be < pgto_base, but the memory areas * they point to may overlap. */ -- cgit v1.2.3 From 1fa64f198b9f8d6ec0f7aec7c18dc94684391140 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Fri, 1 Apr 2016 15:29:49 +0300 Subject: mm: drop PAGE_CACHE_* and page_cache_{get,release} definition All users gone. We can remove these macros. Signed-off-by: Kirill A. Shutemov Acked-by: Michal Hocko Signed-off-by: Linus Torvalds --- include/linux/pagemap.h | 15 --------------- 1 file changed, 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index b3fc0370c14f..7e1ab155c67c 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -86,21 +86,6 @@ static inline void mapping_set_gfp_mask(struct address_space *m, gfp_t mask) (__force unsigned long)mask; } -/* - * The page cache can be done in larger chunks than - * one page, because it allows for more efficient - * throughput (it can then be mapped into user - * space in smaller chunks for same flexibility). - * - * Or rather, it _will_ be done in larger chunks. 
*/ -#define PAGE_CACHE_SHIFT PAGE_SHIFT -#define PAGE_CACHE_SIZE PAGE_SIZE -#define PAGE_CACHE_MASK PAGE_MASK -#define PAGE_CACHE_ALIGN(addr) (((addr)+PAGE_CACHE_SIZE-1)&PAGE_CACHE_MASK) - -#define page_cache_get(page) get_page(page) -#define page_cache_release(page) put_page(page) void release_pages(struct page **pages, int nr, bool cold); /* -- cgit v1.2.3 From b70bb984489363aadd5ccc94d919629d9e264d36 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 15 Mar 2016 22:37:17 +0100 Subject: iommu: provide of_xlate pointer unconditionally iommu drivers that support the standard DT bindings use an of_xlate callback pointer, but that is only part of struct iommu_ops when CONFIG_OF_IOMMU is enabled, leading to build errors in randconfig builds when it is not enabled: drivers/iommu/mtk_iommu.c:497:2: error: unknown field 'of_xlate' specified in initializer .of_xlate = mtk_iommu_of_xlate, ^ drivers/iommu/mtk_iommu.c:497:14: error: initialization from incompatible pointer type [-Werror=incompatible-pointer-types] .of_xlate = mtk_iommu_of_xlate, ^~~~~~~~~~~~~~~~~~ drivers/iommu/mtk_iommu.c:497:14: note: (near initialization for 'mtk_iommu_ops.domain_get_attr') We can work around it by adding more #ifdefs in each driver, but it seems nicer to just allow setting the pointer even if it is unused. This makes the driver code look nicer, and it gives better compile-time coverage when test building on other architectures. Signed-off-by: Arnd Bergmann Fixes: 0df4fabe208d ("iommu/mediatek: Add mt8173 IOMMU driver") Reviewed-by: Robin Murphy Signed-off-by: Joerg Roedel --- include/linux/iommu.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index a5c539fa5d2b..ef7a6ecd8584 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -195,9 +195,7 @@ struct iommu_ops { /* Get the number of windows per domain */ u32 (*domain_get_windows)(struct iommu_domain *domain); -#ifdef CONFIG_OF_IOMMU int (*of_xlate)(struct device *dev, struct of_phandle_args *args); -#endif unsigned long pgsize_bitmap; void *priv; -- cgit v1.2.3 From 95272c29378ee7dc15f43fa2758cb28a5913a06d Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 31 Mar 2016 09:38:51 +0200 Subject: compiler-gcc: disable -ftracer for __noclone functions -ftracer can duplicate asm blocks, causing compilation to fail in noclone functions. For example, KVM declares a global variable in an asm like asm("2: ... \n .pushsection data \n .global vmx_return \n vmx_return: .long 2b"); and -ftracer causes a double declaration. Cc: Andrew Morton Cc: Michal Marek Cc: stable@vger.kernel.org Cc: kvm@vger.kernel.org Reported-by: Linda Walsh Signed-off-by: Paolo Bonzini --- include/linux/compiler-gcc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index 22ab246feed3..eeae401a2412 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -199,7 +199,7 @@ #define unreachable() __builtin_unreachable() /* Mark a function definition as prohibited from being cloned.
*/ -#define __noclone __attribute__((__noclone__)) +#define __noclone __attribute__((__noclone__, __optimize__("no-tracer"))) #endif /* GCC_VERSION >= 40500 */ -- cgit v1.2.3 From a0ca153f98db8cf25298565a09e11fe9d82846ad Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Tue, 5 Apr 2016 09:13:39 -0700 Subject: GRE: Disable segmentation offloads w/ CSUM and we are encapsulated via FOU This patch fixes an issue I found in which we were dropping frames if we had enabled checksums on GRE headers that were encapsulated by either FOU or GUE. Without this patch I was barely able to get 1 Gb/s of throughput. With this patch applied I am now at least getting around 6 Gb/s. The issue is due to the fact that with FOU or GUE applied we do not provide a transport offset pointing to the GRE header, nor do we offload it in software as the GRE header is completely skipped by GSO and treated like a VXLAN or GENEVE type header. As such we need to prevent the stack from generating it and also prevent GRE from generating it via any interface we create. Fixes: c3483384ee511 ("gro: Allow tunnel stacking in the case of FOU/GUE") Signed-off-by: Alexander Duyck Signed-off-by: David S. Miller --- include/linux/netdevice.h | 5 ++++- net/core/dev.c | 1 + net/ipv4/fou.c | 6 ++++++ net/ipv4/gre_offload.c | 8 ++++++++ net/ipv4/ip_gre.c | 13 ++++++++++--- 5 files changed, 29 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index cb0d5d09c2e4..8395308a2445 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2120,7 +2120,10 @@ struct napi_gro_cb { /* Used in foo-over-udp, set in udp[46]_gro_receive */ u8 is_ipv6:1; - /* 7 bit hole */ + /* Used in GRE, set in fou/gue_gro_receive */ + u8 is_fou:1; + + /* 6 bit hole */ /* used to support CHECKSUM_COMPLETE for tunneling protocols */ __wsum csum; diff --git a/net/core/dev.c b/net/core/dev.c index b9bcbe77d913..77a71cd68535 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4439,6 +4439,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff NAPI_GRO_CB(skb)->flush = 0; NAPI_GRO_CB(skb)->free = 0; NAPI_GRO_CB(skb)->encap_mark = 0; + NAPI_GRO_CB(skb)->is_fou = 0; NAPI_GRO_CB(skb)->gro_remcsum_start = 0; /* Setup for GRO checksum validation */ diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c index 5a94aea280d3..a39068b4a4d9 100644 --- a/net/ipv4/fou.c +++ b/net/ipv4/fou.c @@ -203,6 +203,9 @@ static struct sk_buff **fou_gro_receive(struct sk_buff **head, */ NAPI_GRO_CB(skb)->encap_mark = 0; + /* Flag this frame as already having an outer encap header */ + NAPI_GRO_CB(skb)->is_fou = 1; + rcu_read_lock(); offloads = NAPI_GRO_CB(skb)->is_ipv6 ? inet6_offloads : inet_offloads; ops = rcu_dereference(offloads[proto]); @@ -368,6 +371,9 @@ static struct sk_buff **gue_gro_receive(struct sk_buff **head, */ NAPI_GRO_CB(skb)->encap_mark = 0; + /* Flag this frame as already having an outer encap header */ + NAPI_GRO_CB(skb)->is_fou = 1; + rcu_read_lock(); offloads = NAPI_GRO_CB(skb)->is_ipv6 ? inet6_offloads : inet_offloads; ops = rcu_dereference(offloads[guehdr->proto_ctype]); diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c index c47539d04b88..6a5bd4317866 100644 --- a/net/ipv4/gre_offload.c +++ b/net/ipv4/gre_offload.c @@ -150,6 +150,14 @@ static struct sk_buff **gre_gro_receive(struct sk_buff **head, if ((greh->flags & ~(GRE_KEY|GRE_CSUM)) != 0) goto out; + /* We can only support GRE_CSUM if we can track the location of + * the GRE header. 
In the case of FOU/GUE we cannot because the + * outer UDP header displaces the GRE header leaving us in a state + * of limbo. + */ + if ((greh->flags & GRE_CSUM) && NAPI_GRO_CB(skb)->is_fou) + goto out; + type = greh->protocol; rcu_read_lock(); diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 31936d387cfd..af5d1f38217f 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -862,9 +862,16 @@ static void __gre_tunnel_init(struct net_device *dev) dev->hw_features |= GRE_FEATURES; if (!(tunnel->parms.o_flags & TUNNEL_SEQ)) { - /* TCP offload with GRE SEQ is not supported. */ - dev->features |= NETIF_F_GSO_SOFTWARE; - dev->hw_features |= NETIF_F_GSO_SOFTWARE; + /* TCP offload with GRE SEQ is not supported, nor + * can we support 2 levels of outer headers requiring + * an update. + */ + if (!(tunnel->parms.o_flags & TUNNEL_CSUM) || + (tunnel->encap.type == TUNNEL_ENCAP_NONE)) { + dev->features |= NETIF_F_GSO_SOFTWARE; + dev->hw_features |= NETIF_F_GSO_SOFTWARE; + } + /* Can use a lockless transmit, unless we generate * output sequences */ -- cgit v1.2.3
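A note for readers following the tree-wide rename above: the deleted definitions show that the mapping is one-to-one (PAGE_CACHE_SHIFT == PAGE_SHIFT, PAGE_CACHE_SIZE == PAGE_SIZE, PAGE_CACHE_MASK == PAGE_MASK, page_cache_get() == get_page(), page_cache_release() == put_page()), so converted callers behave identically. Below is a minimal sketch of what a migrated caller looks like; the function and its logic are hypothetical and not taken from any patch in this series:

	#include <linux/pagemap.h>	/* find_get_page(), PAGE_SHIFT */
	#include <linux/mm.h>		/* put_page() */

	/*
	 * Drop the page-cache references covering [pos, pos + len).
	 * Offsets become page indices via the bare PAGE_* macros,
	 * exactly as the rename leaves every converted caller.
	 */
	static void example_release_range(struct address_space *mapping,
					  loff_t pos, size_t len)
	{
		pgoff_t index, last;

		if (!len)
			return;

		last = (pos + len - 1) >> PAGE_SHIFT;	/* was PAGE_CACHE_SHIFT */
		for (index = pos >> PAGE_SHIFT; index <= last; index++) {
			/* find_get_page() takes a reference, or returns NULL. */
			struct page *page = find_get_page(mapping, index);

			if (!page)
				continue;
			put_page(page);		/* was page_cache_release(page) */
		}
	}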
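The -ftracer interaction fixed in the compiler-gcc patch can be illustrated with a standalone sketch; the file, function and symbol names below are invented and only mirror the failure mode the commit message describes. An asm that defines a global symbol cannot tolerate having its enclosing block duplicated: if GCC's tail-duplication pass copies that block, the assembler sees the symbol defined twice and rejects the object. The patched attribute opts the function out of the pass entirely:

	/*
	 * demo.c, hypothetical reproducer: gcc -O2 -ftracer -c demo.c
	 * Without the "no-tracer" half of the attribute, -ftracer may
	 * duplicate the block containing the asm, defining demo_return
	 * twice and failing assembly; with it, this function is exempt.
	 */
	#define __noclone __attribute__((__noclone__, __optimize__("no-tracer")))

	int demo_flag;

	int __noclone demo(int x)
	{
		if (demo_flag)	/* a join point that -ftracer could duplicate */
			x++;
		asm("2:\n\t"
		    ".pushsection .rodata\n\t"
		    ".globl demo_return\n\t"
		    "demo_return: .long 2b\n\t"
		    ".popsection");
		return x;
	}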